From 7f70ecfa18b3d823ea87c1fc7aa9224fbc5ebb6f Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 1 Feb 2016 18:31:19 +0000 Subject: [PATCH 1/5] [SystemZ] Fix wrong-code generation for certain always-false conditions We've found another bug in the code generation logic conditions for a certain class of always-false conditions, those of the form if ((a & 1) < 0) These only reach the back end when compiling without optimization. The bug was introduced by the choice of using TEST UNDER MASK to implement a check for if ((a & MASK) < VAL) as if ((a & MASK) == 0) where VAL is less than the lowest bit of MASK. This is correct in all cases except for VAL == 0, in which case the original condition is always false, but the replacement isn't. Fixed by excluding that particular case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259381 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 2 +- test/CodeGen/SystemZ/int-cmp-53.ll | 26 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/SystemZ/int-cmp-53.ll diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ee732675fb3..b0a61276463 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1849,7 +1849,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, if (CCMask == SystemZ::CCMASK_CMP_NE) return SystemZ::CCMASK_TM_SOME_1; } - if (EffectivelyUnsigned && CmpVal <= Low) { + if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_GE) diff --git a/test/CodeGen/SystemZ/int-cmp-53.ll b/test/CodeGen/SystemZ/int-cmp-53.ll new file mode 100644 index 00000000000..b7d985eeefe --- /dev/null +++ b/test/CodeGen/SystemZ/int-cmp-53.ll @@ -0,0 +1,26 @@ +; This used to incorrectly use a TMLL for an 
always-false test at -O0. +; +; RUN: llc -O0 < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @test(i8 *%input, i32 *%result) { +entry: +; CHECK-NOT: tmll + + %0 = load i8, i8* %input, align 1 + %1 = trunc i8 %0 to i1 + %2 = zext i1 %1 to i32 + %3 = icmp sge i32 %2, 0 + br i1 %3, label %if.then, label %if.else + +if.then: + store i32 1, i32* %result, align 4 + br label %return + +if.else: + store i32 0, i32* %result, align 4 + br label %return + +return: + ret void +} + From 29644f9e4ba759c561a3495af32713ed8593df70 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 4 Apr 2016 12:44:55 +0000 Subject: [PATCH 2/5] [SystemZ] Support llvm.frameaddress/llvm.returnaddress intrinsics Enable the SystemZ back-end to lower FRAMEADDR and RETURNADDR, which previously would cause the back-end to crash. Currently, only a frame count of zero is supported. Author: bryanpkc Differential Revision: http://reviews.llvm.org/D18514 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265291 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 55 +++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 2 + .../SystemZ/SystemZMachineFunctionInfo.h | 9 ++- test/CodeGen/SystemZ/frameaddr-01.ll | 28 ++++++++++ test/CodeGen/SystemZ/ret-addr-01.ll | 15 +++++ 5 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/SystemZ/frameaddr-01.ll create mode 100644 test/CodeGen/SystemZ/ret-addr-01.ll diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index b0a61276463..a7d33f929fc 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2640,6 +2640,57 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } +SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = 
DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + SDLoc DL(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // If the back chain frame index has not been allocated yet, do so. + SystemZMachineFunctionInfo *FI = MF.getInfo(); + int BackChainIdx = FI->getFramePointerSaveIndex(); + if (!BackChainIdx) { + // By definition, the frame address is the address of the back chain. + BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + FI->setFramePointerSaveIndex(BackChainIdx); + } + SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); + + // FIXME The frontend should detect this case. + if (Depth > 0) { + report_fatal_error("Unsupported stack frame traversal count"); + } + + return BackChain; +} + +SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDLoc DL(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // FIXME The frontend should detect this case. + if (Depth > 0) { + report_fatal_error("Unsupported stack frame traversal count"); + } + + // Return R14D, which has the return address. Mark it an implicit live-in. 
+ unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); +} + SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -4312,6 +4363,10 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { + case ISD::FRAMEADDR: + return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return lowerRETURNADDR(Op, DAG); case ISD::BR_CC: return lowerBR_CC(Op, DAG); case ISD::SELECT_CC: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 391636e5467..af8c67d7849 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -467,6 +467,8 @@ class SystemZTargetLowering : public TargetLowering { SelectionDAG &DAG) const; SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index f4a517bd54d..4f64f4c65f1 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -22,14 +22,15 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsFirstFPR; unsigned VarArgsFrameIndex; unsigned RegSaveFrameIndex; + int FramePointerSaveIndex; bool ManipulatesSP; unsigned NumLocalDynamics; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : 
LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), - NumLocalDynamics(0) {} + VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), + ManipulatesSP(false), NumLocalDynamics(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -59,6 +60,10 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + // Get and set the frame index of where the old frame pointer is stored. + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + // Get and set whether the function directly manipulates the stack pointer, // e.g. through STACKSAVE or STACKRESTORE. bool getManipulatesSP() const { return ManipulatesSP; } diff --git a/test/CodeGen/SystemZ/frameaddr-01.ll b/test/CodeGen/SystemZ/frameaddr-01.ll new file mode 100644 index 00000000000..4dfdf308e8a --- /dev/null +++ b/test/CodeGen/SystemZ/frameaddr-01.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The current function's frame address is the address of +; the optional back chain slot. +define i8* @fp0() nounwind { +entry: +; CHECK-LABEL: fp0: +; CHECK: la %r2, 0(%r15) +; CHECK: br %r14 + %0 = tail call i8* @llvm.frameaddress(i32 0) + ret i8* %0 +} + +; Check that the frame address is correct in a presence +; of a stack frame. 
+define i8* @fp0f() nounwind { +entry: +; CHECK-LABEL: fp0f: +; CHECK: aghi %r15, -168 +; CHECK: la %r2, 168(%r15) +; CHECK: aghi %r15, 168 +; CHECK: br %r14 + %0 = alloca i64, align 8 + %1 = tail call i8* @llvm.frameaddress(i32 0) + ret i8* %1 +} + +declare i8* @llvm.frameaddress(i32) nounwind readnone diff --git a/test/CodeGen/SystemZ/ret-addr-01.ll b/test/CodeGen/SystemZ/ret-addr-01.ll new file mode 100644 index 00000000000..9c3b246af57 --- /dev/null +++ b/test/CodeGen/SystemZ/ret-addr-01.ll @@ -0,0 +1,15 @@ +; Test support for the llvm.returnaddress intrinsic. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The current function's return address is in the link register. +define i8* @rt0() norecurse nounwind readnone { +entry: +; CHECK-LABEL: rt0: +; CHECK: lgr %r2, %r14 +; CHECK: br %r14 + %0 = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %0 +} + +declare i8* @llvm.returnaddress(i32) nounwind readnone From 2a8cdc50f16ac4d0645254a88d89b8522f9f179e Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 4 Apr 2016 12:45:44 +0000 Subject: [PATCH 3/5] [SystemZ] Support ATOMIC_FENCE A cross-thread sequentially consistent fence should be lowered into z/Architecture's BCR serialization instruction, instead of causing a fatal error in the back-end. 
Author: bryanpkc Differential Revision: http://reviews.llvm.org/D18644 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 5 +++++ lib/Target/SystemZ/SystemZISelLowering.cpp | 25 ++++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 4 ++++ lib/Target/SystemZ/SystemZInstrInfo.td | 4 ++++ lib/Target/SystemZ/SystemZOperators.td | 2 ++ test/CodeGen/SystemZ/atomic-fence-01.ll | 16 ++++++++++++++ test/CodeGen/SystemZ/atomic-fence-02.ll | 13 +++++++++++ 7 files changed, 69 insertions(+) create mode 100644 test/CodeGen/SystemZ/atomic-fence-01.ll create mode 100644 test/CodeGen/SystemZ/atomic-fence-02.ll diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 75273114d62..be729d360e3 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -260,6 +260,11 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(15).addReg(SystemZ::R0D); break; + // Emit nothing here but a comment if we can. + case SystemZ::MemBarrier: + OutStreamer->emitRawComment("MEMBARRIER"); + return; + default: Lower.lower(MI, LoweredMI); break; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index a7d33f929fc..d23828bbe2d 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -216,6 +216,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // z10 has instructions for signed but not unsigned FP conversion. // Handle unsigned 32-bit types as signed 64-bit types. 
if (!Subtarget.hasFPExtension()) { @@ -3082,6 +3084,25 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, return Op; } +SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = static_cast( + cast(Op.getOperand(1))->getZExtValue()); + SynchronizationScope FenceScope = static_cast( + cast(Op.getOperand(2))->getZExtValue()); + + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { + return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, + Op.getOperand(0)), 0); + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -4409,6 +4430,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::CTTZ_ZERO_UNDEF: return DAG.getNode(ISD::CTTZ, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -4512,6 +4535,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); @@ -5273,6 +5297,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *MBB) const { + MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 
af8c67d7849..e6e3e1701ae 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -146,6 +146,9 @@ enum NodeType : unsigned { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Compiler barrier only; generate a no-op. + MEMBARRIER, + // Transaction begin. The first operand is the chain, the second // the TDB pointer, and the third the immediate control field. // Returns chain and glue. @@ -479,6 +482,7 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 4f59d5c572e..d86e241f370 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1224,6 +1224,10 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; let hasSideEffects = 1 in def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; +// A pseudo instruction that serves as a compiler barrier. 
+let hasSideEffects = 1 in +def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>; + let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 3c95a1e11b4..730b9b31868 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -188,6 +188,8 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, [SDNPHasChain, SDNPMayStore]>; +def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; // Defined because the index is an i32 rather than a pointer. def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", diff --git a/test/CodeGen/SystemZ/atomic-fence-01.ll b/test/CodeGen/SystemZ/atomic-fence-01.ll new file mode 100644 index 00000000000..25566db9078 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-fence-01.ll @@ -0,0 +1,16 @@ +; Test (fast) serialization. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefix=Z196 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s --check-prefix=ZEC12 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s --check-prefix=Z13 + +define void @test() { +; Z10: bcr 15, %r0 +; Z196: bcr 14, %r0 +; ZEC12: bcr 14, %r0 +; Z13: bcr 14, %r0 + fence seq_cst + ret void +} + diff --git a/test/CodeGen/SystemZ/atomic-fence-02.ll b/test/CodeGen/SystemZ/atomic-fence-02.ll new file mode 100644 index 00000000000..4c4375ef669 --- /dev/null +++ b/test/CodeGen/SystemZ/atomic-fence-02.ll @@ -0,0 +1,13 @@ +; Serialization is emitted only for fence seq_cst. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +define void @test() { +; CHECK: #MEMBARRIER + fence acquire +; CHECK: #MEMBARRIER + fence release +; CHECK: #MEMBARRIER + fence acq_rel + ret void +} From 6032ab290b5ce8ac98e32115ecbdedbd0cab069d Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Thu, 28 Apr 2016 00:17:23 +0000 Subject: [PATCH 4/5] [SystemZ] Support Swift Calling Convention Summary: Port rL265480, rL264754, rL265997 and rL266252 to SystemZ, in order to enable the Swift port on the architecture. SwiftSelf and SwiftError are assigned to R10 and R9, respectively, which are normally callee-saved registers. For more information, see: RFC: Implementing the Swift calling convention in LLVM and Clang https://groups.google.com/forum/#!topic/llvm-dev/epDd2w93kZ0 Reviewers: kbarton, manmanren, rjmccall, uweigand Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D19414 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@267823 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZCallingConv.td | 12 + lib/Target/SystemZ/SystemZISelLowering.cpp | 10 +- lib/Target/SystemZ/SystemZISelLowering.h | 4 + lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8 + test/CodeGen/SystemZ/swift-return.ll | 203 ++++++++++++ test/CodeGen/SystemZ/swifterror.ll | 358 +++++++++++++++++++++ test/CodeGen/SystemZ/swiftself.ll | 66 ++++ 7 files changed, 658 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/SystemZ/swift-return.ll create mode 100644 test/CodeGen/SystemZ/swifterror.ll create mode 100644 test/CodeGen/SystemZ/swiftself.ll diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td index bdd1b1598ad..eeb04f0302e 100644 --- a/lib/Target/SystemZ/SystemZCallingConv.td +++ b/lib/Target/SystemZ/SystemZCallingConv.td @@ -33,6 +33,9 @@ def RetCC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. 
CCIfType<[i32], CCIfExtend>>, + // A SwiftError is returned in R9. + CCIfSwiftError>>, + // ABI-compliant code returns 64-bit integers in R2. Make the other // call-clobbered argument registers available for code that doesn't // care about the ABI. (R6 is an argument register too, but is @@ -65,6 +68,12 @@ def CC_SystemZ : CallingConv<[ // are smaller than 64 bits shouldn't. CCIfType<[i32], CCIfExtend>>, + // A SwiftSelf is passed in callee-saved R10. + CCIfSwiftSelf>>, + + // A SwiftError is passed in callee-saved R9. + CCIfSwiftError>>, + // Force long double values to the stack and pass i64 pointers to them. CCIfType<[f128], CCPassIndirect>, @@ -105,3 +114,6 @@ def CC_SystemZ : CallingConv<[ //===----------------------------------------------------------------------===// def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), (sequence "F%dD", 8, 15))>; + +// R9 is used to return SwiftError; remove it from CSR. +def CSR_SystemZ_SwiftError : CalleeSavedRegs<(sub CSR_SystemZ, R9D)>; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d23828bbe2d..58a4d2508a3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -989,9 +989,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, } static bool canUseSiblingCall(const CCState &ArgCCInfo, - SmallVectorImpl &ArgLocs) { + SmallVectorImpl &ArgLocs, + SmallVectorImpl &Outs) { // Punt if there are any indirect or stack arguments, or if the call - // needs the call-saved argument register R6. + // needs the callee-saved argument register R6, or if the call uses + // the callee-saved register arguments SwiftSelf and SwiftError. 
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; if (VA.getLocInfo() == CCValAssign::Indirect) @@ -1001,6 +1003,8 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo, unsigned Reg = VA.getLocReg(); if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; + if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) + return false; } return true; } @@ -1034,7 +1038,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // We don't support GuaranteedTailCallOpt, only automatically-detected // sibling calls. - if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) + if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) IsTailCall = false; // Get a count of how many bytes are to be pushed on the stack. diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index e6e3e1701ae..e6329c79439 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -452,6 +452,10 @@ class SystemZTargetLowering : public TargetLowering { SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool supportSwiftError() const override { + return true; + } + private: const SystemZSubtarget &Subtarget; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 6fd24e3df62..2bc4e8728be 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -24,12 +24,20 @@ SystemZRegisterInfo::SystemZRegisterInfo() const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (MF->getSubtarget().getTargetLowering()->supportSwiftError() && + MF->getFunction()->getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_SaveList; return CSR_SystemZ_SaveList; } const uint32_t * 
SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { + if (MF.getSubtarget().getTargetLowering()->supportSwiftError() && + MF.getFunction()->getAttributes().hasAttrSomewhere( + Attribute::SwiftError)) + return CSR_SystemZ_SwiftError_RegMask; return CSR_SystemZ_RegMask; } diff --git a/test/CodeGen/SystemZ/swift-return.ll b/test/CodeGen/SystemZ/swift-return.ll new file mode 100644 index 00000000000..e72d6def84e --- /dev/null +++ b/test/CodeGen/SystemZ/swift-return.ll @@ -0,0 +1,203 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -O0 -verify-machineinstrs | FileCheck --check-prefix=CHECK-O0 %s + +@var = global i32 0 + +; Test how llvm handles return type of {i16, i8}. The return value will be +; passed in %r2 and %r3. +; CHECK-LABEL: test: +; CHECK: st %r2 +; CHECK: brasl %r14, gen +; CHECK-DAG: lhr %r2, %r2 +; CHECK-DAG: lbr %[[REG1:r[0-9]+]], %r3 +; CHECK: ar %r2, %[[REG1]] +; CHECK-O0-LABEL: test +; CHECK-O0: st %r2 +; CHECK-O0: brasl %r14, gen +; CHECK-O0-DAG: lhr %[[REG1:r[0-9]+]], %r2 +; CHECK-O0-DAG: lbr %[[REG2:r[0-9]+]], %r3 +; CHECK-O0: ar %[[REG1]], %[[REG2]] +; CHECK-O0: lr %r2, %[[REG1]] +define i16 @test(i32 %key) { +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swiftcc { i16, i8 } @gen(i32 %0) + %v3 = extractvalue { i16, i8 } %call, 0 + %v1 = sext i16 %v3 to i32 + %v5 = extractvalue { i16, i8 } %call, 1 + %v2 = sext i8 %v5 to i32 + %add = add nsw i32 %v1, %v2 + %conv = trunc i32 %add to i16 + ret i16 %conv +} + +declare swiftcc { i16, i8 } @gen(i32) + +; If we can't pass every return value in registers, we will pass everything +; in memory. The caller provides space for the return value and passes +; the address in %r2. The first input argument will be in %r3. 
+; CHECK-LABEL: test2: +; CHECK: lr %[[REG1:r[0-9]+]], %r2 +; CHECK-DAG: la %r2, 160(%r15) +; CHECK-DAG: lr %r3, %[[REG1]] +; CHECK: brasl %r14, gen2 +; CHECK: l %r2, 160(%r15) +; CHECK: a %r2, 164(%r15) +; CHECK: a %r2, 168(%r15) +; CHECK: a %r2, 172(%r15) +; CHECK: a %r2, 176(%r15) +; CHECK-O0-LABEL: test2: +; CHECK-O0: la %[[REG1:r[0-9]+]], 168(%r15) +; CHECK-O0: st %r2, [[SPILL1:[0-9]+]](%r15) +; CHECK-O0: lgr %r2, %[[REG1]] +; CHECK-O0: l %r3, [[SPILL1]](%r15) +; CHECK-O0: brasl %r14, gen2 +; CHECK-O0-DAG: l %r{{.*}}, 184(%r15) +; CHECK-O0-DAG: l %r{{.*}}, 180(%r15) +; CHECK-O0-DAG: l %r{{.*}}, 176(%r15) +; CHECK-O0-DAG: l %r{{.*}}, 172(%r15) +; CHECK-O0-DAG: l %r{{.*}}, 168(%r15) +; CHECK-O0: ar +; CHECK-O0: ar +; CHECK-O0: ar +; CHECK-O0: ar +; CHECK-O0: lr %r2 +define i32 @test2(i32 %key) #0 { +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swiftcc { i32, i32, i32, i32, i32 } @gen2(i32 %0) + + %v3 = extractvalue { i32, i32, i32, i32, i32 } %call, 0 + %v5 = extractvalue { i32, i32, i32, i32, i32 } %call, 1 + %v6 = extractvalue { i32, i32, i32, i32, i32 } %call, 2 + %v7 = extractvalue { i32, i32, i32, i32, i32 } %call, 3 + %v8 = extractvalue { i32, i32, i32, i32, i32 } %call, 4 + + %add = add nsw i32 %v3, %v5 + %add1 = add nsw i32 %add, %v6 + %add2 = add nsw i32 %add1, %v7 + %add3 = add nsw i32 %add2, %v8 + ret i32 %add3 +} + +; The address of the return value is passed in %r2. +; On return, %r2 will contain the address that has been passed in by the caller in %r2. 
+; CHECK-LABEL: gen2: +; CHECK: st %r3, 16(%r2) +; CHECK: st %r3, 12(%r2) +; CHECK: st %r3, 8(%r2) +; CHECK: st %r3, 4(%r2) +; CHECK: st %r3, 0(%r2) +; CHECK-O0-LABEL: gen2: +; CHECK-O0-DAG: st %r3, 16(%r2) +; CHECK-O0-DAG: st %r3, 12(%r2) +; CHECK-O0-DAG: st %r3, 8(%r2) +; CHECK-O0-DAG: st %r3, 4(%r2) +; CHECK-O0-DAG: st %r3, 0(%r2) +define swiftcc { i32, i32, i32, i32, i32 } @gen2(i32 %key) { + %Y = insertvalue { i32, i32, i32, i32, i32 } undef, i32 %key, 0 + %Z = insertvalue { i32, i32, i32, i32, i32 } %Y, i32 %key, 1 + %Z2 = insertvalue { i32, i32, i32, i32, i32 } %Z, i32 %key, 2 + %Z3 = insertvalue { i32, i32, i32, i32, i32 } %Z2, i32 %key, 3 + %Z4 = insertvalue { i32, i32, i32, i32, i32 } %Z3, i32 %key, 4 + ret { i32, i32, i32, i32, i32 } %Z4 +} + +; The return value {i32, i32, i32, i32} will be returned via registers +; %r2, %r3, %r4, %r5. +; CHECK-LABEL: test3: +; CHECK: brasl %r14, gen3 +; CHECK: ar %r2, %r3 +; CHECK: ar %r2, %r4 +; CHECK: ar %r2, %r5 +; CHECK-O0-LABEL: test3: +; CHECK-O0: brasl %r14, gen3 +; CHECK-O0: ar %r2, %r3 +; CHECK-O0: ar %r2, %r4 +; CHECK-O0: ar %r2, %r5 +define i32 @test3(i32 %key) #0 { +entry: + %key.addr = alloca i32, align 4 + store i32 %key, i32* %key.addr, align 4 + %0 = load i32, i32* %key.addr, align 4 + %call = call swiftcc { i32, i32, i32, i32 } @gen3(i32 %0) + + %v3 = extractvalue { i32, i32, i32, i32 } %call, 0 + %v5 = extractvalue { i32, i32, i32, i32 } %call, 1 + %v6 = extractvalue { i32, i32, i32, i32 } %call, 2 + %v7 = extractvalue { i32, i32, i32, i32 } %call, 3 + + %add = add nsw i32 %v3, %v5 + %add1 = add nsw i32 %add, %v6 + %add2 = add nsw i32 %add1, %v7 + ret i32 %add2 +} + +declare swiftcc { i32, i32, i32, i32 } @gen3(i32 %key) + +; The return value {float, float, float, float} will be returned via registers +; %f0, %f2, %f4, %f6. 
+; CHECK-LABEL: test4: +; CHECK: brasl %r14, gen4 +; CHECK: aebr %f0, %f2 +; CHECK: aebr %f0, %f4 +; CHECK: aebr %f0, %f6 +; CHECK-O0-LABEL: test4: +; CHECK-O0: brasl %r14, gen4 +; CHECK-O0: aebr %f0, %f2 +; CHECK-O0: aebr %f0, %f4 +; CHECK-O0: aebr %f0, %f6 +define float @test4(float %key) #0 { +entry: + %key.addr = alloca float, align 4 + store float %key, float* %key.addr, align 4 + %0 = load float, float* %key.addr, align 4 + %call = call swiftcc { float, float, float, float } @gen4(float %0) + + %v3 = extractvalue { float, float, float, float } %call, 0 + %v5 = extractvalue { float, float, float, float } %call, 1 + %v6 = extractvalue { float, float, float, float } %call, 2 + %v7 = extractvalue { float, float, float, float } %call, 3 + + %add = fadd float %v3, %v5 + %add1 = fadd float %add, %v6 + %add2 = fadd float %add1, %v7 + ret float %add2 +} + +declare swiftcc { float, float, float, float } @gen4(float %key) + +; CHECK-LABEL: consume_i1_ret: +; CHECK: brasl %r14, produce_i1_ret +; CHECK: nilf %r2, 1 +; CHECK: nilf %r3, 1 +; CHECK: nilf %r4, 1 +; CHECK: nilf %r5, 1 +; CHECK-O0-LABEL: consume_i1_ret: +; CHECK-O0: brasl %r14, produce_i1_ret +; CHECK-O0: nilf %r2, 1 +; CHECK-O0: nilf %r3, 1 +; CHECK-O0: nilf %r4, 1 +; CHECK-O0: nilf %r5, 1 +define void @consume_i1_ret() { + %call = call swiftcc { i1, i1, i1, i1 } @produce_i1_ret() + %v3 = extractvalue { i1, i1, i1, i1 } %call, 0 + %v5 = extractvalue { i1, i1, i1, i1 } %call, 1 + %v6 = extractvalue { i1, i1, i1, i1 } %call, 2 + %v7 = extractvalue { i1, i1, i1, i1 } %call, 3 + %val = zext i1 %v3 to i32 + store i32 %val, i32* @var + %val2 = zext i1 %v5 to i32 + store i32 %val2, i32* @var + %val3 = zext i1 %v6 to i32 + store i32 %val3, i32* @var + %val4 = zext i1 %v7 to i32 + store i32 %val4, i32* @var + ret void +} + +declare swiftcc { i1, i1, i1, i1 } @produce_i1_ret() diff --git a/test/CodeGen/SystemZ/swifterror.ll b/test/CodeGen/SystemZ/swifterror.ll new file mode 100644 index 00000000000..90d55eef4ae --- 
/dev/null +++ b/test/CodeGen/SystemZ/swifterror.ll @@ -0,0 +1,358 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu| FileCheck %s +; RUN: llc < %s -O0 -mtriple=s390x-linux-gnu | FileCheck --check-prefix=CHECK-O0 %s + +declare i8* @malloc(i64) +declare void @free(i8*) +%swift_error = type {i64, i8} + +; This tests the basic usage of a swifterror parameter. "foo" is the function +; that takes a swifterror parameter and "caller" is the caller of "foo". +define float @foo(%swift_error** swifterror %error_ptr_ref) { +; CHECK-LABEL: foo: +; CHECK: lghi %r2, 16 +; CHECK: brasl %r14, malloc +; CHECK: mvi 8(%r2), 1 +; CHECK: lgr %r9, %r2 +; CHECK-O0-LABEL: foo: +; CHECK-O0: lghi %r2, 16 +; CHECK-O0: brasl %r14, malloc +; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: lgr %r9, %r[[REG1]] +entry: + %call = call i8* @malloc(i64 16) + %call.0 = bitcast i8* %call to %swift_error* + store %swift_error* %call.0, %swift_error** %error_ptr_ref + %tmp = getelementptr inbounds i8, i8* %call, i64 8 + store i8 1, i8* %tmp + ret float 1.0 +} + +; "caller" calls "foo" that takes a swifterror parameter. 
+define float @caller(i8* %error_ref) { +; CHECK-LABEL: caller: +; Make a copy of error_ref because r2 is getting clobbered +; CHECK: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK: lghi %r9, 0 +; CHECK: brasl %r14, foo +; CHECK: cgijlh %r9, 0, +; Access part of the error object and save it to error_ref +; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9) +; CHECK: stc %r[[REG2]], 0(%r[[REG1]]) +; CHECK: lgr %r2, %r9 +; CHECK: brasl %r14, free +; CHECK-O0-LABEL: caller: +; CHECK-O0: lghi %r9, 0 +; CHECK-O0: brasl %r14, foo +; CHECK-O0: cghi %r9, 0 +; CHECK-O0: jlh +entry: + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + %call = call float @foo(%swift_error** swifterror %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} + +; "caller2" is the caller of "foo", it calls "foo" inside a loop. 
+define float @caller2(i8* %error_ref) { +; CHECK-LABEL: caller2: +; Make a copy of error_ref because r2 is getting clobbered +; CHECK: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK: lghi %r9, 0 +; CHECK: brasl %r14, foo +; CHECK: cgijlh %r9, 0, +; CHECK: ceb %f0, +; CHECK: jnh +; Access part of the error object and save it to error_ref +; CHECK: lb %r[[REG2:[0-9]+]], 8(%r9) +; CHECK: stc %r[[REG2]], 0(%r[[REG1]]) +; CHECK: lgr %r2, %r9 +; CHECK: brasl %r14, free +; CHECK-O0-LABEL: caller2: +; CHECK-O0: lghi %r9, 0 +; CHECK-O0: brasl %r14, foo +; CHECK-O0: cghi %r9, 0 +; CHECK-O0: jlh +entry: + %error_ptr_ref = alloca swifterror %swift_error* + br label %bb_loop +bb_loop: + store %swift_error* null, %swift_error** %error_ptr_ref + %call = call float @foo(%swift_error** swifterror %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %cmp = fcmp ogt float %call, 1.000000e+00 + br i1 %cmp, label %bb_end, label %bb_loop +bb_end: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} + +; "foo_if" is a function that takes a swifterror parameter, it sets swifterror +; under a certain condition. 
+define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { +; CHECK-LABEL: foo_if: +; CHECK: cije %r2, 0 +; CHECK: lghi %r2, 16 +; CHECK: brasl %r14, malloc +; CHECK: mvi 8(%r2), 1 +; CHECK: lgr %r9, %r2 +; CHECK-NOT: %r9 +; CHECK: br %r14 +; CHECK-O0-LABEL: foo_if: +; CHECK-O0: chi %r2, 0 +; spill to stack +; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: je +; CHECK-O0: lghi %r2, 16 +; CHECK-O0: brasl %r14, malloc +; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: lgr %r9, %r[[REG1]] +; CHECK-O0: br %r14 +; reload from stack +; CHECK-O0: lg %r9, [[OFFS]](%r15) +; CHECK-O0: br %r14 +entry: + %cond = icmp ne i32 %cc, 0 + br i1 %cond, label %gen_error, label %normal + +gen_error: + %call = call i8* @malloc(i64 16) + %call.0 = bitcast i8* %call to %swift_error* + store %swift_error* %call.0, %swift_error** %error_ptr_ref + %tmp = getelementptr inbounds i8, i8* %call, i64 8 + store i8 1, i8* %tmp + ret float 1.0 + +normal: + ret float 0.0 +} + +; "foo_loop" is a function that takes a swifterror parameter, it sets swifterror +; under a certain condition inside a loop. 
+define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { +; CHECK-LABEL: foo_loop: +; CHECK: lr %r[[REG1:[0-9]+]], %r2 +; CHECK: cije %r[[REG1]], 0 +; CHECK: lghi %r2, 16 +; CHECK: brasl %r14, malloc +; CHECK: mvi 8(%r2), 1 +; CHECK: ceb %f8, +; CHECK: jnh +; CHECK: lgr %r9, %r2 +; CHECK: br %r14 +; CHECK-O0-LABEL: foo_loop: +; spill to stack +; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: chi %r{{.*}}, 0 +; CHECK-O0: je +; CHECK-O0: lghi %r2, 16 +; CHECK-O0: brasl %r14, malloc +; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0: jnh +; reload from stack +; CHECK-O0: lg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: br %r14 +entry: + br label %bb_loop + +bb_loop: + %cond = icmp ne i32 %cc, 0 + br i1 %cond, label %gen_error, label %bb_cont + +gen_error: + %call = call i8* @malloc(i64 16) + %call.0 = bitcast i8* %call to %swift_error* + store %swift_error* %call.0, %swift_error** %error_ptr_ref + %tmp = getelementptr inbounds i8, i8* %call, i64 8 + store i8 1, i8* %tmp + br label %bb_cont + +bb_cont: + %cmp = fcmp ogt float %cc2, 1.000000e+00 + br i1 %cmp, label %bb_end, label %bb_loop +bb_end: + ret float 0.0 +} + +%struct.S = type { i32, i32, i32, i32, i32, i32 } + +; "foo_sret" is a function that takes a swifterror parameter, it also has a sret +; parameter. 
+define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) { +; CHECK-LABEL: foo_sret: +; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-DAG: lr %r[[REG2:[0-9]+]], %r3 +; CHECK: lghi %r2, 16 +; CHECK: brasl %r14, malloc +; CHECK: mvi 8(%r2), 1 +; CHECK: st %r[[REG2]], 4(%r[[REG1]]) +; CHECK: lgr %r9, %r2 +; CHECK-NOT: %r9 +; CHECK: br %r14 + +; CHECK-O0-LABEL: foo_sret: +; CHECK-O0: lghi %r{{.*}}, 16 +; spill sret to stack +; CHECK-O0: stg %r2, [[OFFS1:[0-9]+]](%r15) +; CHECK-O0: lgr %r2, %r{{.*}} +; CHECK-O0: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0: brasl %r14, malloc +; CHECK-O0: lgr {{.*}}, %r2 +; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0-DAG: lg %r[[REG1:[0-9]+]], [[OFFS1]](%r15) +; CHECK-O0-DAG: l %r[[REG2:[0-9]+]], [[OFFS2]](%r15) +; CHECK-O0: st %r[[REG2]], 4(%r[[REG1]]) +; CHECK-O0: lgr %r9, {{.*}} +; CHECK-O0: br %r14 +entry: + %call = call i8* @malloc(i64 16) + %call.0 = bitcast i8* %call to %swift_error* + store %swift_error* %call.0, %swift_error** %error_ptr_ref + %tmp = getelementptr inbounds i8, i8* %call, i64 8 + store i8 1, i8* %tmp + %v2 = getelementptr inbounds %struct.S, %struct.S* %agg.result, i32 0, i32 1 + store i32 %val1, i32* %v2 + ret void +} + +; "caller3" calls "foo_sret" that takes a swifterror parameter. 
+define float @caller3(i8* %error_ref) { +; CHECK-LABEL: caller3: +; Make a copy of error_ref because r2 is getting clobbered +; CHECK: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK: lhi %r3, 1 +; CHECK: lghi %r9, 0 +; CHECK: brasl %r14, foo_sret +; CHECK: cgijlh %r9, 0, +; Access part of the error object and save it to error_ref +; CHECK: lb %r0, 8(%r9) +; CHECK: stc %r0, 0(%r[[REG1]]) +; CHECK: lgr %r2, %r9 +; CHECK: brasl %r14, free + +; CHECK-O0-LABEL: caller3: +; CHECK-O0: lghi %r9, 0 +; CHECK-O0: lhi %r3, 1 +; CHECK-O0: stg %r2, {{.*}}(%r15) +; CHECK-O0: lgr %r2, {{.*}} +; CHECK-O0: brasl %r14, foo_sret +; CHECK-O0: lgr {{.*}}, %r9 +; CHECK-O0: cghi %r9, 0 +; CHECK-O0: jlh +; Access part of the error object and save it to error_ref +; CHECK-O0: lb %r0, 8(%r{{.*}}) +; CHECK-O0: stc %r0, 0(%r{{.*}}) +; reload from stack +; CHECK-O0: lg %r2, {{.*}}(%r15) +; CHECK-O0: brasl %r14, free +entry: + %s = alloca %struct.S, align 8 + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + call void @foo_sret(%struct.S* sret %s, i32 1, %swift_error** swifterror %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} + +; This is a caller with multiple swifterror values, it calls "foo" twice, each +; time with a different swifterror value, from "alloca swifterror". 
+define float @caller_with_multiple_swifterror_values(i8* %error_ref, i8* %error_ref2) { +; CHECK-LABEL: caller_with_multiple_swifterror_values: +; CHECK-DAG: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-DAG: lgr %r[[REG2:[0-9]+]], %r3 +; The first swifterror value: +; CHECK: lghi %r9, 0 +; CHECK: brasl %r14, foo +; CHECK: cgijlh %r9, 0, +; Access part of the error object and save it to error_ref +; CHECK: lb %r0, 8(%r9) +; CHECK: stc %r0, 0(%r[[REG1]]) +; CHECK: lgr %r2, %r9 +; CHECK: brasl %r14, free + +; The second swifterror value: +; CHECK: lghi %r9, 0 +; CHECK: brasl %r14, foo +; CHECK: cgijlh %r9, 0, +; Access part of the error object and save it to error_ref +; CHECK: lb %r0, 8(%r9) +; CHECK: stc %r0, 0(%r[[REG2]]) +; CHECK: lgr %r2, %r9 +; CHECK: brasl %r14, free + +; CHECK-O0-LABEL: caller_with_multiple_swifterror_values: + +; The first swifterror value: +; CHECK-O0: lghi %r9, 0 +; CHECK-O0: brasl %r14, foo +; CHECK-O0: jlh + +; The second swifterror value: +; CHECK-O0: lghi %r9, 0 +; CHECK-O0: brasl %r14, foo +; CHECK-O0: jlh +entry: + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + %call = call float @foo(%swift_error** swifterror %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + + %error_ptr_ref2 = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref2 + %call2 = call float @foo(%swift_error** swifterror %error_ptr_ref2) + %error_from_foo2 = load %swift_error*, %swift_error** %error_ptr_ref2 + %had_error_from_foo2 = icmp ne %swift_error* 
%error_from_foo2, null + %bitcast2 = bitcast %swift_error* %error_from_foo2 to i8* + br i1 %had_error_from_foo2, label %handler2, label %cont2 +cont2: + %v2 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo2, i64 0, i32 1 + %t2 = load i8, i8* %v2 + store i8 %t2, i8* %error_ref2 + br label %handler2 +handler2: + call void @free(i8* %bitcast2) + + ret float 1.0 +} diff --git a/test/CodeGen/SystemZ/swiftself.ll b/test/CodeGen/SystemZ/swiftself.ll new file mode 100644 index 00000000000..ee6104ad203 --- /dev/null +++ b/test/CodeGen/SystemZ/swiftself.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Parameter with swiftself should be allocated to r10. +; CHECK-LABEL: swiftself_param: +; CHECK: lgr %r2, %r10 +define i8 *@swiftself_param(i8* swiftself %addr0) { + ret i8 *%addr0 +} + +; Check that r10 is used to pass a swiftself argument. +; CHECK-LABEL: call_swiftself: +; CHECK: lgr %r10, %r2 +; CHECK: brasl %r14, swiftself_param +define i8 *@call_swiftself(i8* %arg) { + %res = call i8 *@swiftself_param(i8* swiftself %arg) + ret i8 *%res +} + +; r10 should be saved by the callee even if used for swiftself +; CHECK-LABEL: swiftself_clobber: +; CHECK: stmg %r10, +; ... +; CHECK: lmg %r10, +; CHECK: br %r14 +define i8 *@swiftself_clobber(i8* swiftself %addr0) { + call void asm sideeffect "", "~{r10}"() + ret i8 *%addr0 +} + +; Demonstrate that we do not need any loads when calling multiple functions +; with swiftself argument. +; CHECK-LABEL: swiftself_passthrough: +; CHECK-NOT: lg{{.*}}r10, +; CHECK: brasl %r14, swiftself_param +; CHECK-NOT: lg{{.*}}r10, +; CHECK-NEXT: brasl %r14, swiftself_param +define void @swiftself_passthrough(i8* swiftself %addr0) { + call i8 *@swiftself_param(i8* swiftself %addr0) + call i8 *@swiftself_param(i8* swiftself %addr0) + ret void +} + +; Normally, we can use a tail call if the callee swiftself is the same as the +; caller one. Not yet supported on SystemZ. 
+; CHECK-LABEL: swiftself_tail: +; CHECK: lgr %r[[REG1:[0-9]+]], %r10 +; CHECK: lgr %r10, %r[[REG1]] +; CHECK: brasl %r14, swiftself_param +; CHECK: br %r14 +define i8* @swiftself_tail(i8* swiftself %addr0) { + call void asm sideeffect "", "~{r10}"() + %res = tail call i8* @swiftself_param(i8* swiftself %addr0) + ret i8* %res +} + +; We cannot use a tail call if the callee swiftself is not the same as the +; caller one. +; CHECK-LABEL: swiftself_notail: +; CHECK: lgr %r10, %r2 +; CHECK: brasl %r14, swiftself_param +; CHECK: lmg %r10, +; CHECK: br %r14 +define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { + %res = tail call i8* @swiftself_param(i8* swiftself %addr1) + ret i8* %res +} From 3f4217ad3531b7323b24cc5eb7732a29c5286520 Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Fri, 13 May 2016 17:23:48 +0000 Subject: [PATCH 5/5] [RuntimeDyld] Support R_390_PC64 relocation type Summary: When the MCJIT generates ELF code, some DWARF data requires a 64-bit PC-relative relocation (R_390_PC64). This patch adds support for R_390_PC64 relocation to RuntimeDyld::resolveSystemZRelocation, to avoid an assertion failure. 
Reviewers: uweigand Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D20033 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269436 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../RuntimeDyld/RuntimeDyldELF.cpp | 5 +++ .../SystemZ/Inputs/rtdyld-globals.ll | 1 + .../RuntimeDyld/SystemZ/cfi-relo-pc64.s | 32 +++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 test/ExecutionEngine/RuntimeDyld/SystemZ/Inputs/rtdyld-globals.ll create mode 100644 test/ExecutionEngine/RuntimeDyld/SystemZ/cfi-relo-pc64.s diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index e09b71af18a..35fc4dea020 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1047,6 +1047,11 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, case ELF::R_390_64: writeInt64BE(LocalAddress, Value + Addend); break; + case ELF::R_390_PC64: { + int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); + writeInt64BE(LocalAddress, Delta); + break; + } } } diff --git a/test/ExecutionEngine/RuntimeDyld/SystemZ/Inputs/rtdyld-globals.ll b/test/ExecutionEngine/RuntimeDyld/SystemZ/Inputs/rtdyld-globals.ll new file mode 100644 index 00000000000..165e8ace796 --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/SystemZ/Inputs/rtdyld-globals.ll @@ -0,0 +1 @@ +@foo = global i8 0 diff --git a/test/ExecutionEngine/RuntimeDyld/SystemZ/cfi-relo-pc64.s b/test/ExecutionEngine/RuntimeDyld/SystemZ/cfi-relo-pc64.s new file mode 100644 index 00000000000..6fc166d612f --- /dev/null +++ b/test/ExecutionEngine/RuntimeDyld/SystemZ/cfi-relo-pc64.s @@ -0,0 +1,32 @@ +// Test that R_390_PC32 and R_390_PC64 relocations can be generated. +// RUN: llvm-mc -triple s390x-linux-gnu -relocation-model=pic -filetype=obj %s -o - | llvm-readobj -s -sr -sd | FileCheck %s + +// Test that RuntimeDyld can fix up such relocations. 
+// RUN: llvm-mc -triple s390x-linux-gnu -relocation-model=pic -filetype=obj %s -o %T/test-s390x-cfi-relo-pc64.o +// RUN: llc -mtriple=s390x-linux-gnu -relocation-model=pic -filetype=obj %S/Inputs/rtdyld-globals.ll -o %T/test-s390x-rtdyld-globals.o +// RUN: llvm-rtdyld -triple=s390x-linux-gnu -verify %T/test-s390x-cfi-relo-pc64.o %T/test-s390x-rtdyld-globals.o + +f1: + .cfi_startproc + .cfi_personality 0x9c, foo // DW_EH_PE_indirect|DW_EH_PE_pcrel|DW_EH_PE_sdata8 (0x9c) + lr %r0, %r0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 48 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x12 R_390_PC64 foo 0x0 +// CHECK-NEXT: 0x28 R_390_PC32 .text 0x0 +// CHECK-NEXT: ] +// CHECK: }