From aa3519f178fc6ea563f950a4077b34d8dc6c4470 Mon Sep 17 00:00:00 2001 From: Anirudh Prasad Date: Thu, 21 Oct 2021 09:48:21 -0400 Subject: [PATCH] [SystemZ][z/OS] Initial implementation for lowerCall on z/OS - This patch provides the initial implementation for lowering a call on z/OS according to the XPLINK64 calling convention - A series of changes have been made to SystemZCallingConv.td to account for these additional XPLINK64 changes including adding a new helper function to shadow the stack along with allocation of a register wherever appropriate - For the cases of copying a f64 to a gr64 and a f128 / 128-bit vector type to a gr64, a `CCBitConvertToType` has been added and has been bitcasted appropriately in the lowering phase - Support for the ADA register (R5) will be provided in a later patch. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D111662 --- .../lib/Target/SystemZ/SystemZCallingConv.cpp | 4 + llvm/lib/Target/SystemZ/SystemZCallingConv.h | 71 +++++-- llvm/lib/Target/SystemZ/SystemZCallingConv.td | 33 +-- .../Target/SystemZ/SystemZISelLowering.cpp | 69 +++++-- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 14 ++ llvm/lib/Target/TargetMachine.cpp | 3 + llvm/test/CodeGen/SystemZ/call-zos-01.ll | 191 +++++++++++++++++ llvm/test/CodeGen/SystemZ/call-zos-vararg.ll | 195 ++++++++++++++++++ llvm/test/CodeGen/SystemZ/call-zos-vec.ll | 66 ++++++ 9 files changed, 601 insertions(+), 45 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/call-zos-01.ll create mode 100644 llvm/test/CodeGen/SystemZ/call-zos-vararg.ll create mode 100644 llvm/test/CodeGen/SystemZ/call-zos-vec.ll diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp index 86eb8365d527..9c73757d7f5c 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = { const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = { SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D }; + +const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = { + SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, + SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31}; diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h index 96c1080d5237..f82c61c0f344 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -27,6 +27,9 @@ namespace SystemZ { const unsigned XPLINK64NumArgFPRs = 4; extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs]; + + const unsigned XPLINK64NumArgVRs = 8; + extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs]; } // end namespace SystemZ class SystemZCCState : public CCState { @@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT, else llvm_unreachable("Unknown Calling Convention!"); - unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8)); + unsigned Offset = Reg && !Subtarget.isTargetXPLINK64() + ? 0 + : State.AllocateStack(8, Align(8)); // Use that same location for all the pending parts. for (auto &It : PendingMembers) { @@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - if (LocVT.getSizeInBits() < 128) - return false; - - if (static_cast(&State)->IsFixed(ValNo)) - return false; - // For any C or C++ program, this should always be // false, since it is illegal to have a function // where the first argument is variadic. Therefore @@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT, bool AllocGPR3 = State.AllocateReg(SystemZ::R3D); // If GPR2 and GPR3 are available, then we may pass vararg in R2Q. - if (AllocGPR2 && AllocGPR3) { - State.addLoc( - CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo)); + // If only GPR3 is available, we need to set custom handling to copy + // hi bits into GPR3. + // Either way, we allocate on the stack. + if (AllocGPR3) { + // For f128 and vector var arg case, set the bitcast flag to bitcast to + // i128. + LocVT = MVT::i128; + LocInfo = CCValAssign::BCvt; + auto Offset = State.AllocateStack(16, Align(8)); + if (AllocGPR2) + State.addLoc( + CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo)); + else + State.addLoc( + CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } - // If only GPR3 is available, we allocate on stack but need to - // set custom handling to copy hi bits into GPR3. - if (!AllocGPR2 && AllocGPR3) { - auto Offset = State.AllocateStack(16, Align(8)); - State.addLoc( - CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return true; + return false; +} + +inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + ArrayRef RegList; + + switch (LocVT.SimpleTy) { + case MVT::i64: + RegList = SystemZ::XPLINK64ArgGPRs; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + RegList = SystemZ::XPLINK64ArgVRs; + break; + case MVT::f32: + case MVT::f64: + case MVT::f128: + RegList = SystemZ::XPLINK64ArgFPRs; + break; + default: + return false; } + unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList); + // Every time we can allocate a register, allocate on the stack. + if (UnallocatedRegisterIndex < RegList.size()) + State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8)); + return false; } diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td index c606e78b69b6..373023effb4a 100644 --- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -224,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // XPLINK64 ABI compliant code widens integral types smaller than i64 // to i64 before placing the parameters either on the stack or in registers. CCIfType<[i32], CCIfExtend>>, + // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRS. + CCIfType<[f32], CCIfNotFixed>>, + CCIfType<[f64], CCIfNotFixed>>, + // long double, can only be passed in GPR2 and GPR3, if available, + // hence R2Q + CCIfType<[f128], CCIfNotFixed>>, + // Non fixed vector arguments are treated in the same way as long + // doubles. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfNotFixed>>>, // A SwiftSelf is passed in callee-saved R10. CCIfSwiftSelf>>, @@ -238,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // The first 3 integer arguments are passed in registers R1D-R3D. // The rest will be passed in the user area. The address offset of the user // area can be found in register R4D. - CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>, + CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>, CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>, // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors @@ -247,6 +258,9 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfFixed>>>, + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed>>>, CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfFixed>>, + CCIfType<[f32, f64], CCIfFixed>>, CCIfType<[f32], CCIfFixed>>, CCIfType<[f64], CCIfFixed>>, // The first 2 long double arguments are passed in register FPR0/FPR2 // and FPR4/FPR6. The rest will be passed in the user area. CCIfType<[f128], CCIfFixed>>, + CCIfType<[f128], CCIfFixed>>, CCIfType<[f128], CCIfFixed>>, - // Non fixed floats are passed in GPRs - // Promote f32 to f64, if it needs to be passed in GPRs. - CCIfType<[f32], CCIfNotFixed>>, - // Assign f64 varargs to their proper GPRs. - CCIfType<[f64], CCIfNotFixed>>, - // long double, can only be passed in GPR2 and GPR3, if available, - // hence R2Q - CCIfType<[f128], CCIfNotFixed>>, - - // Non fixed vector arguments are treated in the same way as long - // doubles. - CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfNotFixed>>>, - // Other arguments are passed in 8-byte-aligned 8-byte stack slots. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, // Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index e3a79e6d1a99..a9bc4f30fff6 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1358,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::AExt: return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); - case CCValAssign::BCvt: - // If this is a short vector argument to be stored to the stack, + case CCValAssign::BCvt: { + assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); + assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 || + VA.getValVT() == MVT::f128); + MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 + ? MVT::v2i64 + : VA.getLocVT(); + Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value); + // For ELF, this is a short vector argument to be stored to the stack, // bitcast to v2i64 and then extract first element. - assert(VA.getLocVT() == MVT::i64); - assert(VA.getValVT().isVector()); - Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, - DAG.getConstant(0, DL, MVT::i32)); + if (BitCastToType == MVT::v2i64) + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, + DAG.getConstant(0, DL, MVT::i32)); + return Value; + } case CCValAssign::Full: return Value; default: @@ -1472,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments( NumFixedFPRs += 1; RC = &SystemZ::FP64BitRegClass; break; + case MVT::f128: + NumFixedFPRs += 2; + RC = &SystemZ::FP128BitRegClass; + break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: @@ -1525,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments( InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); } - if (IsVarArg) { + // FIXME: Add support for lowering varargs for XPLINK64 in a later patch. + if (IsVarArg && Subtarget.isTargetELF()) { // Save the number of non-varargs registers for later use by va_start, etc. FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); @@ -1564,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments( } } + // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special + // register (R5) return Chain; } @@ -1604,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); LLVMContext &Ctx = *DAG.getContext(); + SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); + + // FIXME: z/OS support to be added in later. + if (Subtarget.isTargetXPLINK64()) + IsTailCall = false; // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -1624,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + if (Subtarget.isTargetXPLINK64()) + // Although the XPLINK specifications for AMODE64 state that minimum size + // of the param area is minimum 32 bytes and no rounding is otherwise + // specified, we round this area in 64 bytes increments to be compatible + // with existing compilers. + NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64)); + // Mark the start of the call. if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); @@ -1674,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); - if (VA.isRegLoc()) + if (VA.isRegLoc()) { + // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a + // MVT::i128 type. We decompose the 128-bit type to a pair of its high + // and low values. + if (VA.getLocVT() == MVT::i128) + ArgValue = lowerI128ToGR128(DAG, ArgValue); // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); - else { + } else { assert(VA.isMemLoc() && "Argument not register or memory"); // Work out the address of the stack slot. Unpromoted ints and // floats are passed as right-justified 8-byte values. if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); - unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset(); + StackPtr = DAG.getCopyFromReg(Chain, DL, + Regs->getStackPointerRegister(), PtrVT); + unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + + VA.getLocMemOffset(); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, @@ -1693,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the store. MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); + + // Although long doubles or vectors are passed through the stack when + // they are vararg (non-fixed arguments), if a long double or vector + // occupies the third and fourth slot of the argument list GPR3 should + // still shadow the third slot of the argument list. + if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { + SDValue ShadowArgValue = + DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, + DAG.getIntPtrConstant(1, DL)); + RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); + } } } @@ -1704,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // associated Target* opcodes. Force %r1 to be used for indirect // tail calls. SDValue Glue; + // FIXME: Add support for XPLINK using the ADA register. if (auto *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 2a4253e2deaf..8ce01074873a 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -66,6 +66,12 @@ public: virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const = 0; + /// \returns the offset to the locals area. + virtual int getCallFrameSize() = 0; + + /// \returns the stack pointer bias. + virtual int getStackPointerBias() = 0; + /// Destroys the object. Bogus destructor allowing derived classes /// to override it. virtual ~SystemZCallingConventionRegisters(){}; @@ -91,6 +97,10 @@ public: const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override final; + int getCallFrameSize() override final { return 128; } + + int getStackPointerBias() override final { return 2048; } + /// Destroys the object. Bogus destructor overriding base class destructor ~SystemZXPLINK64Registers(){}; }; @@ -113,6 +123,10 @@ public: const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override final; + int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; } + + int getStackPointerBias() override final { return 0; } + /// Destroys the object. Bogus destructor overriding base class destructor ~SystemZELFRegisters(){}; }; diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index 08295df376e1..390457dbb2bc 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -135,6 +135,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, return true; } + if (TT.isOSBinFormatGOFF()) + return true; + if (TT.isOSBinFormatMachO()) { if (RM == Reloc::Static) return true; diff --git a/llvm/test/CodeGen/SystemZ/call-zos-01.ll b/llvm/test/CodeGen/SystemZ/call-zos-01.ll new file mode 100644 index 000000000000..7194d09cba16 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/call-zos-01.ll @@ -0,0 +1,191 @@ +; Test the passing of scalar values in GPRs, FPRs in 64-bit calls on z/OS. +; +; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s + +; CHECK-LABEL: call_char: +; CHECK: lghi 1, 8 +define i8 @call_char(){ + %retval = call i8 (i8) @pass_char(i8 8) + ret i8 %retval +} + +; CHECK-LABEL: call_short: +; CHECK: lghi 1, 16 +define i16 @call_short() { +entry: + %retval = call i16 (i16) @pass_short(i16 16) + ret i16 %retval +} + +; CHECK-LABEL: call_int: +; CHECK: lghi 1, 32 +; CHECK: lghi 2, 33 +define i32 @call_int() { +entry: + %retval = call i32 (i32, i32) @pass_int(i32 32, i32 33) + ret i32 %retval +} + +; CHECK-LABEL: call_long: +; CHECK: lghi 1, 64 +; CHECK: lghi 2, 65 +; CHECK: lghi 3, 66 +define i64 @call_long() { +entry: + %retval = call i64 (i64, i64, i64) @pass_long(i64 64, i64 65, i64 66) + ret i64 %retval +} + +; CHECK-LABEL: call_ptr: +; CHECK: lgr 1, 2 +define i32 @call_ptr(i32* %p1, i32* %p2) { +entry: + %retval = call i32 (i32*) @pass_ptr(i32* %p2) + ret i32 %retval +} + +; CHECK-LABEL: call_integrals: +; CHECK: lghi 1, 64 +; CHECK: lghi 2, 32 +; CHECK: lghi 3, 16 +define i64 @call_integrals() { +entry: + %retval = call i64 (i64, i32, i16, i64) @pass_integrals0(i64 64, i32 32, i16 16, i64 128) + ret i64 %retval +} + +; CHECK-LABEL: pass_char: +; CHECK: lgr 3, 1 +define signext i8 @pass_char(i8 signext %arg) { +entry: + ret i8 %arg +} + +; CHECK-LABEL: pass_short: +; CHECK: lgr 3, 1 +define signext i16 @pass_short(i16 signext %arg) { +entry: + ret i16 %arg +} + +; CHECK-LABEL: pass_int: +; CHECK: lgr 3, 2 +define signext i32 @pass_int(i32 signext %arg0, i32 signext %arg1) { +entry: + ret i32 %arg1 +} + +; CHECK-LABEL: pass_long: +; CHECK: agr 1, 2 +; CHECK: agr 3, 1 +define signext i64 @pass_long(i64 signext %arg0, i64 signext %arg1, i64 signext %arg2) { +entry: + %N = add i64 %arg0, %arg1 + %M = add i64 %N, %arg2 + ret i64 %M +} + +; CHECK-LABEL: pass_integrals0: +; CHECK: ag 2, -{{[0-9]+}}(4) +; CHECK-NEXT: lgr 3, 2 +define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) { +entry: + %N = sext i32 %arg1 to i64 + %M = add i64 %arg3, %N + ret i64 %M +} + +; CHECK-LABEL: call_float: +; CHECK: le 0, 0({{[0-9]}}) +define float @call_float() { +entry: + %ret = call float (float) @pass_float(float 0x400921FB60000000) + ret float %ret +} + +; CHECK-LABEL: call_double: +; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}} +; CHECK-NEXT: ld 0, 0([[GENREG]]) +define double @call_double() { +entry: + %ret = call double (double) @pass_double(double 3.141000e+00) + ret double %ret +} + +; CHECK-LABEL: call_longdouble: +; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}} +; CHECK-NEXT: ld 0, 0([[GENREG]]) +; CHECK-NEXT: ld 2, 8([[GENREG]]) +define fp128 @call_longdouble() { +entry: + %ret = call fp128 (fp128) @pass_longdouble(fp128 0xLE0FC1518450562CD4000921FB5444261) + ret fp128 %ret +} + +; CHECK-LABEL: call_floats0 +; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}} +; CHECK-NEXT: ld 1, 0([[GENREG]]) +; CHECK-NEXT: ld 3, 8([[GENREG]]) +; CHECK: lxr 5, 0 +; CHECK: lxr 0, 1 +; CHECK: lxr 4, 5 +define i64 @call_floats0(fp128 %arg0, double %arg1) { +entry: + %ret = call i64 (fp128, fp128, double) @pass_floats0(fp128 0xLE0FC1518450562CD4000921FB5444261, fp128 %arg0, double %arg1) + ret i64 %ret +} + +; CHECK-LABEL: call_floats1 +; CHECK: lxr 1, 0 +; CHECK: ldr 0, 4 +; CHECK: lxr 4, 1 +define i64 @call_floats1(fp128 %arg0, double %arg1) { +entry: + %ret = call i64 (double, fp128) @pass_floats1(double %arg1, fp128 %arg0) + ret i64 %ret +} + +; CHECK-LABEL: pass_float: +; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}} +; CHECK: aeb 0, 0(1) +define float @pass_float(float %arg) { +entry: + %X = fadd float %arg, 0x400821FB60000000 + ret float %X +} + +; CHECK-LABEL: pass_double: +; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}} +; CHECK: adb 0, 0(1) +define double @pass_double(double %arg) { +entry: + %X = fadd double %arg, 1.414213e+00 + ret double %X +} + +; CHECK-LABEL: pass_longdouble +; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}} +; CHECK: lxdb 1, 0(1) +; CHECK: axbr 0, 1 +define fp128 @pass_longdouble(fp128 %arg) { +entry: + %X = fadd fp128 %arg, 0xL10000000000000004000921FB53C8D4F + ret fp128 %X +} + +; CHECK-LABEL: pass_floats0 +; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}} +; CHECK: axbr 0, 4 +; CHECK: axbr 1, 0 +; CHECK: cxbr 1, 5 +define i64 @pass_floats0(fp128 %arg0, fp128 %arg1, double %arg2) { + %X = fadd fp128 %arg0, %arg1 + %arg2_ext = fpext double %arg2 to fp128 + %Y = fadd fp128 %X, %arg2_ext + %ret_bool = fcmp ueq fp128 %Y, 0xLE0FC1518450562CD4000921FB5444261 + %ret = sext i1 %ret_bool to i64 + ret i64 %ret +} + +declare i64 @pass_floats1(double %arg0, fp128 %arg1) +declare i32 @pass_ptr(i32* %arg) diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll new file mode 100644 index 000000000000..2efe27172efc --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll @@ -0,0 +1,195 @@ +; Test passing variable argument lists in 64-bit calls on z/OS. +; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z14 | FileCheck %s -check-prefix=ARCH12 +; CHECK-LABEL: call_vararg_double0 +; CHECK: llihf 3, 1074118262 +; CHECK-NEXT: oilf 3, 3367254360 +; CHECK: lghi 1, 1 +; CHECK: lghi 2, 2 +define i64 @call_vararg_double0() { +entry: + %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_double1 +; CHECK: llihf 0, 1074118262 +; CHECK-NEXT: oilf 0, 3367254360 +; CHECK: llihf 3, 1074340036 +; CHECK-NEXT: oilf 3, 2611340116 +; CHECK: lghi 1, 1 +; CHECK: lghi 2, 2 +; CHECK: stg 0, 2200(4) +define i64 @call_vararg_double1() { +entry: + %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_double2 +; CHECK-NOT: llihf 0 +; CHECK-NOT: oilf 0 +; CHECK: llihf 2, 1074118262 +; CHECK-NEXT: oilf 2, 3367254360 +; CHECK: lghi 1, 8200 +define i64 @call_vararg_double2() { +entry: + %retval = call i64 (i64, ...) @pass_vararg2(i64 8200, double 2.718000e+00) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_double3 +; CHECK: llihf 0, 1072703839 +; CHECK-NEXT: oilf 0, 2861204133 +; CHECK: llihf 1, 1074118262 +; CHECK-NEXT: oilf 1, 3367254360 +; CHECK: llihf 2, 1074340036 +; CHECK-NEXT: oilf 2, 2611340116 +; CHECK: llihf 3, 1073127358 +; CHECK-NEXT: oilf 3, 1992864825 +; CHECK: stg 0, 2200(4) +define i64 @call_vararg_double3() { +entry: + %retval = call i64 (...) @pass_vararg3(double 2.718000e+00, double 3.141000e+00, double 1.414000e+00, double 1.010101e+00) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_both0 +; CHECK: lgr 2, 1 +; CHECK: lgdr 1, 0 +define i64 @call_vararg_both0(i64 %arg0, double %arg1) { + %retval = call i64(...) @pass_vararg3(double %arg1, i64 %arg0) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_long_double0 +; CHECK: larl 1, @CPI5_0 +; CHECK-NEXT: ld 0, 0(1) +; CHECK-NEXT: ld 2, 8(1) +; CHECK-NEXT: lgdr 3, 0 +; CHECK: lghi 1, 1 +; CHECK: lghi 2, 2 +; CHECK: std 0, 2192(4) +; CHECK-NEXT: std 2, 2200(4) +define i64 @call_vararg_long_double0() { +entry: + %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 0xLE0FC1518450562CD4000921FB5444261) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_long_double1 +; CHECK: lgdr 3, 0 +; CHECK: lghi 1, 1 +; CHECK: lghi 2, 2 +; CHECK: std 0, 2192(4) +; CHECK-NEXT: std 2, 2200(4) +define i64 @call_vararg_long_double1(fp128 %arg0) { +entry: + %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_long_double2 +; CHECK: std 4, 2208(4) +; CHECK-NEXT: std 6, 2216(4) +; CHECK: lgdr 3, 0 +; CHECK: lghi 1, 1 +; CHECK: lghi 2, 2 +; CHECK: std 0, 2192(4) +; CHECK-NEXT: std 2, 2200(4) +define i64 @call_vararg_long_double2(fp128 %arg0, fp128 %arg1) { +entry: + %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0, fp128 %arg1) + ret i64 %retval +} + +; CHECK-LABEL: call_vararg_long_double3 +; CHECK: lgdr 3, 2 +; CHECK-NEXT: lgdr 2, 0 +define i64 @call_vararg_long_double3(fp128 %arg0) { +entry: + %retval = call i64 (...) @pass_vararg3(fp128 %arg0) + ret i64 %retval +} + +; ARCH12-LABEL: call_vec_vararg_test0 +; ARCH12: vlgvg 3, 24, 1 +; ARCH12: vlgvg 2, 24, 0 +; ARCH12: lghi 1, 1 +define void @call_vec_vararg_test0(<2 x double> %v) { + %retval = call i64(i64, ...) @pass_vararg2(i64 1, <2 x double> %v) + ret void +} + +; ARCH12-LABEL: call_vec_vararg_test1 +; ARCH12: larl 1, @CPI10_0 +; ARCH12: vl 0, 0(1), 3 +; ARCH12: vlgvg 3, 24, 0 +; ARCH12: vrepg 2, 0, 1 +; ARCH12: vst 25, 2208(4), 3 +; ARCH12: vst 24, 2192(4), 3 +define void @call_vec_vararg_test1(<4 x i32> %v, <2 x i64> %w) { + %retval = call i64(fp128, ...) @pass_vararg1(fp128 0xLE0FC1518450562CD4000921FB5444261, <4 x i32> %v, <2 x i64> %w) + ret void +} + +; ARCH12-LABEL: call_vec_char_vararg_straddle +; ARCH12: vlgvg 3, 24, 0 +; ARCH12: lghi 1, 1 +; ARCH12: lghi 2, 2 +; ARCH12: vst 24, 2192(4), 3 +define void @call_vec_char_vararg_straddle(<16 x i8> %v) { + %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <16 x i8> %v) + ret void +} + +; ARCH12-LABEL: call_vec_short_vararg_straddle +; ARCH12: vlgvg 3, 24, 0 +; ARCH12: lghi 1, 1 +; ARCH12: lghi 2, 2 +; ARCH12: vst 24, 2192(4), 3 +define void @call_vec_short_vararg_straddle(<8 x i16> %v) { + %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <8 x i16> %v) + ret void +} + +; ARCH12-LABEL: call_vec_int_vararg_straddle +; ARCH12: vlgvg 3, 24, 0 +; ARCH12: lghi 1, 1 +; ARCH12: lghi 2, 2 +; ARCH12: vst 24, 2192(4), 3 +define void @call_vec_int_vararg_straddle(<4 x i32> %v) { + %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <4 x i32> %v) + ret void +} + +; ARCH12-LABEL: call_vec_double_vararg_straddle +; ARCH12: vlgvg 3, 24, 0 +; ARCH12: lghi 1, 1 +; ARCH12: lghi 2, 2 +; ARCH12: vst 24, 2192(4), 3 +define void @call_vec_double_vararg_straddle(<2 x double> %v) { + %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <2 x double> %v) + ret void +} + +; CHECK-LABEL: call_vararg_integral0 +; Since arguments 0, 1, and 2 are already in the correct +; registers, we should have no loads of any sort into +; GPRs 1, 2, and 3. +; CHECK-NOT: lg 1 +; CHECK-NOT: lgr 1 +; CHECK-NOT: lg 2 +; CHECK-NOT: lgr 2 +; CHECK-NOT: lg 3 +; CHECK-NOT: lgr 3 +define i64 @call_vararg_integral0(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) { +entry: + %retval = call i64(...) @pass_vararg3(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) + ret i64 %retval +} + +declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...) +declare i64 @pass_vararg1(fp128 %arg0, ...) +declare i64 @pass_vararg2(i64 %arg0, ...) +declare i64 @pass_vararg3(...) diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll new file mode 100644 index 000000000000..8d6b93387330 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck %s + +; CHECK-LABEL: sum_vecs0 +; CHECK: vag 24, 24, 25 +define <2 x i64> @sum_vecs0(<2 x i64> %v1, <2 x i64> %v2) { +entry: + %add0 = add <2 x i64> %v1, %v2 + ret <2 x i64> %add0 +} + +; CHECK-LABEL: sum_vecs1 +; CHECK: vaf 1, 24, 25 +; CHECK: vaf 1, 1, 26 +; CHECK: vaf 1, 1, 27 +; CHECK: vaf 1, 1, 28 +; CHECK: vaf 1, 1, 29 +; CHECK: vl 0, 32(4), 4 +; CHECK: vaf 1, 1, 30 +; CHECK: vaf 1, 1, 31 +; CHECK: vaf 24, 1, 0 +define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) { +entry: + %add0 = add <4 x i32> %v1, %v2 + %add1 = add <4 x i32> %add0, %v3 + %add2 = add <4 x i32> %add1, %v4 + %add3 = add <4 x i32> %add2, %v5 + %add4 = add <4 x i32> %add3, %v6 + %add5 = add <4 x i32> %add4, %v7 + %add6 = add <4 x i32> %add5, %v8 + %add7 = add <4 x i32> %add6, %v9 + ret <4 x i32> %add7 +} + +; Verify that 3 is used for passing integral types if +; only 24 is used. +; CHECK-LABEL: call_vecs0 +; CHECK: lgr 3, 1 +define i64 @call_vecs0(i64 %n, <2 x i64> %v1) { +entry: + %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n) + ret i64 %ret +} + +; Verify that 3 is not allocated for passing integral types +; if 24 and %f0 are used. +; CHECK-LABEL: call_vecs1 +; CHECK: vlr 24, 25 +; CHECK: stg 1, 2200(4) +define i64 @call_vecs1(i64 %n, <2 x i64> %v1, double %x, <2 x i64> %v2) { +entry: + %ret = call i64 (<2 x i64>, double, i64) @pass_vecs1(<2 x i64> %v2, double %x, i64 %n) + ret i64 %ret +} + +; Verify that 3 is not allocated for passing integral types +; if 24 and 25 are used. +; CHECK-LABEL: call_vecs2 +; CHECK: mvghi 2208(4), 55 +define i64 @call_vecs2(<2 x i64> %v1, <2 x i64> %v2) { + %ret = call i64 (<2 x i64>, <2 x i64>, i64) @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 55) + ret i64 %ret +} + +declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n) +declare i64 @pass_vecs1(<2 x i64> %v1, double %x, i64 %n) +declare i64 @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 %n)