[SystemZ][z/OS] Initial implementation for lowerCall on z/OS

- This patch provides the initial implementation for lowering a call on z/OS according to the XPLINK64 calling convention.
- A series of changes have been made to SystemZCallingConv.td to account for these additional XPLINK64 changes, including adding a new helper function to shadow the stack along with the allocation of a register wherever appropriate.
- For the cases of copying an f64 to a gr64 and an f128 / 128-bit vector type to a gr64, a `CCBitConvertToType` has been added, and the value is bitcast appropriately in the lowering phase.
- Support for the ADA register (R5) will be provided in a later patch.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D111662
2021-10-21 09:48:21 -04:00 · 2021-10-21 09:48:21 -04:00 · aa3519f178
parent d2198771e9
commit aa3519f178
9 changed files with 601 additions and 45 deletions
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
 // Floating-point registers that carry XPLINK64 arguments, listed in the same
 // order as the f64 CCAssignToReg rule in SystemZCallingConv.td (F0D first).
 const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
    SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
 };
 // Vector registers (V24-V31) available for passing XPLINK64 vector arguments.
 // CC_XPLINK64_Shadow_Stack consults this list for 128-bit vector types.
 const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
    SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
    SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@ -27,6 +27,9 @@ namespace SystemZ {
  const unsigned XPLINK64NumArgFPRs = 4;
  extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
  const unsigned XPLINK64NumArgVRs = 8;
  extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
 } // end namespace SystemZ
 class SystemZCCState : public CCState {
@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
  else
    llvm_unreachable("Unknown Calling Convention!");
-  unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
+  unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
                        ? 0
                        : State.AllocateStack(8, Align(8));
  // Use that same location for all the pending parts.
  for (auto &It : PendingMembers) {
@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  if (LocVT.getSizeInBits() < 128)
    return false;
  if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
    return false;
  // For any C or C++ program, this should always be
  // false, since it is illegal to have a function
  // where the first argument is variadic. Therefore
@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
  bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);
  // If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
-  if (AllocGPR2 && AllocGPR3) {
+  // If only GPR3 is available, we need to set custom handling to copy
-    State.addLoc(
+  // hi bits into GPR3.
-        CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+  // Either way, we allocate on the stack.
  if (AllocGPR3) {
    // For f128 and vector var arg case, set the bitcast flag to bitcast to
    // i128.
    LocVT = MVT::i128;
    LocInfo = CCValAssign::BCvt;
    auto Offset = State.AllocateStack(16, Align(8));
    if (AllocGPR2)
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
    else
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
-  // If only GPR3 is available, we allocate on stack but need to
+  return false;
-  // set custom handling to copy hi bits into GPR3.
+}
-  if (!AllocGPR2 && AllocGPR3) {
+
-    auto Offset = State.AllocateStack(16, Align(8));
+inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-    State.addLoc(
+                                     CCValAssign::LocInfo &LocInfo,
-        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+                                     ISD::ArgFlagsTy &ArgFlags,
-    return true;
+                                     CCState &State) {
  // Custom calling-convention helper: if an argument of type LocVT can still
  // be given one of the XPLINK64 argument registers of its class, reserve a
  // stack slot of the same size as well. No location is recorded here and the
  // function always returns false; in SystemZCallingConv.td each use of this
  // helper is followed by a CCAssignToReg rule for the same types.
  ArrayRef<MCPhysReg> RegList;
  // Pick the register list that corresponds to the location type.
  switch (LocVT.SimpleTy) {
  case MVT::i64:
    RegList = SystemZ::XPLINK64ArgGPRs;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    RegList = SystemZ::XPLINK64ArgVRs;
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    RegList = SystemZ::XPLINK64ArgFPRs;
    break;
  default:
    // Any other location type gets no shadow stack slot.
    return false;
  }
  // The comparison below treats an index past the end of RegList as "no
  // register of this class is left".
  unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
  // Every time we can allocate a register, allocate on the stack.
  // The slot is LocVT's size in bytes, 8-byte aligned.
  if (UnallocatedRegisterIndex < RegList.size())
    State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
  return false;
 }
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@ -224,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // XPLINK64 ABI compliant code widens integral types smaller than i64
  // to i64 before placing the parameters either on the stack or in registers.
  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
  // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs.
  CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
  CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
  // long double, can only be passed in GPR2 and GPR3, if available,
  // hence R2Q
  CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
  // Non fixed vector arguments are treated in the same way as long
  // doubles.
  CCIfSubtarget<"hasVector()",
    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
      CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
  // A SwiftSelf is passed in callee-saved R10.
  CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
@ -238,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // The first 3 integer arguments are passed in registers R1D-R3D.
  // The rest will be passed in the user area. The address offset of the user
  // area can be found in register R4D.
-  CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
+  CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
  CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
  // The first 8 named vector arguments are passed in V24-V31.  Sub-128 vectors
@ -247,6 +258,9 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  CCIfSubtarget<"hasVector()",
    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
             CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
  CCIfSubtarget<"hasVector()",
    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
             CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
  CCIfSubtarget<"hasVector()",
    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
             CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
@ -255,28 +269,15 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // The first 4 named float and double arguments are passed in registers
  // FPR0, FPR2, FPR4 and FPR6. The rest will be passed in the user area.
  CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
  CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
  CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
  CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
  // The first 2 long double arguments are passed in register FPR0/FPR2
  // and FPR4/FPR6. The rest will be passed in the user area.
  CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
  CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
  CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
  // Non fixed floats are passed in GPRs
  // Promote f32 to f64, if it needs to be passed in GPRs.
  CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
  // Assign f64 varargs to their proper GPRs.
  CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
  // long double, can only be passed in GPR2 and GPR3, if available,
  // hence R2Q
  CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
  // Non fixed vector arguments are treated in the same way as long
  // doubles.
  CCIfSubtarget<"hasVector()",
    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
      CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
  // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
  // Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@ -1358,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
-  case CCValAssign::BCvt:
+  case CCValAssign::BCvt: {
-    // If this is a short vector argument to be stored to the stack,
+    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
           VA.getValVT() == MVT::f128);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
-    assert(VA.getLocVT() == MVT::i64);
+    if (BitCastToType == MVT::v2i64)
-    assert(VA.getValVT().isVector());
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
-    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+                         DAG.getConstant(0, DL, MVT::i32));
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+    return Value;
-                       DAG.getConstant(0, DL, MVT::i32));
+  }
  case CCValAssign::Full:
    return Value;
  default:
@ -1472,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
@ -1525,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }
-  if (IsVarArg) {
+  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
@ -1564,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
    }
  }
  // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
  // register (R5).
  return Chain;
 }
@ -1604,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  LLVMContext &Ctx = *DAG.getContext();
  SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
  // FIXME: Tail call support for z/OS to be added later.
  if (Subtarget.isTargetXPLINK64())
    IsTailCall = false;
  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
@ -1624,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
  if (Subtarget.isTargetXPLINK64())
    // Although the XPLINK specifications for AMODE64 state that the minimum
    // size of the param area is 32 bytes and no rounding is otherwise
    // specified, we round this area up in 64-byte increments to be compatible
    // with existing compilers.
    NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@ -1674,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
-    if (VA.isRegLoc())
+    if (VA.isRegLoc()) {
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
      // MVT::i128 type. We decompose the 128-bit type to a pair of its high
      // and low values.
      if (VA.getLocVT() == MVT::i128)
        ArgValue = lowerI128ToGR128(DAG, ArgValue);
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
-    else {
+    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
-        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
+        StackPtr = DAG.getCopyFromReg(Chain, DL,
-      unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
+                                      Regs->getStackPointerRegister(), PtrVT);
      unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
                        VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
@ -1693,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
      // Although long doubles or vectors are passed through the stack when
      // they are vararg (non-fixed arguments), if a long double or vector
      // occupies the third and fourth slot of the argument list GPR3 should
      // still shadow the third slot of the argument list.
      if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
        SDValue ShadowArgValue =
            DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
                        DAG.getIntPtrConstant(1, DL));
        RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
      }
    }
  }
@ -1704,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  // FIXME: Add support for XPLINK using the ADA register.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@ -66,6 +66,12 @@ public:
  virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                               CallingConv::ID CC) const = 0;
  /// \returns the call frame size, i.e. the offset to the locals area.
  /// SystemZTargetLowering::LowerCall adds it, together with the stack
  /// pointer bias, to the stack offset of each outgoing memory argument.
  virtual int getCallFrameSize() = 0;
  /// \returns the stack pointer bias: a constant that
  /// SystemZTargetLowering::LowerCall adds to the value of the stack pointer
  /// register when it computes the address of an outgoing stack argument.
  virtual int getStackPointerBias() = 0;
  /// Destroys the object. Bogus destructor allowing derived classes
  /// to override it.
  virtual ~SystemZCallingConventionRegisters(){};
@ -91,6 +97,10 @@ public:
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const override final;
  /// XPLINK64 call frame size. LowerCall adds it, together with the stack
  /// pointer bias, to the offset of each outgoing stack argument.
  int getCallFrameSize() override final { return 128; }
  /// XPLINK64 stack pointer bias. Combined with the call frame size of 128, an
  /// outgoing argument at offset 24 is addressed at 2048 + 128 + 24 = 2200
  /// from the stack pointer register.
  int getStackPointerBias() override final { return 2048; }
  /// Destroys the object. Bogus destructor overriding base class destructor
  ~SystemZXPLINK64Registers(){};
 };
@ -113,6 +123,10 @@ public:
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const override final;
  /// ELF uses the SystemZMC::ELFCallFrameSize constant as its call frame size.
  int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
  /// ELF applies no bias to the stack pointer.
  int getStackPointerBias() override final { return 0; }
  /// Destroys the object. Bogus destructor overriding base class destructor
  ~SystemZELFRegisters(){};
 };
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@ -135,6 +135,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
    return true;
  }
  if (TT.isOSBinFormatGOFF())
    return true;
  if (TT.isOSBinFormatMachO()) {
    if (RM == Reloc::Static)
      return true;
--- a/llvm/test/CodeGen/SystemZ/call-zos-01.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
@ -0,0 +1,191 @@
 ; Test the passing of scalar values in GPRs, FPRs in 64-bit calls on z/OS.
 ;
 ; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
 ; CHECK-LABEL: call_char:
 ; CHECK: lghi  1, 8
 define i8 @call_char(){
  %retval = call i8 (i8) @pass_char(i8 8)
  ret i8 %retval
 }
 ; CHECK-LABEL: call_short:
 ; CHECK: lghi  1, 16
 define i16 @call_short() {
 entry:
  %retval = call i16 (i16) @pass_short(i16 16)
  ret i16 %retval
 }
 ; CHECK-LABEL: call_int:
 ; CHECK: lghi  1, 32
 ; CHECK: lghi  2, 33
 define i32 @call_int() {
 entry:
  %retval = call i32 (i32, i32) @pass_int(i32 32, i32 33)
  ret i32 %retval
 }
 ; CHECK-LABEL: call_long:
 ; CHECK: lghi  1, 64
 ; CHECK: lghi  2, 65
 ; CHECK: lghi  3, 66
 define i64 @call_long() {
 entry:
  %retval = call i64 (i64, i64, i64) @pass_long(i64 64, i64 65, i64 66)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_ptr:
 ; CHECK: lgr 1, 2
 define i32 @call_ptr(i32* %p1, i32* %p2) {
 entry:
  %retval = call i32 (i32*) @pass_ptr(i32* %p2)
  ret i32 %retval
 }
 ; CHECK-LABEL: call_integrals:
 ; CHECK: lghi  1, 64
 ; CHECK: lghi  2, 32
 ; CHECK: lghi  3, 16
 define i64 @call_integrals() {
 entry:
  %retval = call i64 (i64, i32, i16, i64) @pass_integrals0(i64 64, i32 32, i16 16, i64 128)
  ret i64 %retval
 }
 ; CHECK-LABEL: pass_char:
 ; CHECK: lgr 3, 1
 define signext i8 @pass_char(i8 signext %arg) {
 entry:
  ret i8 %arg
 }
 ; CHECK-LABEL: pass_short:
 ; CHECK: lgr 3, 1
 define signext i16 @pass_short(i16 signext %arg) {
 entry:
  ret i16 %arg
 }
 ; CHECK-LABEL: pass_int:
 ; CHECK: lgr 3, 2
 define signext i32 @pass_int(i32 signext %arg0, i32 signext %arg1) {
 entry:
  ret i32 %arg1
 }
 ; CHECK-LABEL: pass_long:
 ; CHECK: agr 1, 2
 ; CHECK: agr 3, 1
 define signext i64 @pass_long(i64 signext %arg0, i64 signext %arg1, i64 signext %arg2) {
 entry:
  %N = add i64 %arg0, %arg1
  %M = add i64 %N, %arg2
  ret i64 %M
 }
 ; CHECK-LABEL: pass_integrals0:
 ; CHECK: ag  2, -{{[0-9]+}}(4)
 ; CHECK-NEXT: lgr 3, 2
 define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
 entry:
  %N = sext i32 %arg1 to i64
  %M = add i64 %arg3, %N
  ret i64 %M
 }
 ; CHECK-LABEL: call_float:
 ; CHECK: le 0, 0({{[0-9]}})
 define float @call_float() {
 entry:
  %ret = call float (float) @pass_float(float 0x400921FB60000000)
  ret float %ret
 }
 ; CHECK-LABEL: call_double:
 ; CHECK: larl  [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
 ; CHECK-NEXT: ld  0, 0([[GENREG]])
 define double @call_double() {
 entry:
  %ret = call double (double) @pass_double(double 3.141000e+00)
  ret double %ret
 }
 ; CHECK-LABEL: call_longdouble:
 ; CHECK: larl  [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
 ; CHECK-NEXT: ld  0, 0([[GENREG]])
 ; CHECK-NEXT: ld  2, 8([[GENREG]])
 define fp128 @call_longdouble() {
 entry:
  %ret = call fp128 (fp128) @pass_longdouble(fp128 0xLE0FC1518450562CD4000921FB5444261)
  ret fp128 %ret
 }
 ; CHECK-LABEL: call_floats0
 ; CHECK: larl  [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
 ; CHECK-NEXT: ld  1, 0([[GENREG]])
 ; CHECK-NEXT: ld  3, 8([[GENREG]])
 ; CHECK: lxr 5, 0
 ; CHECK: lxr 0, 1
 ; CHECK: lxr 4, 5
 define i64 @call_floats0(fp128 %arg0, double %arg1) {
 entry:
  %ret = call i64 (fp128, fp128, double) @pass_floats0(fp128 0xLE0FC1518450562CD4000921FB5444261, fp128 %arg0, double %arg1)
  ret i64 %ret
 }
 ; CHECK-LABEL: call_floats1
 ; CHECK: lxr 1, 0
 ; CHECK: ldr 0, 4
 ; CHECK: lxr 4, 1
 define i64 @call_floats1(fp128 %arg0, double %arg1) {
 entry:
  %ret = call i64 (double, fp128) @pass_floats1(double %arg1, fp128 %arg0)
  ret i64 %ret
 }
 ; CHECK-LABEL: pass_float:
 ; CHECK: larl  1, @{{CPI[0-9]+_[0-9]+}}
 ; CHECK: aeb 0, 0(1)
 define float @pass_float(float %arg) {
 entry:
  %X = fadd float %arg, 0x400821FB60000000
  ret float %X
 }
 ; CHECK-LABEL: pass_double:
 ; CHECK: larl  1, @{{CPI[0-9]+_[0-9]+}}
 ; CHECK: adb 0, 0(1)
 define double @pass_double(double %arg) {
 entry:
  %X = fadd double %arg, 1.414213e+00
  ret double %X
 }
 ; CHECK-LABEL: pass_longdouble
 ; CHECK: larl  1, @{{CPI[0-9]+_[0-9]+}}
 ; CHECK: lxdb  1, 0(1)
 ; CHECK: axbr  0, 1
 define fp128 @pass_longdouble(fp128 %arg) {
 entry:
  %X = fadd fp128 %arg, 0xL10000000000000004000921FB53C8D4F
  ret fp128 %X
 }
 ; CHECK-LABEL: pass_floats0
 ; CHECK: larl  1, @{{CPI[0-9]+_[0-9]+}}
 ; CHECK: axbr  0, 4
 ; CHECK: axbr  1, 0
 ; CHECK: cxbr  1, 5
 define i64 @pass_floats0(fp128 %arg0, fp128 %arg1, double %arg2) {
  %X = fadd fp128 %arg0, %arg1
  %arg2_ext = fpext double %arg2 to fp128
  %Y = fadd fp128 %X, %arg2_ext
  %ret_bool = fcmp ueq fp128 %Y, 0xLE0FC1518450562CD4000921FB5444261
  %ret = sext i1 %ret_bool to i64
  ret i64 %ret
 }
 declare i64 @pass_floats1(double %arg0, fp128 %arg1)
 declare i32 @pass_ptr(i32* %arg)
--- a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@ -0,0 +1,195 @@
 ; Test passing variable argument lists in 64-bit calls on z/OS.
 ; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z14 | FileCheck %s -check-prefix=ARCH12
 ; CHECK-LABEL: call_vararg_double0
 ; CHECK:       llihf 3, 1074118262
 ; CHECK-NEXT:  oilf  3, 3367254360
 ; CHECK:       lghi  1, 1
 ; CHECK:       lghi  2, 2
 define i64 @call_vararg_double0() {
 entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
  ret i64 %retval
 }
 ; Two fixed i64 arguments followed by two vararg doubles. The first double
 ; (3.141) is materialized as an integer bit pattern in GPR 3; the second
 ; (2.718) is built in GPR 0 and stored to the argument area at 2200(4).
 ; CHECK-LABEL:  call_vararg_double1
 ; CHECK:        llihf 0, 1074118262
 ; CHECK-NEXT:   oilf  0, 3367254360
 ; CHECK:        llihf 3, 1074340036
 ; CHECK-NEXT:   oilf  3, 2611340116
 ; CHECK:        lghi  1, 1
 ; CHECK:        lghi  2, 2
 ; CHECK:        stg 0, 2200(4)
 define i64 @call_vararg_double1() {
 entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_double2
 ; CHECK-NOT:   llihf 0
 ; CHECK-NOT:   oilf 0
 ; CHECK:       llihf 2, 1074118262
 ; CHECK-NEXT:  oilf  2, 3367254360
 ; CHECK:       lghi  1, 8200
 define i64 @call_vararg_double2() {
 entry:
  %retval = call i64 (i64, ...) @pass_vararg2(i64 8200, double 2.718000e+00)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_double3
 ; CHECK:       llihf   0, 1072703839
 ; CHECK-NEXT:  oilf    0, 2861204133
 ; CHECK:       llihf   1, 1074118262
 ; CHECK-NEXT:  oilf    1, 3367254360
 ; CHECK:       llihf   2, 1074340036
 ; CHECK-NEXT:  oilf    2, 2611340116
 ; CHECK:       llihf   3, 1073127358
 ; CHECK-NEXT:  oilf    3, 1992864825
 ; CHECK:       stg     0, 2200(4)
 define i64 @call_vararg_double3() {
 entry:
  %retval = call i64 (...) @pass_vararg3(double 2.718000e+00, double 3.141000e+00, double 1.414000e+00, double 1.010101e+00)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_both0
 ; CHECK:       lgr   2, 1
 ; CHECK:       lgdr  1, 0
 define i64 @call_vararg_both0(i64 %arg0, double %arg1) {
  %retval  = call i64(...) @pass_vararg3(double %arg1, i64 %arg0)
  ret i64 %retval
 }
 ; A vararg fp128 following two fixed i64 arguments. GPRs 1 and 2 hold the
 ; fixed arguments, so the long double is stored to the stack at 2192(4) and
 ; 2200(4), and the doubleword stored at 2192(4) is also copied into GPR 3.
 ; CHECK-LABEL: call_vararg_long_double0
 ; CHECK:       larl  1, @CPI5_0
 ; CHECK-NEXT:  ld    0, 0(1)
 ; CHECK-NEXT:  ld    2, 8(1)
 ; CHECK-NEXT:  lgdr  3, 0
 ; CHECK:       lghi  1, 1
 ; CHECK:       lghi  2, 2
 ; CHECK:       std   0, 2192(4)
 ; CHECK-NEXT:  std   2, 2200(4)
 define i64 @call_vararg_long_double0() {
 entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 0xLE0FC1518450562CD4000921FB5444261)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_long_double1
 ; CHECK:       lgdr  3, 0
 ; CHECK:       lghi  1, 1
 ; CHECK:       lghi  2, 2
 ; CHECK:       std   0, 2192(4)
 ; CHECK-NEXT:  std   2, 2200(4)
 define i64 @call_vararg_long_double1(fp128 %arg0) {
 entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_long_double2
 ; CHECK:      std   4, 2208(4)
 ; CHECK-NEXT: std   6, 2216(4)
 ; CHECK:      lgdr  3, 0
 ; CHECK:      lghi  1, 1
 ; CHECK:      lghi  2, 2
 ; CHECK:      std   0, 2192(4)
 ; CHECK-NEXT: std   2, 2200(4)
 define i64 @call_vararg_long_double2(fp128 %arg0, fp128 %arg1) {
 entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0, fp128 %arg1)
  ret i64 %retval
 }
 ; CHECK-LABEL: call_vararg_long_double3
 ; CHECK:       lgdr 3, 2
 ; CHECK-NEXT:  lgdr 2, 0
 define i64 @call_vararg_long_double3(fp128 %arg0) {
 entry:
  %retval = call i64 (...) @pass_vararg3(fp128 %arg0)
  ret i64 %retval
 }
 ; ARCH12-LABEL: call_vec_vararg_test0
 ; ARCH12: vlgvg 3, 24, 1
 ; ARCH12: vlgvg 2, 24, 0
 ; ARCH12: lghi  1, 1
 define void @call_vec_vararg_test0(<2 x double> %v) {
  %retval = call i64(i64, ...) @pass_vararg2(i64 1, <2 x double> %v)
  ret void
 }
 ; ARCH12-LABEL: call_vec_vararg_test1
 ; ARCH12: larl  1, @CPI10_0
 ; ARCH12: vl    0, 0(1), 3
 ; ARCH12: vlgvg 3, 24, 0
 ; ARCH12: vrepg 2, 0, 1
 ; ARCH12: vst   25, 2208(4), 3
 ; ARCH12: vst   24, 2192(4), 3
 define void @call_vec_vararg_test1(<4 x i32> %v, <2 x i64> %w) {
  %retval = call i64(fp128, ...) @pass_vararg1(fp128 0xLE0FC1518450562CD4000921FB5444261, <4 x i32> %v, <2 x i64> %w)
  ret void
 }
 ; ARCH12-LABEL: call_vec_char_vararg_straddle
 ; ARCH12: vlgvg 3, 24, 0
 ; ARCH12: lghi  1, 1
 ; ARCH12: lghi  2, 2
 ; ARCH12: vst   24, 2192(4), 3
 define void @call_vec_char_vararg_straddle(<16 x i8> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <16 x i8> %v)
  ret void
 }
 ; ARCH12-LABEL: call_vec_short_vararg_straddle
 ; ARCH12: vlgvg 3, 24, 0
 ; ARCH12: lghi  1, 1
 ; ARCH12: lghi  2, 2
 ; ARCH12: vst   24, 2192(4), 3
 define void @call_vec_short_vararg_straddle(<8 x i16> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <8 x i16> %v)
  ret void
 }
 ; ARCH12-LABEL: call_vec_int_vararg_straddle
 ; ARCH12: vlgvg 3, 24, 0
 ; ARCH12: lghi  1, 1
 ; ARCH12: lghi  2, 2
 ; ARCH12: vst 24, 2192(4), 3
 define void @call_vec_int_vararg_straddle(<4 x i32> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <4 x i32> %v)
  ret void
 }
 ; ARCH12-LABEL: call_vec_double_vararg_straddle
 ; ARCH12: vlgvg 3, 24, 0
 ; ARCH12: lghi  1, 1
 ; ARCH12: lghi  2, 2
 ; ARCH12: vst 24, 2192(4), 3
 define void @call_vec_double_vararg_straddle(<2 x double> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <2 x double> %v)
  ret void
 }
 ; CHECK-LABEL: call_vararg_integral0
 ; Since arguments 0, 1, and 2 are already in the correct
 ; registers, we should have no loads of any sort into
 ; GPRs 1, 2, and 3.
 ; CHECK-NOT: lg  1
 ; CHECK-NOT: lgr  1
 ; CHECK-NOT: lg  2
 ; CHECK-NOT: lgr  2
 ; CHECK-NOT: lg  3
 ; CHECK-NOT: lgr  3
 define i64 @call_vararg_integral0(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) {
 entry:
  %retval = call i64(...) @pass_vararg3(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3)
  ret i64 %retval
 }
 declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)
 declare i64 @pass_vararg1(fp128 %arg0, ...)
 declare i64 @pass_vararg2(i64 %arg0, ...)
 declare i64 @pass_vararg3(...)
--- a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
@ -0,0 +1,66 @@
 ; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck %s
 ; CHECK-LABEL: sum_vecs0
 ; CHECK: vag 24, 24, 25
 define <2 x i64> @sum_vecs0(<2 x i64> %v1, <2 x i64> %v2) {
 entry:
  %add0 = add <2 x i64> %v1, %v2
  ret <2 x i64> %add0
 }
 ; CHECK-LABEL: sum_vecs1
 ; CHECK: vaf 1, 24, 25
 ; CHECK: vaf 1, 1, 26
 ; CHECK: vaf 1, 1, 27
 ; CHECK: vaf 1, 1, 28
 ; CHECK: vaf 1, 1, 29
 ; CHECK: vl  0, 32(4), 4
 ; CHECK: vaf 1, 1, 30
 ; CHECK: vaf 1, 1, 31
 ; CHECK: vaf 24, 1, 0
 define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) {
 entry:
  %add0 = add <4 x i32> %v1, %v2
  %add1 = add <4 x i32> %add0, %v3
  %add2 = add <4 x i32> %add1, %v4
  %add3 = add <4 x i32> %add2, %v5
  %add4 = add <4 x i32> %add3, %v6
  %add5 = add <4 x i32> %add4, %v7
  %add6 = add <4 x i32> %add5, %v8
  %add7 = add <4 x i32> %add6, %v9
  ret <4 x i32> %add7
 }
 ; Verify that GPR 3 is used for passing integral types if
 ; only vector register 24 is used.
 ; CHECK-LABEL: call_vecs0
 ; CHECK: lgr 3, 1
 define i64 @call_vecs0(i64 %n, <2 x i64> %v1) {
 entry:
  %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
  ret i64 %ret
 }
 ; Verify that GPR 3 is not allocated for passing integral types
 ; if vector register 24 and %f0 are used.
 ; CHECK-LABEL: call_vecs1
 ; CHECK: vlr 24, 25
 ; CHECK: stg 1, 2200(4)
 define i64 @call_vecs1(i64 %n, <2 x i64> %v1, double %x, <2 x i64> %v2) {
 entry:
  %ret = call i64 (<2 x i64>, double, i64) @pass_vecs1(<2 x i64> %v2, double %x, i64 %n)
  ret i64 %ret
 }
 ; Verify that GPR 3 is not allocated for passing integral types
 ; if vector registers 24 and 25 are used.
 ; CHECK-LABEL: call_vecs2
 ; CHECK: mvghi 2208(4), 55
 define i64 @call_vecs2(<2 x i64> %v1, <2 x i64> %v2) {
  %ret = call i64 (<2 x i64>, <2 x i64>, i64) @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 55)
  ret i64 %ret
 }
 declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)
 declare i64 @pass_vecs1(<2 x i64> %v1, double %x, i64 %n)
 declare i64 @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 %n)