[SystemZ][z/OS] Initial implementation for lowerCall on z/OS

- This patch provides the initial implementation for lowering calls on z/OS according to the XPLINK64 calling convention.
- A series of changes has been made to SystemZCallingConv.td to account for XPLINK64, including a new helper function that shadows the corresponding stack slot whenever an argument register is allocated.
- For copying an f64 to a GR64, and an f128 or a 128-bit vector type to a GR64 pair, a `CCBitConvertToType` rule has been added; the value is bitcast accordingly during lowering (see the IR sketch below).
- Support for the ADA register (R5) will be provided in a later patch.
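
As a quick illustration of the vararg rules above, here is a hand-written IR sketch (for exposition only; the real coverage is in the new call-zos-vararg.ll test below). The two fixed i64 arguments travel in GPR1 and GPR2, and the trailing double vararg is bitcast to i64 and passed in GPR3 rather than an FPR; an fp128 vararg would likewise occupy the GPR2/GPR3 pair when both are free:

declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)

define i64 @sketch() {
entry:
  ; i64 1 -> GPR1, i64 2 -> GPR2, the double is bitcast to i64 -> GPR3
  %ret = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
  ret i64 %ret
}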

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D111662
Anirudh Prasad 2021-10-21 09:48:21 -04:00
parent d2198771e9
commit aa3519f178
9 changed files with 601 additions and 45 deletions


@@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};


@@ -27,6 +27,9 @@ namespace SystemZ {
const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
const unsigned XPLINK64NumArgVRs = 8;
extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
else
llvm_unreachable("Unknown Calling Convention!");
-unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
+unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
+? 0
+: State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
@@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
-if (LocVT.getSizeInBits() < 128)
-return false;
-if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
-return false;
// For any C or C++ program, this should always be
// false, since it is illegal to have a function
// where the first argument is variadic. Therefore
@@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);
// If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
-if (AllocGPR2 && AllocGPR3) {
-State.addLoc(
-CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+// If only GPR3 is available, we need to set custom handling to copy
+// hi bits into GPR3.
+// Either way, we allocate on the stack.
+if (AllocGPR3) {
+// For f128 and vector var arg case, set the bitcast flag to bitcast to
+// i128.
+LocVT = MVT::i128;
+LocInfo = CCValAssign::BCvt;
+auto Offset = State.AllocateStack(16, Align(8));
+if (AllocGPR2)
+State.addLoc(
+CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+else
+State.addLoc(
+CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
-// If only GPR3 is available, we allocate on stack but need to
-// set custom handling to copy hi bits into GPR3.
-if (!AllocGPR2 && AllocGPR3) {
-auto Offset = State.AllocateStack(16, Align(8));
-State.addLoc(
-CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
-return true;
-}
return false;
}
+inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+CCValAssign::LocInfo &LocInfo,
+ISD::ArgFlagsTy &ArgFlags,
+CCState &State) {
+ArrayRef<MCPhysReg> RegList;
+switch (LocVT.SimpleTy) {
+case MVT::i64:
+RegList = SystemZ::XPLINK64ArgGPRs;
+break;
+case MVT::v16i8:
+case MVT::v8i16:
+case MVT::v4i32:
+case MVT::v2i64:
+case MVT::v4f32:
+case MVT::v2f64:
+RegList = SystemZ::XPLINK64ArgVRs;
+break;
+case MVT::f32:
+case MVT::f64:
+case MVT::f128:
+RegList = SystemZ::XPLINK64ArgFPRs;
+break;
+default:
+return false;
+}
+unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
+// Every time we can allocate a register, allocate on the stack.
+if (UnallocatedRegisterIndex < RegList.size())
+State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
+return false;
+}
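
In effect, whenever one of the XPLINK64 argument registers above is consumed, this helper also reserves the matching bytes in the caller's argument area, so register assignment and slot layout stay in lockstep (the pre-existing CC_XPLINK64_Shadow_Reg helper handles the converse direction). A minimal IR sketch of the visible effect, mirroring call_vecs0 from the new call-zos-vec.ll test below:

declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)

define i64 @sketch(i64 %n, <2 x i64> %v1) {
entry:
  ; %v1 travels in V24 but still occupies the first two 8-byte argument
  ; slots (with GPR1/GPR2 shadowed to match), so %n is assigned GPR3.
  %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
  ret i64 %ret
}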


@@ -224,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// XPLINK64 ABI compliant code widens integral types smaller than i64
// to i64 before placing the parameters either on the stack or in registers.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
// Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs.
CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
// long double, can only be passed in GPR2 and GPR3, if available,
// hence R2Q
CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
// Non fixed vector arguments are treated in the same way as long
// doubles.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
// A SwiftSelf is passed in callee-saved R10.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
@@ -238,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
// area can be found in register R4D.
-CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
+CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
// The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
@@ -247,6 +258,9 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
@@ -255,28 +269,15 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 4 named float and double arguments are passed in registers FPR0-FPR6.
// The rest will be passed in the user area.
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
// The first 2 long double arguments are passed in register FPR0/FPR2
// and FPR4/FPR6. The rest will be passed in the user area.
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
-// Non fixed floats are passed in GPRs
-// Promote f32 to f64, if it needs to be passed in GPRs.
-CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
-// Assign f64 varargs to their proper GPRs.
-CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
-// long double, can only be passed in GPR2 and GPR3, if available,
-// hence R2Q
-CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
-// Non fixed vector arguments are treated in the same way as long
-// doubles.
-CCIfSubtarget<"hasVector()",
-CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.

View File

@@ -1358,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
-case CCValAssign::BCvt:
-// If this is a short vector argument to be stored to the stack,
+case CCValAssign::BCvt: {
+assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
+assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
+VA.getValVT() == MVT::f128);
+MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
+? MVT::v2i64
+: VA.getLocVT();
+Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
+// For ELF, this is a short vector argument to be stored to the stack,
// bitcast to v2i64 and then extract first element.
-assert(VA.getLocVT() == MVT::i64);
-assert(VA.getValVT().isVector());
-Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
-return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
-DAG.getConstant(0, DL, MVT::i32));
+if (BitCastToType == MVT::v2i64)
+return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+DAG.getConstant(0, DL, MVT::i32));
+return Value;
+}
case CCValAssign::Full:
return Value;
default:
@@ -1472,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
case MVT::f128:
NumFixedFPRs += 2;
RC = &SystemZ::FP128BitRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -1525,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
-if (IsVarArg) {
+// FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
+if (IsVarArg && Subtarget.isTargetELF()) {
// Save the number of non-varargs registers for later use by va_start, etc.
FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
@@ -1564,6 +1576,8 @@
}
}
// FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
// register (R5)
return Chain;
}
@@ -1604,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
// FIXME: z/OS support to be added in later.
if (Subtarget.isTargetXPLINK64())
IsTailCall = false;
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -1624,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
if (Subtarget.isTargetXPLINK64())
// Although the XPLINK specifications for AMODE64 state that the minimum
// size of the param area is 32 bytes and no rounding is otherwise
// specified, we round this area up in 64-byte increments to be compatible
// with existing compilers.
NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@@ -1674,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
-if (VA.isRegLoc())
+if (VA.isRegLoc()) {
+// In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
+// MVT::i128 type. We decompose the 128-bit type to a pair of its high
+// and low values.
+if (VA.getLocVT() == MVT::i128)
+ArgValue = lowerI128ToGR128(DAG, ArgValue);
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
-else {
+} else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Work out the address of the stack slot. Unpromoted ints and
// floats are passed as right-justified 8-byte values.
if (!StackPtr.getNode())
-StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
-unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
+StackPtr = DAG.getCopyFromReg(Chain, DL,
+Regs->getStackPointerRegister(), PtrVT);
+unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
+VA.getLocMemOffset();
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
@@ -1693,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
// Although long doubles or vectors are passed through the stack when
// they are vararg (non-fixed arguments), if a long double or vector
// occupies the third and fourth slot of the argument list GPR3 should
// still shadow the third slot of the argument list.
if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
SDValue ShadowArgValue =
DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
DAG.getIntPtrConstant(1, DL));
RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
}
}
}
@@ -1704,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
// FIXME: Add support for XPLINK using the ADA register.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);


@@ -66,6 +66,12 @@ public:
virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const = 0;
/// \returns the offset to the locals area.
virtual int getCallFrameSize() = 0;
/// \returns the stack pointer bias.
virtual int getStackPointerBias() = 0;
/// Destroys the object. Bogus destructor allowing derived classes
/// to override it.
virtual ~SystemZCallingConventionRegisters(){};
@@ -91,6 +97,10 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
int getCallFrameSize() override final { return 128; }
int getStackPointerBias() override final { return 2048; }
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZXPLINK64Registers(){};
};
@@ -113,6 +123,10 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
int getStackPointerBias() override final { return 0; }
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZELFRegisters(){};
};
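
Taken together, the call frame size and the stack pointer bias explain the raw offsets in the new z/OS tests below: an outgoing 8-byte argument slot n is addressed at 2048 (bias) + 128 (XPLINK64 call frame) + 8n off the biased stack pointer in GPR4. For example, a vararg spilled to the third and fourth slots lands at

2048 + 128 + 16 = 2192 and 2048 + 128 + 24 = 2200

which matches the `std 0, 2192(4)` / `std 2, 2200(4)` stores in the call-zos-vararg.ll test below.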


@@ -135,6 +135,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return true;
}
if (TT.isOSBinFormatGOFF())
return true;
if (TT.isOSBinFormatMachO()) {
if (RM == Reloc::Static)
return true;


@@ -0,0 +1,191 @@
; Test the passing of scalar values in GPRs, FPRs in 64-bit calls on z/OS.
;
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
; CHECK-LABEL: call_char:
; CHECK: lghi 1, 8
define i8 @call_char(){
%retval = call i8 (i8) @pass_char(i8 8)
ret i8 %retval
}
; CHECK-LABEL: call_short:
; CHECK: lghi 1, 16
define i16 @call_short() {
entry:
%retval = call i16 (i16) @pass_short(i16 16)
ret i16 %retval
}
; CHECK-LABEL: call_int:
; CHECK: lghi 1, 32
; CHECK: lghi 2, 33
define i32 @call_int() {
entry:
%retval = call i32 (i32, i32) @pass_int(i32 32, i32 33)
ret i32 %retval
}
; CHECK-LABEL: call_long:
; CHECK: lghi 1, 64
; CHECK: lghi 2, 65
; CHECK: lghi 3, 66
define i64 @call_long() {
entry:
%retval = call i64 (i64, i64, i64) @pass_long(i64 64, i64 65, i64 66)
ret i64 %retval
}
; CHECK-LABEL: call_ptr:
; CHECK: lgr 1, 2
define i32 @call_ptr(i32* %p1, i32* %p2) {
entry:
%retval = call i32 (i32*) @pass_ptr(i32* %p2)
ret i32 %retval
}
; CHECK-LABEL: call_integrals:
; CHECK: lghi 1, 64
; CHECK: lghi 2, 32
; CHECK: lghi 3, 16
define i64 @call_integrals() {
entry:
%retval = call i64 (i64, i32, i16, i64) @pass_integrals0(i64 64, i32 32, i16 16, i64 128)
ret i64 %retval
}
; CHECK-LABEL: pass_char:
; CHECK: lgr 3, 1
define signext i8 @pass_char(i8 signext %arg) {
entry:
ret i8 %arg
}
; CHECK-LABEL: pass_short:
; CHECK: lgr 3, 1
define signext i16 @pass_short(i16 signext %arg) {
entry:
ret i16 %arg
}
; CHECK-LABEL: pass_int:
; CHECK: lgr 3, 2
define signext i32 @pass_int(i32 signext %arg0, i32 signext %arg1) {
entry:
ret i32 %arg1
}
; CHECK-LABEL: pass_long:
; CHECK: agr 1, 2
; CHECK: agr 3, 1
define signext i64 @pass_long(i64 signext %arg0, i64 signext %arg1, i64 signext %arg2) {
entry:
%N = add i64 %arg0, %arg1
%M = add i64 %N, %arg2
ret i64 %M
}
; CHECK-LABEL: pass_integrals0:
; CHECK: ag 2, -{{[0-9]+}}(4)
; CHECK-NEXT: lgr 3, 2
define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
entry:
%N = sext i32 %arg1 to i64
%M = add i64 %arg3, %N
ret i64 %M
}
; CHECK-LABEL: call_float:
; CHECK: le 0, 0({{[0-9]}})
define float @call_float() {
entry:
%ret = call float (float) @pass_float(float 0x400921FB60000000)
ret float %ret
}
; CHECK-LABEL: call_double:
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 0, 0([[GENREG]])
define double @call_double() {
entry:
%ret = call double (double) @pass_double(double 3.141000e+00)
ret double %ret
}
; CHECK-LABEL: call_longdouble:
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 0, 0([[GENREG]])
; CHECK-NEXT: ld 2, 8([[GENREG]])
define fp128 @call_longdouble() {
entry:
%ret = call fp128 (fp128) @pass_longdouble(fp128 0xLE0FC1518450562CD4000921FB5444261)
ret fp128 %ret
}
; CHECK-LABEL: call_floats0
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 1, 0([[GENREG]])
; CHECK-NEXT: ld 3, 8([[GENREG]])
; CHECK: lxr 5, 0
; CHECK: lxr 0, 1
; CHECK: lxr 4, 5
define i64 @call_floats0(fp128 %arg0, double %arg1) {
entry:
%ret = call i64 (fp128, fp128, double) @pass_floats0(fp128 0xLE0FC1518450562CD4000921FB5444261, fp128 %arg0, double %arg1)
ret i64 %ret
}
; CHECK-LABEL: call_floats1
; CHECK: lxr 1, 0
; CHECK: ldr 0, 4
; CHECK: lxr 4, 1
define i64 @call_floats1(fp128 %arg0, double %arg1) {
entry:
%ret = call i64 (double, fp128) @pass_floats1(double %arg1, fp128 %arg0)
ret i64 %ret
}
; CHECK-LABEL: pass_float:
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: aeb 0, 0(1)
define float @pass_float(float %arg) {
entry:
%X = fadd float %arg, 0x400821FB60000000
ret float %X
}
; CHECK-LABEL: pass_double:
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: adb 0, 0(1)
define double @pass_double(double %arg) {
entry:
%X = fadd double %arg, 1.414213e+00
ret double %X
}
; CHECK-LABEL: pass_longdouble
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: lxdb 1, 0(1)
; CHECK: axbr 0, 1
define fp128 @pass_longdouble(fp128 %arg) {
entry:
%X = fadd fp128 %arg, 0xL10000000000000004000921FB53C8D4F
ret fp128 %X
}
; CHECK-LABEL: pass_floats0
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: axbr 0, 4
; CHECK: axbr 1, 0
; CHECK: cxbr 1, 5
define i64 @pass_floats0(fp128 %arg0, fp128 %arg1, double %arg2) {
%X = fadd fp128 %arg0, %arg1
%arg2_ext = fpext double %arg2 to fp128
%Y = fadd fp128 %X, %arg2_ext
%ret_bool = fcmp ueq fp128 %Y, 0xLE0FC1518450562CD4000921FB5444261
%ret = sext i1 %ret_bool to i64
ret i64 %ret
}
declare i64 @pass_floats1(double %arg0, fp128 %arg1)
declare i32 @pass_ptr(i32* %arg)


@@ -0,0 +1,195 @@
; Test passing variable argument lists in 64-bit calls on z/OS.
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z14 | FileCheck %s -check-prefix=ARCH12
; CHECK-LABEL: call_vararg_double0
; CHECK: llihf 3, 1074118262
; CHECK-NEXT: oilf 3, 3367254360
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
define i64 @call_vararg_double0() {
entry:
%retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_double1
; CHECK: llihf 0, 1074118262
; CHECK-NEXT: oilf 0, 3367254360
; CHECK: llihf 3, 1074340036
; CHECK-NEXT: oilf 3, 2611340116
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: stg 0, 2200(4)
define i64 @call_vararg_double1() {
entry:
%retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_double2
; CHECK-NOT: llihf 0
; CHECK-NOT: oilf 0
; CHECK: llihf 2, 1074118262
; CHECK-NEXT: oilf 2, 3367254360
; CHECK: lghi 1, 8200
define i64 @call_vararg_double2() {
entry:
%retval = call i64 (i64, ...) @pass_vararg2(i64 8200, double 2.718000e+00)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_double3
; CHECK: llihf 0, 1072703839
; CHECK-NEXT: oilf 0, 2861204133
; CHECK: llihf 1, 1074118262
; CHECK-NEXT: oilf 1, 3367254360
; CHECK: llihf 2, 1074340036
; CHECK-NEXT: oilf 2, 2611340116
; CHECK: llihf 3, 1073127358
; CHECK-NEXT: oilf 3, 1992864825
; CHECK: stg 0, 2200(4)
define i64 @call_vararg_double3() {
entry:
%retval = call i64 (...) @pass_vararg3(double 2.718000e+00, double 3.141000e+00, double 1.414000e+00, double 1.010101e+00)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_both0
; CHECK: lgr 2, 1
; CHECK: lgdr 1, 0
define i64 @call_vararg_both0(i64 %arg0, double %arg1) {
%retval = call i64(...) @pass_vararg3(double %arg1, i64 %arg0)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_long_double0
; CHECK: larl 1, @CPI5_0
; CHECK-NEXT: ld 0, 0(1)
; CHECK-NEXT: ld 2, 8(1)
; CHECK-NEXT: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double0() {
entry:
%retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 0xLE0FC1518450562CD4000921FB5444261)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_long_double1
; CHECK: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double1(fp128 %arg0) {
entry:
%retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_long_double2
; CHECK: std 4, 2208(4)
; CHECK-NEXT: std 6, 2216(4)
; CHECK: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double2(fp128 %arg0, fp128 %arg1) {
entry:
%retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0, fp128 %arg1)
ret i64 %retval
}
; CHECK-LABEL: call_vararg_long_double3
; CHECK: lgdr 3, 2
; CHECK-NEXT: lgdr 2, 0
define i64 @call_vararg_long_double3(fp128 %arg0) {
entry:
%retval = call i64 (...) @pass_vararg3(fp128 %arg0)
ret i64 %retval
}
; ARCH12-LABEL: call_vec_vararg_test0
; ARCH12: vlgvg 3, 24, 1
; ARCH12: vlgvg 2, 24, 0
; ARCH12: lghi 1, 1
define void @call_vec_vararg_test0(<2 x double> %v) {
%retval = call i64(i64, ...) @pass_vararg2(i64 1, <2 x double> %v)
ret void
}
; ARCH12-LABEL: call_vec_vararg_test1
; ARCH12: larl 1, @CPI10_0
; ARCH12: vl 0, 0(1), 3
; ARCH12: vlgvg 3, 24, 0
; ARCH12: vrepg 2, 0, 1
; ARCH12: vst 25, 2208(4), 3
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_vararg_test1(<4 x i32> %v, <2 x i64> %w) {
%retval = call i64(fp128, ...) @pass_vararg1(fp128 0xLE0FC1518450562CD4000921FB5444261, <4 x i32> %v, <2 x i64> %w)
ret void
}
; ARCH12-LABEL: call_vec_char_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_char_vararg_straddle(<16 x i8> %v) {
%retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <16 x i8> %v)
ret void
}
; ARCH12-LABEL: call_vec_short_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_short_vararg_straddle(<8 x i16> %v) {
%retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <8 x i16> %v)
ret void
}
; ARCH12-LABEL: call_vec_int_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_int_vararg_straddle(<4 x i32> %v) {
%retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <4 x i32> %v)
ret void
}
; ARCH12-LABEL: call_vec_double_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_double_vararg_straddle(<2 x double> %v) {
%retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <2 x double> %v)
ret void
}
; CHECK-LABEL: call_vararg_integral0
; Since arguments 0, 1, and 2 are already in the correct
; registers, we should have no loads of any sort into
; GPRs 1, 2, and 3.
; CHECK-NOT: lg 1
; CHECK-NOT: lgr 1
; CHECK-NOT: lg 2
; CHECK-NOT: lgr 2
; CHECK-NOT: lg 3
; CHECK-NOT: lgr 3
define i64 @call_vararg_integral0(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) {
entry:
%retval = call i64(...) @pass_vararg3(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3)
ret i64 %retval
}
declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)
declare i64 @pass_vararg1(fp128 %arg0, ...)
declare i64 @pass_vararg2(i64 %arg0, ...)
declare i64 @pass_vararg3(...)


@@ -0,0 +1,66 @@
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck %s
; CHECK-LABEL: sum_vecs0
; CHECK: vag 24, 24, 25
define <2 x i64> @sum_vecs0(<2 x i64> %v1, <2 x i64> %v2) {
entry:
%add0 = add <2 x i64> %v1, %v2
ret <2 x i64> %add0
}
; CHECK-LABEL: sum_vecs1
; CHECK: vaf 1, 24, 25
; CHECK: vaf 1, 1, 26
; CHECK: vaf 1, 1, 27
; CHECK: vaf 1, 1, 28
; CHECK: vaf 1, 1, 29
; CHECK: vl 0, 32(4), 4
; CHECK: vaf 1, 1, 30
; CHECK: vaf 1, 1, 31
; CHECK: vaf 24, 1, 0
define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) {
entry:
%add0 = add <4 x i32> %v1, %v2
%add1 = add <4 x i32> %add0, %v3
%add2 = add <4 x i32> %add1, %v4
%add3 = add <4 x i32> %add2, %v5
%add4 = add <4 x i32> %add3, %v6
%add5 = add <4 x i32> %add4, %v7
%add6 = add <4 x i32> %add5, %v8
%add7 = add <4 x i32> %add6, %v9
ret <4 x i32> %add7
}
; Verify that 3 is used for passing integral types if
; only 24 is used.
; CHECK-LABEL: call_vecs0
; CHECK: lgr 3, 1
define i64 @call_vecs0(i64 %n, <2 x i64> %v1) {
entry:
%ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
ret i64 %ret
}
; Verify that 3 is not allocated for passing integral types
; if 24 and %f0 are used.
; CHECK-LABEL: call_vecs1
; CHECK: vlr 24, 25
; CHECK: stg 1, 2200(4)
define i64 @call_vecs1(i64 %n, <2 x i64> %v1, double %x, <2 x i64> %v2) {
entry:
%ret = call i64 (<2 x i64>, double, i64) @pass_vecs1(<2 x i64> %v2, double %x, i64 %n)
ret i64 %ret
}
; Verify that 3 is not allocated for passing integral types
; if 24 and 25 are used.
; CHECK-LABEL: call_vecs2
; CHECK: mvghi 2208(4), 55
define i64 @call_vecs2(<2 x i64> %v1, <2 x i64> %v2) {
%ret = call i64 (<2 x i64>, <2 x i64>, i64) @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 55)
ret i64 %ret
}
declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)
declare i64 @pass_vecs1(<2 x i64> %v1, double %x, i64 %n)
declare i64 @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 %n)