[SystemZ][z/OS] Initial implementation for lowerCall on z/OS
- This patch provides the initial implementation for lowering a call on z/OS according to the XPLINK64 calling convention.
- A series of changes have been made to SystemZCallingConv.td to account for these additional XPLINK64 rules, including a new helper function to shadow the stack along with allocation of a register wherever appropriate.
- For the cases of copying an f64 to a gr64 and an f128 / 128-bit vector type to a gr64, a `CCBitConvertToType` has been added and is bitcast appropriately in the lowering phase.
- Support for the ADA register (R5) will be provided in a later patch.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D111662
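For illustration, a minimal IR sketch of the vararg case the new CCIfNotFixed rules target. This is not part of the patch: the function names are hypothetical, and the expected behavior mirrors the vararg tests added below (a non-fixed double is converted to i64 via CCBitConvertToType and passed in the next argument GPR rather than an FPR).

; Illustrative sketch only (hypothetical names, not part of the patch).
; Under XPLINK64 the fixed i64 lands in GPR 1; the non-fixed double is
; expected in the following argument GPR, as in the new vararg tests.
declare i64 @takes_varargs(i64, ...)

define i64 @demo_vararg_double() {
entry:
  %ret = call i64 (i64, ...) @takes_varargs(i64 1, double 2.718000e+00)
  ret i64 %ret
}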
@@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
    SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};

const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
    SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
    SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};

@@ -27,6 +27,9 @@ namespace SystemZ {

const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];

const unsigned XPLINK64NumArgVRs = 8;
extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ

class SystemZCCState : public CCState {

@@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
  else
    llvm_unreachable("Unknown Calling Convention!");

  unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
  unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
                        ? 0
                        : State.AllocateStack(8, Align(8));

  // Use that same location for all the pending parts.
  for (auto &It : PendingMembers) {

@@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  if (LocVT.getSizeInBits() < 128)
    return false;

  if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
    return false;

  // For any C or C++ program, this should always be
  // false, since it is illegal to have a function
  // where the first argument is variadic. Therefore

@@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
  bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);

  // If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
  if (AllocGPR2 && AllocGPR3) {
    State.addLoc(
        CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
  // If only GPR3 is available, we need to set custom handling to copy
  // hi bits into GPR3.
  // Either way, we allocate on the stack.
  if (AllocGPR3) {
    // For f128 and vector var arg case, set the bitcast flag to bitcast to
    // i128.
    LocVT = MVT::i128;
    LocInfo = CCValAssign::BCvt;
    auto Offset = State.AllocateStack(16, Align(8));
    if (AllocGPR2)
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
    else
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }

  // If only GPR3 is available, we allocate on stack but need to
  // set custom handling to copy hi bits into GPR3.
  if (!AllocGPR2 && AllocGPR3) {
    auto Offset = State.AllocateStack(16, Align(8));
    State.addLoc(
        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  return false;
}

inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State) {
  ArrayRef<MCPhysReg> RegList;

  switch (LocVT.SimpleTy) {
  case MVT::i64:
    RegList = SystemZ::XPLINK64ArgGPRs;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    RegList = SystemZ::XPLINK64ArgVRs;
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    RegList = SystemZ::XPLINK64ArgFPRs;
    break;
  default:
    return false;
  }

  unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
  // Every time we can allocate a register, allocate on the stack.
  if (UnallocatedRegisterIndex < RegList.size())
    State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));

  return false;
}

@@ -224,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // XPLINK64 ABI compliant code widens integral types smaller than i64
  // to i64 before placing the parameters either on the stack or in registers.
  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
  // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRS.
  CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
  CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
  // long double, can only be passed in GPR2 and GPR3, if available,
  // hence R2Q
  CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
  // Non fixed vector arguments are treated in the same way as long
  // doubles.
  CCIfSubtarget<"hasVector()",
               CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                        CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,

  // A SwiftSelf is passed in callee-saved R10.
  CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,

@@ -238,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // The first 3 integer arguments are passed in registers R1D-R3D.
  // The rest will be passed in the user area. The address offset of the user
  // area can be found in register R4D.
  CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
  CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
  CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,

  // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors

@@ -247,6 +258,9 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  CCIfSubtarget<"hasVector()",
               CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                        CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
  CCIfSubtarget<"hasVector()",
               CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                        CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
  CCIfSubtarget<"hasVector()",
               CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                        CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,

@@ -255,28 +269,15 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
  // The first 4 named float and double arguments are passed in registers FPR0-FPR6.
  // The rest will be passed in the user area.
  CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
  CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
  CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
  CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
  // The first 2 long double arguments are passed in register FPR0/FPR2
  // and FPR4/FPR6. The rest will be passed in the user area.
  CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
  CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
  CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,

  // Non fixed floats are passed in GPRs
  // Promote f32 to f64, if it needs to be passed in GPRs.
  CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
  // Assign f64 varargs to their proper GPRs.
  CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
  // long double, can only be passed in GPR2 and GPR3, if available,
  // hence R2Q
  CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,

  // Non fixed vector arguments are treated in the same way as long
  // doubles.
  CCIfSubtarget<"hasVector()",
               CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                        CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,

  // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
  // Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.

@@ -1358,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt:
    // If this is a short vector argument to be stored to the stack,
  case CCValAssign::BCvt: {
    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
           VA.getValVT() == MVT::f128);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                       DAG.getConstant(0, DL, MVT::i32));
    if (BitCastToType == MVT::v2i64)
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                         DAG.getConstant(0, DL, MVT::i32));
    return Value;
  }
  case CCValAssign::Full:
    return Value;
  default:

@@ -1472,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:

@@ -1525,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

@@ -1564,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
    }
  }

  // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
  // register (R5)
  return Chain;
}

@@ -1604,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  LLVMContext &Ctx = *DAG.getContext();
  SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();

  // FIXME: z/OS support to be added in later.
  if (Subtarget.isTargetXPLINK64())
    IsTailCall = false;

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {

@@ -1624,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  if (Subtarget.isTargetXPLINK64())
    // Although the XPLINK specifications for AMODE64 state that minimum size
    // of the param area is minimum 32 bytes and no rounding is otherwise
    // specified, we round this area in 64 bytes increments to be compatible
    // with existing compilers.
    NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

@@ -1674,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
    if (VA.isRegLoc()) {
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
      // MVT::i128 type. We decompose the 128-bit type to a pair of its high
      // and low values.
      if (VA.getLocVT() == MVT::i128)
        ArgValue = lowerI128ToGR128(DAG, ArgValue);
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot. Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
        StackPtr = DAG.getCopyFromReg(Chain, DL,
                                      Regs->getStackPointerRegister(), PtrVT);
      unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
                        VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,

@@ -1693,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));

      // Although long doubles or vectors are passed through the stack when
      // they are vararg (non-fixed arguments), if a long double or vector
      // occupies the third and fourth slot of the argument list GPR3 should
      // still shadow the third slot of the argument list.
      if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
        SDValue ShadowArgValue =
            DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
                        DAG.getIntPtrConstant(1, DL));
        RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
      }
    }
  }

@@ -1704,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
  // associated Target* opcodes. Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  // FIXME: Add support for XPLINK using the ADA register.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);

@@ -66,6 +66,12 @@ public:
  virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                               CallingConv::ID CC) const = 0;

  /// \returns the offset to the locals area.
  virtual int getCallFrameSize() = 0;

  /// \returns the stack pointer bias.
  virtual int getStackPointerBias() = 0;

  /// Destroys the object. Bogus destructor allowing derived classes
  /// to override it.
  virtual ~SystemZCallingConventionRegisters(){};

@@ -91,6 +97,10 @@ public:
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const override final;

  int getCallFrameSize() override final { return 128; }

  int getStackPointerBias() override final { return 2048; }

  /// Destroys the object. Bogus destructor overriding base class destructor
  ~SystemZXPLINK64Registers(){};
};

@@ -113,6 +123,10 @@ public:
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const override final;

  int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }

  int getStackPointerBias() override final { return 0; }

  /// Destroys the object. Bogus destructor overriding base class destructor
  ~SystemZELFRegisters(){};
};

@@ -135,6 +135,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
    return true;
  }

  if (TT.isOSBinFormatGOFF())
    return true;

  if (TT.isOSBinFormatMachO()) {
    if (RM == Reloc::Static)
      return true;

@@ -0,0 +1,191 @@
; Test the passing of scalar values in GPRs, FPRs in 64-bit calls on z/OS.
;
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s

; CHECK-LABEL: call_char:
; CHECK: lghi 1, 8
define i8 @call_char(){
  %retval = call i8 (i8) @pass_char(i8 8)
  ret i8 %retval
}

; CHECK-LABEL: call_short:
; CHECK: lghi 1, 16
define i16 @call_short() {
entry:
  %retval = call i16 (i16) @pass_short(i16 16)
  ret i16 %retval
}

; CHECK-LABEL: call_int:
; CHECK: lghi 1, 32
; CHECK: lghi 2, 33
define i32 @call_int() {
entry:
  %retval = call i32 (i32, i32) @pass_int(i32 32, i32 33)
  ret i32 %retval
}

; CHECK-LABEL: call_long:
; CHECK: lghi 1, 64
; CHECK: lghi 2, 65
; CHECK: lghi 3, 66
define i64 @call_long() {
entry:
  %retval = call i64 (i64, i64, i64) @pass_long(i64 64, i64 65, i64 66)
  ret i64 %retval
}

; CHECK-LABEL: call_ptr:
; CHECK: lgr 1, 2
define i32 @call_ptr(i32* %p1, i32* %p2) {
entry:
  %retval = call i32 (i32*) @pass_ptr(i32* %p2)
  ret i32 %retval
}

; CHECK-LABEL: call_integrals:
; CHECK: lghi 1, 64
; CHECK: lghi 2, 32
; CHECK: lghi 3, 16
define i64 @call_integrals() {
entry:
  %retval = call i64 (i64, i32, i16, i64) @pass_integrals0(i64 64, i32 32, i16 16, i64 128)
  ret i64 %retval
}

; CHECK-LABEL: pass_char:
; CHECK: lgr 3, 1
define signext i8 @pass_char(i8 signext %arg) {
entry:
  ret i8 %arg
}

; CHECK-LABEL: pass_short:
; CHECK: lgr 3, 1
define signext i16 @pass_short(i16 signext %arg) {
entry:
  ret i16 %arg
}

; CHECK-LABEL: pass_int:
; CHECK: lgr 3, 2
define signext i32 @pass_int(i32 signext %arg0, i32 signext %arg1) {
entry:
  ret i32 %arg1
}

; CHECK-LABEL: pass_long:
; CHECK: agr 1, 2
; CHECK: agr 3, 1
define signext i64 @pass_long(i64 signext %arg0, i64 signext %arg1, i64 signext %arg2) {
entry:
  %N = add i64 %arg0, %arg1
  %M = add i64 %N, %arg2
  ret i64 %M
}

; CHECK-LABEL: pass_integrals0:
; CHECK: ag 2, -{{[0-9]+}}(4)
; CHECK-NEXT: lgr 3, 2
define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
entry:
  %N = sext i32 %arg1 to i64
  %M = add i64 %arg3, %N
  ret i64 %M
}

; CHECK-LABEL: call_float:
; CHECK: le 0, 0({{[0-9]}})
define float @call_float() {
entry:
  %ret = call float (float) @pass_float(float 0x400921FB60000000)
  ret float %ret
}

; CHECK-LABEL: call_double:
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 0, 0([[GENREG]])
define double @call_double() {
entry:
  %ret = call double (double) @pass_double(double 3.141000e+00)
  ret double %ret
}

; CHECK-LABEL: call_longdouble:
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 0, 0([[GENREG]])
; CHECK-NEXT: ld 2, 8([[GENREG]])
define fp128 @call_longdouble() {
entry:
  %ret = call fp128 (fp128) @pass_longdouble(fp128 0xLE0FC1518450562CD4000921FB5444261)
  ret fp128 %ret
}

; CHECK-LABEL: call_floats0
; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
; CHECK-NEXT: ld 1, 0([[GENREG]])
; CHECK-NEXT: ld 3, 8([[GENREG]])
; CHECK: lxr 5, 0
; CHECK: lxr 0, 1
; CHECK: lxr 4, 5
define i64 @call_floats0(fp128 %arg0, double %arg1) {
entry:
  %ret = call i64 (fp128, fp128, double) @pass_floats0(fp128 0xLE0FC1518450562CD4000921FB5444261, fp128 %arg0, double %arg1)
  ret i64 %ret
}

; CHECK-LABEL: call_floats1
; CHECK: lxr 1, 0
; CHECK: ldr 0, 4
; CHECK: lxr 4, 1
define i64 @call_floats1(fp128 %arg0, double %arg1) {
entry:
  %ret = call i64 (double, fp128) @pass_floats1(double %arg1, fp128 %arg0)
  ret i64 %ret
}

; CHECK-LABEL: pass_float:
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: aeb 0, 0(1)
define float @pass_float(float %arg) {
entry:
  %X = fadd float %arg, 0x400821FB60000000
  ret float %X
}

; CHECK-LABEL: pass_double:
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: adb 0, 0(1)
define double @pass_double(double %arg) {
entry:
  %X = fadd double %arg, 1.414213e+00
  ret double %X
}

; CHECK-LABEL: pass_longdouble
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: lxdb 1, 0(1)
; CHECK: axbr 0, 1
define fp128 @pass_longdouble(fp128 %arg) {
entry:
  %X = fadd fp128 %arg, 0xL10000000000000004000921FB53C8D4F
  ret fp128 %X
}

; CHECK-LABEL: pass_floats0
; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
; CHECK: axbr 0, 4
; CHECK: axbr 1, 0
; CHECK: cxbr 1, 5
define i64 @pass_floats0(fp128 %arg0, fp128 %arg1, double %arg2) {
  %X = fadd fp128 %arg0, %arg1
  %arg2_ext = fpext double %arg2 to fp128
  %Y = fadd fp128 %X, %arg2_ext
  %ret_bool = fcmp ueq fp128 %Y, 0xLE0FC1518450562CD4000921FB5444261
  %ret = sext i1 %ret_bool to i64
  ret i64 %ret
}

declare i64 @pass_floats1(double %arg0, fp128 %arg1)
declare i32 @pass_ptr(i32* %arg)

@@ -0,0 +1,195 @@
; Test passing variable argument lists in 64-bit calls on z/OS.
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z14 | FileCheck %s -check-prefix=ARCH12
; CHECK-LABEL: call_vararg_double0
; CHECK: llihf 3, 1074118262
; CHECK-NEXT: oilf 3, 3367254360
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
define i64 @call_vararg_double0() {
entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_double1
; CHECK: llihf 0, 1074118262
; CHECK-NEXT: oilf 0, 3367254360
; CHECK: llihf 3, 1074340036
; CHECK-NEXT: oilf 3, 2611340116
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: stg 0, 2200(4)
define i64 @call_vararg_double1() {
entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_double2
; CHECK-NOT: llihf 0
; CHECK-NOT: oilf 0
; CHECK: llihf 2, 1074118262
; CHECK-NEXT: oilf 2, 3367254360
; CHECK: lghi 1, 8200
define i64 @call_vararg_double2() {
entry:
  %retval = call i64 (i64, ...) @pass_vararg2(i64 8200, double 2.718000e+00)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_double3
; CHECK: llihf 0, 1072703839
; CHECK-NEXT: oilf 0, 2861204133
; CHECK: llihf 1, 1074118262
; CHECK-NEXT: oilf 1, 3367254360
; CHECK: llihf 2, 1074340036
; CHECK-NEXT: oilf 2, 2611340116
; CHECK: llihf 3, 1073127358
; CHECK-NEXT: oilf 3, 1992864825
; CHECK: stg 0, 2200(4)
define i64 @call_vararg_double3() {
entry:
  %retval = call i64 (...) @pass_vararg3(double 2.718000e+00, double 3.141000e+00, double 1.414000e+00, double 1.010101e+00)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_both0
; CHECK: lgr 2, 1
; CHECK: lgdr 1, 0
define i64 @call_vararg_both0(i64 %arg0, double %arg1) {
  %retval = call i64(...) @pass_vararg3(double %arg1, i64 %arg0)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_long_double0
; CHECK: larl 1, @CPI5_0
; CHECK-NEXT: ld 0, 0(1)
; CHECK-NEXT: ld 2, 8(1)
; CHECK-NEXT: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double0() {
entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 0xLE0FC1518450562CD4000921FB5444261)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_long_double1
; CHECK: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double1(fp128 %arg0) {
entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_long_double2
; CHECK: std 4, 2208(4)
; CHECK-NEXT: std 6, 2216(4)
; CHECK: lgdr 3, 0
; CHECK: lghi 1, 1
; CHECK: lghi 2, 2
; CHECK: std 0, 2192(4)
; CHECK-NEXT: std 2, 2200(4)
define i64 @call_vararg_long_double2(fp128 %arg0, fp128 %arg1) {
entry:
  %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0, fp128 %arg1)
  ret i64 %retval
}

; CHECK-LABEL: call_vararg_long_double3
; CHECK: lgdr 3, 2
; CHECK-NEXT: lgdr 2, 0
define i64 @call_vararg_long_double3(fp128 %arg0) {
entry:
  %retval = call i64 (...) @pass_vararg3(fp128 %arg0)
  ret i64 %retval
}

; ARCH12-LABEL: call_vec_vararg_test0
; ARCH12: vlgvg 3, 24, 1
; ARCH12: vlgvg 2, 24, 0
; ARCH12: lghi 1, 1
define void @call_vec_vararg_test0(<2 x double> %v) {
  %retval = call i64(i64, ...) @pass_vararg2(i64 1, <2 x double> %v)
  ret void
}

; ARCH12-LABEL: call_vec_vararg_test1
; ARCH12: larl 1, @CPI10_0
; ARCH12: vl 0, 0(1), 3
; ARCH12: vlgvg 3, 24, 0
; ARCH12: vrepg 2, 0, 1
; ARCH12: vst 25, 2208(4), 3
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_vararg_test1(<4 x i32> %v, <2 x i64> %w) {
  %retval = call i64(fp128, ...) @pass_vararg1(fp128 0xLE0FC1518450562CD4000921FB5444261, <4 x i32> %v, <2 x i64> %w)
  ret void
}

; ARCH12-LABEL: call_vec_char_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_char_vararg_straddle(<16 x i8> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <16 x i8> %v)
  ret void
}

; ARCH12-LABEL: call_vec_short_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_short_vararg_straddle(<8 x i16> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <8 x i16> %v)
  ret void
}

; ARCH12-LABEL: call_vec_int_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_int_vararg_straddle(<4 x i32> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <4 x i32> %v)
  ret void
}

; ARCH12-LABEL: call_vec_double_vararg_straddle
; ARCH12: vlgvg 3, 24, 0
; ARCH12: lghi 1, 1
; ARCH12: lghi 2, 2
; ARCH12: vst 24, 2192(4), 3
define void @call_vec_double_vararg_straddle(<2 x double> %v) {
  %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <2 x double> %v)
  ret void
}

; CHECK-LABEL: call_vararg_integral0
; Since arguments 0, 1, and 2 are already in the correct
; registers, we should have no loads of any sort into
; GPRs 1, 2, and 3.
; CHECK-NOT: lg 1
; CHECK-NOT: lgr 1
; CHECK-NOT: lg 2
; CHECK-NOT: lgr 2
; CHECK-NOT: lg 3
; CHECK-NOT: lgr 3
define i64 @call_vararg_integral0(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) {
entry:
  %retval = call i64(...) @pass_vararg3(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3)
  ret i64 %retval
}

declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)
declare i64 @pass_vararg1(fp128 %arg0, ...)
declare i64 @pass_vararg2(i64 %arg0, ...)
declare i64 @pass_vararg3(...)

@@ -0,0 +1,66 @@
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck %s

; CHECK-LABEL: sum_vecs0
; CHECK: vag 24, 24, 25
define <2 x i64> @sum_vecs0(<2 x i64> %v1, <2 x i64> %v2) {
entry:
  %add0 = add <2 x i64> %v1, %v2
  ret <2 x i64> %add0
}

; CHECK-LABEL: sum_vecs1
; CHECK: vaf 1, 24, 25
; CHECK: vaf 1, 1, 26
; CHECK: vaf 1, 1, 27
; CHECK: vaf 1, 1, 28
; CHECK: vaf 1, 1, 29
; CHECK: vl 0, 32(4), 4
; CHECK: vaf 1, 1, 30
; CHECK: vaf 1, 1, 31
; CHECK: vaf 24, 1, 0
define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) {
entry:
  %add0 = add <4 x i32> %v1, %v2
  %add1 = add <4 x i32> %add0, %v3
  %add2 = add <4 x i32> %add1, %v4
  %add3 = add <4 x i32> %add2, %v5
  %add4 = add <4 x i32> %add3, %v6
  %add5 = add <4 x i32> %add4, %v7
  %add6 = add <4 x i32> %add5, %v8
  %add7 = add <4 x i32> %add6, %v9
  ret <4 x i32> %add7
}

; Verify that 3 is used for passing integral types if
; only 24 is used.
; CHECK-LABEL: call_vecs0
; CHECK: lgr 3, 1
define i64 @call_vecs0(i64 %n, <2 x i64> %v1) {
entry:
  %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
  ret i64 %ret
}

; Verify that 3 is not allocated for passing integral types
; if 24 and %f0 are used.
; CHECK-LABEL: call_vecs1
; CHECK: vlr 24, 25
; CHECK: stg 1, 2200(4)
define i64 @call_vecs1(i64 %n, <2 x i64> %v1, double %x, <2 x i64> %v2) {
entry:
  %ret = call i64 (<2 x i64>, double, i64) @pass_vecs1(<2 x i64> %v2, double %x, i64 %n)
  ret i64 %ret
}

; Verify that 3 is not allocated for passing integral types
; if 24 and 25 are used.
; CHECK-LABEL: call_vecs2
; CHECK: mvghi 2208(4), 55
define i64 @call_vecs2(<2 x i64> %v1, <2 x i64> %v2) {
  %ret = call i64 (<2 x i64>, <2 x i64>, i64) @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 55)
  ret i64 %ret
}

declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)
declare i64 @pass_vecs1(<2 x i64> %v1, double %x, i64 %n)
declare i64 @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 %n)