forked from OSchip/llvm-project
PowerPC: Optimize SPE double parameter calling setup
Summary:
SPE passes doubles the same as soft-float, in register pairs as i32 types. This is all handled by the target-independent layer. However, this is not optimal when splitting or reforming the doubles, as it stores to the stack and loads back from it on either side.

For instance, to pass a double argument to a function, assuming the double value is in r5, the sequence currently looks like this:

    evstdd 5, X(1)
    lwz 3, X(1)
    lwz 4, X+4(1)

Likewise, to form a double into r5 from args in r3 and r4:

    stw 3, X(1)
    stw 4, X+4(1)
    evldd 5, X(1)

This optimizes the fence to use SPE instructions. Now, to pass a double to a function:

    mr 4, 5
    evmergehi 3, 5, 5

And to form a double into r5 from args in r3 and r4:

    evmergelo 5, 3, 4

This is comparable to the way that gcc generates the double splits.

This also fixes a bug with expanding builtins to libcalls, where the LowerCallTo() code path was generating intermediate illegal type nodes.

Reviewers: nemanjai, hfinkel, joerg
Subscribers: kbarton, jfb, jsji, llvm-commits
Differential Revision: https://reviews.llvm.org/D54583
llvm-svn: 363526
This commit is contained in:
parent
4f15732067
commit
1d1cf30b73
|
@ -105,4 +105,58 @@ static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Split F64 arguments into two 32-bit consecutive registers.
|
||||
static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
|
||||
MVT &LocVT,
|
||||
CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags,
|
||||
CCState &State) {
|
||||
static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
|
||||
static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
|
||||
|
||||
// Try to get the first register.
|
||||
unsigned Reg = State.AllocateReg(HiRegList);
|
||||
if (!Reg)
|
||||
return false;
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
|
||||
if (HiRegList[i] == Reg)
|
||||
break;
|
||||
|
||||
unsigned T = State.AllocateReg(LoRegList[i]);
|
||||
(void)T;
|
||||
assert(T == LoRegList[i] && "Could not allocate register");
|
||||
|
||||
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
|
||||
LocVT, LocInfo));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Same as above, but for return values, so only allocate for R3 and R4
|
||||
static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
|
||||
MVT &LocVT,
|
||||
CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags,
|
||||
CCState &State) {
|
||||
static const MCPhysReg HiRegList[] = { PPC::R3 };
|
||||
static const MCPhysReg LoRegList[] = { PPC::R4 };
|
||||
|
||||
// Try to get the first register.
|
||||
unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
|
||||
if (!Reg)
|
||||
return false;
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
|
||||
if (HiRegList[i] == Reg)
|
||||
break;
|
||||
|
||||
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
|
||||
LocVT, LocInfo));
|
||||
return true;
|
||||
}
|
||||
|
||||
#include "PPCGenCallingConv.inc"
|
||||
|
|
|
@ -91,7 +91,7 @@ def RetCC_PPC : CallingConv<[
|
|||
CCIfSubtarget<"hasSPE()",
|
||||
CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
|
||||
CCIfSubtarget<"hasSPE()",
|
||||
CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
|
||||
CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>,
|
||||
|
||||
// For P9, f128 are passed in vector registers.
|
||||
CCIfType<[f128],
|
||||
|
@ -182,6 +182,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
|
|||
CCIfType<[i32],
|
||||
CCIfSplit<CCIfNotSubtarget<"useSoftFloat()",
|
||||
CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
|
||||
CCIfType<[f64],
|
||||
CCIfSubtarget<"hasSPE()",
|
||||
CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
|
||||
CCIfSplit<CCIfSubtarget<"useSoftFloat()",
|
||||
CCIfOrigArgWasPPCF128<CCCustom<
|
||||
"CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
|
||||
|
@ -202,7 +205,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
|
|||
CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
|
||||
CCIfType<[f64],
|
||||
CCIfSubtarget<"hasSPE()",
|
||||
CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
|
||||
CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>,
|
||||
CCIfType<[f32],
|
||||
CCIfSubtarget<"hasSPE()",
|
||||
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
|
||||
|
|
|
@ -1269,22 +1269,6 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
|
|||
return Align;
|
||||
}
|
||||
|
||||
// Number of registers used to pass a value of type VT under a calling
// convention. On SPE subtargets an f64 takes two registers; every other case
// defers to getNumRegisters(). CC is not consulted.
unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv:: ID CC,
                                                          EVT VT) const {
  // Anything other than an SPE f64 uses the generic register count.
  if (!Subtarget.hasSPE() || VT != MVT::f64)
    return PPCTargetLowering::getNumRegisters(Context, VT);
  return 2;
}
|
||||
|
||||
// Register type used to pass a value of type VT under a calling convention.
// On SPE subtargets an f64 is passed as i32 pieces; every other case defers
// to getRegisterType(). CC is not consulted.
MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv:: ID CC,
                                                     EVT VT) const {
  // Anything other than an SPE f64 uses the generic register type.
  if (!Subtarget.hasSPE() || VT != MVT::f64)
    return PPCTargetLowering::getRegisterType(Context, VT);
  return MVT::i32;
}
|
||||
|
||||
bool PPCTargetLowering::useSoftFloat() const {
|
||||
return Subtarget.useSoftFloat();
|
||||
}
|
||||
|
@ -1402,6 +1386,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::QBFLT: return "PPCISD::QBFLT";
|
||||
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
|
||||
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
|
||||
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
|
||||
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
|
||||
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
|
||||
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
|
||||
case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
|
||||
|
@ -3427,7 +3413,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
|
|||
// Reserve space for the linkage area on the stack.
|
||||
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
|
||||
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
|
||||
if (useSoftFloat() || hasSPE())
|
||||
if (useSoftFloat())
|
||||
CCInfo.PreAnalyzeFormalArguments(Ins);
|
||||
|
||||
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
|
||||
|
@ -3460,7 +3446,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
|
|||
if (Subtarget.hasVSX())
|
||||
RC = &PPC::VSFRCRegClass;
|
||||
else if (Subtarget.hasSPE())
|
||||
RC = &PPC::SPERCRegClass;
|
||||
// SPE passes doubles in GPR pairs.
|
||||
RC = &PPC::GPRCRegClass;
|
||||
else
|
||||
RC = &PPC::F8RCRegClass;
|
||||
break;
|
||||
|
@ -3484,13 +3471,26 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
|
|||
break;
|
||||
}
|
||||
|
||||
// Transform the arguments stored in physical registers into virtual ones.
|
||||
SDValue ArgValue;
|
||||
// Transform the arguments stored in physical registers into
|
||||
// virtual ones.
|
||||
if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
|
||||
assert(i + 1 < e && "No second half of double precision argument");
|
||||
unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
|
||||
unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
|
||||
SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
|
||||
SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
|
||||
if (!Subtarget.isLittleEndian())
|
||||
std::swap (ArgValueLo, ArgValueHi);
|
||||
ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
|
||||
ArgValueHi);
|
||||
} else {
|
||||
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
|
||||
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
|
||||
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
|
||||
ValVT == MVT::i1 ? MVT::i32 : ValVT);
|
||||
|
||||
if (ValVT == MVT::i1)
|
||||
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
|
||||
}
|
||||
|
||||
InVals.push_back(ArgValue);
|
||||
} else {
|
||||
|
@ -5135,10 +5135,27 @@ SDValue PPCTargetLowering::LowerCallResult(
|
|||
CCValAssign &VA = RVLocs[i];
|
||||
assert(VA.isRegLoc() && "Can only return in registers!");
|
||||
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl,
|
||||
SDValue Val;
|
||||
|
||||
if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
|
||||
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
|
||||
InFlag);
|
||||
Chain = Lo.getValue(1);
|
||||
InFlag = Lo.getValue(2);
|
||||
VA = RVLocs[++i]; // skip ahead to next loc
|
||||
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
|
||||
InFlag);
|
||||
Chain = Hi.getValue(1);
|
||||
InFlag = Hi.getValue(2);
|
||||
if (!Subtarget.isLittleEndian())
|
||||
std::swap (Lo, Hi);
|
||||
Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
|
||||
} else {
|
||||
Val = DAG.getCopyFromReg(Chain, dl,
|
||||
VA.getLocReg(), VA.getLocVT(), InFlag);
|
||||
Chain = Val.getValue(1);
|
||||
InFlag = Val.getValue(2);
|
||||
}
|
||||
|
||||
switch (VA.getLocInfo()) {
|
||||
default: llvm_unreachable("Unknown loc info!");
|
||||
|
@ -5459,12 +5476,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
|
|||
|
||||
bool seenFloatArg = false;
|
||||
// Walk the register/memloc assignments, inserting copies/loads.
|
||||
for (unsigned i = 0, j = 0, e = ArgLocs.size();
|
||||
// i - Tracks the index into the list of registers allocated for the call
|
||||
// RealArgIdx - Tracks the index into the list of actual function arguments
|
||||
// j - Tracks the index into the list of byval arguments
|
||||
for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
|
||||
i != e;
|
||||
++i) {
|
||||
++i, ++RealArgIdx) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
SDValue Arg = OutVals[i];
|
||||
ISD::ArgFlagsTy Flags = Outs[i].Flags;
|
||||
SDValue Arg = OutVals[RealArgIdx];
|
||||
ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
|
||||
|
||||
if (Flags.isByVal()) {
|
||||
// Argument is an aggregate which is passed by value, thus we need to
|
||||
|
@ -5513,6 +5533,16 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
|
|||
if (VA.isRegLoc()) {
|
||||
seenFloatArg |= VA.getLocVT().isFloatingPoint();
|
||||
// Put argument in a physical register.
|
||||
if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
|
||||
bool IsLE = Subtarget.isLittleEndian();
|
||||
SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
|
||||
DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
|
||||
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
|
||||
DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
|
||||
RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
|
||||
SVal.getValue(0)));
|
||||
} else
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
} else {
|
||||
// Put argument in the parameter list area of the current stack frame.
|
||||
|
@ -6781,11 +6811,11 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
|||
SmallVector<SDValue, 4> RetOps(1, Chain);
|
||||
|
||||
// Copy the result values into the output registers.
|
||||
for (unsigned i = 0; i != RVLocs.size(); ++i) {
|
||||
for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
|
||||
CCValAssign &VA = RVLocs[i];
|
||||
assert(VA.isRegLoc() && "Can only return in registers!");
|
||||
|
||||
SDValue Arg = OutVals[i];
|
||||
SDValue Arg = OutVals[RealResIdx];
|
||||
|
||||
switch (VA.getLocInfo()) {
|
||||
default: llvm_unreachable("Unknown loc info!");
|
||||
|
@ -6800,7 +6830,20 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
|||
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
|
||||
bool isLittleEndian = Subtarget.isLittleEndian();
|
||||
// Legalize ret f64 -> ret 2 x i32.
|
||||
SDValue SVal =
|
||||
DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
|
||||
DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
|
||||
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
|
||||
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
|
||||
DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
|
||||
Flag = Chain.getValue(1);
|
||||
VA = RVLocs[++i]; // skip ahead to next loc
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
|
||||
} else
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
|
||||
Flag = Chain.getValue(1);
|
||||
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
|
||||
|
|
|
@ -194,6 +194,15 @@ namespace llvm {
|
|||
/// Direct move of 2 consecutive GPR to a VSX register.
|
||||
BUILD_FP128,
|
||||
|
||||
/// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
|
||||
/// EXTRACT_ELEMENT but operate on f64 values instead of i64, as i64 is
|
||||
/// unsupported for this target.
|
||||
/// Merge 2 GPRs to a single SPE register.
|
||||
BUILD_SPE64,
|
||||
|
||||
/// Extract SPE register component, second argument is high or low.
|
||||
EXTRACT_SPE,
|
||||
|
||||
/// Extract a subvector from signed integer vector and convert to FP.
|
||||
/// It is primarily used to convert a (widened) illegal integer vector
|
||||
/// type to a legal floating point vector type.
|
||||
|
@ -908,14 +917,6 @@ namespace llvm {
|
|||
unsigned JTI,
|
||||
MCContext &Ctx) const override;
|
||||
|
||||
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
|
||||
CallingConv:: ID CC,
|
||||
EVT VT) const override;
|
||||
|
||||
MVT getRegisterTypeForCallingConv(LLVMContext &Context,
|
||||
CallingConv:: ID CC,
|
||||
EVT VT) const override;
|
||||
|
||||
private:
|
||||
struct ReuseLoadInfo {
|
||||
SDValue Ptr;
|
||||
|
|
|
@ -230,6 +230,18 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
|
|||
SDTCisSameAs<1,2>]>,
|
||||
[]>;
|
||||
|
||||
// SelectionDAG node for PPCISD::BUILD_SPE64: one f64 result built from two
// i32 operands. Each operand gets its own constraint (indices 1 and 2) so
// that both are required to be i32.
def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
                           SDTypeProfile<1, 2,
                             [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
                             SDTCisVT<2,i32>]>,
                           []>;
|
||||
|
||||
// SelectionDAG node for PPCISD::EXTRACT_SPE: one i32 result taken from an
// f64 operand (operand 1); operand 2 is pointer-typed and selects which
// component is extracted.
def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
                            SDTypeProfile<1, 2, [
                              SDTCisVT<0, i32>,
                              SDTCisVT<1, f64>,
                              SDTCisPtrTy<2>
                            ]>,
                            []>;
|
||||
|
||||
// These are target-independent nodes, but have target-specific formats.
|
||||
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
|
||||
[SDNPHasChain, SDNPOutGlue]>;
|
||||
|
|
|
@ -511,7 +511,7 @@ def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src),
|
|||
|
||||
def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
|
||||
"evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
|
||||
def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
|
||||
def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
|
||||
"evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
|
||||
def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
|
||||
"evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
|
||||
|
@ -886,4 +886,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
|
|||
(SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
|
||||
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
|
||||
(SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
|
||||
|
||||
|
||||
// BUILD_SPE64 selects to EVMERGELO with the operand order swapped: the node's
// first operand becomes the instruction's second input and vice versa. The
// result is then copied into the SPERC register class.
def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)),
          (f64 (COPY_TO_REGCLASS
                 (EVMERGELO $rA, $rB),
                 SPERC))>;

// EXTRACT_SPE with selector 1: merge the register with itself via EVMERGEHI,
// then take the sub_32 subregister of the result.
def : Pat<(i32 (PPCextract_spe f64:$rA, 1)),
          (i32 (EXTRACT_SUBREG
                 (EVMERGEHI $rA, $rA),
                 sub_32))>;

// EXTRACT_SPE with selector 0: take the sub_32 subregister directly.
def : Pat<(i32 (PPCextract_spe f64:$rA, 0)),
          (i32 (EXTRACT_SUBREG $rA, sub_32))>;
|
||||
|
||||
}
|
||||
|
|
|
@ -472,10 +472,8 @@ entry:
|
|||
; CHECK-LABEL: test_dselect
|
||||
; CHECK: andi.
|
||||
; CHECK: bc
|
||||
; CHECK: evldd
|
||||
; CHECK: b
|
||||
; CHECK: evldd
|
||||
; CHECK: evstdd
|
||||
; CHECK: evor
|
||||
; CHECK: evmergehi
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
@ -519,7 +517,7 @@ entry:
|
|||
%1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0)
|
||||
ret i32 %1
|
||||
; CHECK-LABEL: test_dasmconst
|
||||
; CHECK: evldd
|
||||
; CHECK: evmergelo
|
||||
; CHECK: #APP
|
||||
; CHECK: efdctsi
|
||||
; CHECK: #NO_APP
|
||||
|
|
Loading…
Reference in New Issue