forked from OSchip/llvm-project
Tail call optimization improvements:
Move platform-independent code (lowering of possibly overwritten arguments, check for tail call optimization eligibility) from the target-specific X86ISelectionLowering.cpp to TargetLowering.h and SelectionDAGISel.cpp. Initial PowerPC tail call implementation: support for ppc32 is implemented and tested (passes my tests and test-suite llvm-test); support for ppc64 is implemented and half tested (passes my tests). On ppc, tail call optimization is performed if: caller and callee are fastcc; the call is a tail call (in tail call position, i.e. a call followed by a ret); there are no variable argument lists or byval arguments; and the option -tailcallopt is enabled. Supported: * non-PIC tail calls on linux/darwin * module-local tail calls on linux(PIC/GOT)/darwin(PIC) * inter-module tail calls on darwin(PIC). If the constraints are not met, a normal call is emitted. A test checking the argument lowering behaviour on x86-64 was added. llvm-svn: 50477
This commit is contained in:
parent
be5c91d3b3
commit
be0de34ede
|
@ -1033,6 +1033,36 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// CheckTailCallReturnConstraints - Check whether CALL node immediatly
|
||||
/// preceeds the RET node and whether the return uses the result of the node
|
||||
/// or is a void return. This function can be used by the target to determine
|
||||
/// eligiblity of tail call optimization.
|
||||
static bool CheckTailCallReturnConstraints(SDOperand Call, SDOperand Ret) {
|
||||
unsigned NumOps = Ret.getNumOperands();
|
||||
if ((NumOps == 1 &&
|
||||
(Ret.getOperand(0) == SDOperand(Call.Val,1) ||
|
||||
Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
|
||||
(NumOps > 1 &&
|
||||
Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
|
||||
Ret.getOperand(1) == SDOperand(Call.Val,0)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// GetPossiblePreceedingTailCall - Get preceeding TailCallNodeOpCode node if
|
||||
/// it exists skip possible ISD:TokenFactor.
|
||||
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain,
|
||||
unsigned TailCallNodeOpCode) {
|
||||
if (Chain.getOpcode() == TailCallNodeOpCode) {
|
||||
return Chain;
|
||||
} else if (Chain.getOpcode() == ISD::TokenFactor) {
|
||||
if (Chain.getNumOperands() &&
|
||||
Chain.getOperand(0).getOpcode() == TailCallNodeOpCode)
|
||||
return Chain.getOperand(0);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// CustomPromoteOperation - This callback is invoked for operations that are
|
||||
/// unsupported by the target, are registered to use 'custom' lowering, and
|
||||
/// whose type needs to be promoted.
|
||||
|
|
|
@ -4612,6 +4612,40 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
|
|||
}
|
||||
}
|
||||
|
||||
/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and
|
||||
/// whether object offset >= 0.
|
||||
static bool
|
||||
IsFixedFrameObjectWithPosOffset(MachineFrameInfo * MFI, SDOperand Op) {
|
||||
if (!isa<FrameIndexSDNode>(Op)) return false;
|
||||
|
||||
FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
|
||||
int FrameIdx = FrameIdxNode->getIndex();
|
||||
return MFI->isFixedObjectIndex(FrameIdx) &&
|
||||
MFI->getObjectOffset(FrameIdx) >= 0;
|
||||
}
|
||||
|
||||
/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
/// possibly be overwritten when lowering the outgoing arguments in a tail
/// call. Currently the implementation of this call is very conservative and
/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
/// virtual registers would be overwritten by direct lowering.
/// Also treated as possibly-overwritten: loads from fixed frame objects at a
/// non-negative offset (incoming argument area), both directly and behind a
/// MERGE_VALUES node.
static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
                                                    MachineFrameInfo * MFI) {
  // OpReg is assigned inside the condition below (short-circuit &&) and is
  // only valid when the CopyFromReg clause matched.
  RegisterSDNode * OpReg = NULL;
  // Clause 1: the value comes straight from the incoming formal arguments.
  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
      // Clause 2: a CopyFromReg of a virtual register; note the embedded
      // assignment to OpReg, guarded by the preceding opcode check.
      (Op.getOpcode()== ISD::CopyFromReg &&
       (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
       (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
      // Clause 3: a load whose address is a fixed frame object with a
      // non-negative offset (i.e. within the incoming argument area).
      (Op.getOpcode() == ISD::LOAD &&
       IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
      // Clause 4: same as clause 3, but the load is reached through the
      // MERGE_VALUES operand selected by this operand's result number.
      (Op.getOpcode() == ISD::MERGE_VALUES &&
       Op.getOperand(Op.ResNo).getOpcode() == ISD::LOAD &&
       IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.ResNo).
                                       getOperand(1))))
    return true;
  return false;
}
|
||||
|
||||
/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the
|
||||
/// DAG and fixes their tailcall attribute operand.
|
||||
static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
|
||||
|
@ -4636,19 +4670,51 @@ static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
|
|||
// eligible (no RET or the target rejects) the attribute is fixed to
|
||||
// false. The TargetLowering::IsEligibleForTailCallOptimization function
|
||||
// must correctly identify tail call optimizable calls.
|
||||
if (isMarkedTailCall &&
|
||||
(Ret==NULL ||
|
||||
!TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) {
|
||||
if (!isMarkedTailCall) continue;
|
||||
if (Ret==NULL ||
|
||||
!TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG)) {
|
||||
// Not eligible. Mark CALL node as non tail call.
|
||||
SmallVector<SDOperand, 32> Ops;
|
||||
unsigned idx=0;
|
||||
for(SDNode::op_iterator I =OpCall.Val->op_begin(),
|
||||
E=OpCall.Val->op_end(); I!=E; I++, idx++) {
|
||||
for(SDNode::op_iterator I =OpCall.Val->op_begin(),
|
||||
E = OpCall.Val->op_end(); I != E; I++, idx++) {
|
||||
if (idx!=3)
|
||||
Ops.push_back(*I);
|
||||
else
|
||||
else
|
||||
Ops.push_back(DAG.getConstant(false, TLI.getPointerTy()));
|
||||
}
|
||||
DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
|
||||
} else {
|
||||
// Look for tail call clobbered arguments. Emit a series of
|
||||
// copyto/copyfrom virtual register nodes to protect them.
|
||||
SmallVector<SDOperand, 32> Ops;
|
||||
SDOperand Chain = OpCall.getOperand(0), InFlag;
|
||||
unsigned idx=0;
|
||||
for(SDNode::op_iterator I = OpCall.Val->op_begin(),
|
||||
E = OpCall.Val->op_end(); I != E; I++, idx++) {
|
||||
SDOperand Arg = *I;
|
||||
if (idx > 4 && (idx % 2)) {
|
||||
bool isByVal = cast<ARG_FLAGSSDNode>(OpCall.getOperand(idx+1))->
|
||||
getArgFlags().isByVal();
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
if (!isByVal &&
|
||||
IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
|
||||
MVT::ValueType VT = Arg.getValueType();
|
||||
unsigned VReg = MF.getRegInfo().
|
||||
createVirtualRegister(TLI.getRegClassFor(VT));
|
||||
Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
Arg = DAG.getCopyFromReg(Chain, VReg, VT, InFlag);
|
||||
Chain = Arg.getValue(1);
|
||||
InFlag = Arg.getValue(2);
|
||||
}
|
||||
}
|
||||
Ops.push_back(Arg);
|
||||
}
|
||||
// Link in chain of CopyTo/CopyFromReg.
|
||||
Ops[0] = Chain;
|
||||
DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -143,7 +143,8 @@ int PPCCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) {
|
|||
MO.isConstantPoolIndex() || MO.isJumpTableIndex()) {
|
||||
unsigned Reloc = 0;
|
||||
if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
|
||||
MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF)
|
||||
MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF ||
|
||||
MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
|
||||
Reloc = PPC::reloc_pcrel_bx;
|
||||
else {
|
||||
if (TM.getRelocationModel() == Reloc::PIC_) {
|
||||
|
|
|
@ -26,9 +26,11 @@
|
|||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/ParameterAttributes.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
@ -412,6 +414,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
|
||||
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
|
||||
case PPCISD::MTFSF: return "PPCISD::MTFSF";
|
||||
case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
|
||||
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1317,6 +1321,20 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
|
|||
return FPR;
|
||||
}
|
||||
|
||||
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
|
||||
/// the stack.
|
||||
static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag,
|
||||
bool isVarArg, unsigned PtrByteSize) {
|
||||
MVT::ValueType ArgVT = Arg.getValueType();
|
||||
ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
|
||||
unsigned ArgSize =MVT::getSizeInBits(ArgVT)/8;
|
||||
if (Flags.isByVal())
|
||||
ArgSize = Flags.getByValSize();
|
||||
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
||||
|
||||
return ArgSize;
|
||||
}
|
||||
|
||||
SDOperand
|
||||
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
||||
SelectionDAG &DAG,
|
||||
|
@ -1338,10 +1356,15 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
bool isPPC64 = PtrVT == MVT::i64;
|
||||
bool isMachoABI = Subtarget.isMachoABI();
|
||||
bool isELF32_ABI = Subtarget.isELF32_ABI();
|
||||
// Potential tail calls could cause overwriting of argument stack slots.
|
||||
unsigned CC = MF.getFunction()->getCallingConv();
|
||||
bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
|
||||
unsigned PtrByteSize = isPPC64 ? 8 : 4;
|
||||
|
||||
unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
|
||||
|
||||
// Area that is at least reserved in caller of this function.
|
||||
unsigned MinReservedArea = ArgOffset;
|
||||
|
||||
static const unsigned GPR_32[] = { // 32-bit registers.
|
||||
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
|
||||
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
|
||||
|
@ -1426,7 +1449,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
// even GPR_idx value or to an even ArgOffset value.
|
||||
|
||||
SmallVector<SDOperand, 8> MemOps;
|
||||
|
||||
unsigned nAltivecParamsAtEnd = 0;
|
||||
for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
|
||||
SDOperand ArgVal;
|
||||
bool needsLoad = false;
|
||||
|
@ -1440,6 +1463,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
|
||||
unsigned CurArgOffset = ArgOffset;
|
||||
|
||||
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
|
||||
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
|
||||
ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
|
||||
if (isVarArg || isPPC64) {
|
||||
MinReservedArea = ((MinReservedArea+15)/16)*16;
|
||||
MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
|
||||
Op.getOperand(ArgNo+3),
|
||||
isVarArg,
|
||||
PtrByteSize);
|
||||
} else nAltivecParamsAtEnd++;
|
||||
} else
|
||||
// Calculate min reserved area.
|
||||
MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
|
||||
Op.getOperand(ArgNo+3),
|
||||
isVarArg,
|
||||
PtrByteSize);
|
||||
|
||||
// FIXME alignment for ELF may not be right
|
||||
// FIXME the codegen can be much improved in some cases.
|
||||
// We do not have to keep everything in memory.
|
||||
|
@ -1614,7 +1654,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
// that we ran out of physical registers of the appropriate type.
|
||||
if (needsLoad) {
|
||||
int FI = MFI->CreateFixedObject(ObjSize,
|
||||
CurArgOffset + (ArgSize - ObjSize));
|
||||
CurArgOffset + (ArgSize - ObjSize),
|
||||
isImmutable);
|
||||
SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
|
||||
}
|
||||
|
@ -1622,6 +1663,25 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
ArgValues.push_back(ArgVal);
|
||||
}
|
||||
|
||||
// Set the size that is at least reserved in caller of this function. Tail
|
||||
// call optimized function's reserved stack space needs to be aligned so that
|
||||
// taking the difference between two stack areas will result in an aligned
|
||||
// stack.
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
// Add the Altivec parameters at the end, if needed.
|
||||
if (nAltivecParamsAtEnd) {
|
||||
MinReservedArea = ((MinReservedArea+15)/16)*16;
|
||||
MinReservedArea += 16*nAltivecParamsAtEnd;
|
||||
}
|
||||
MinReservedArea =
|
||||
std::max(MinReservedArea,
|
||||
PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
|
||||
unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
|
||||
getStackAlignment();
|
||||
unsigned AlignMask = TargetAlign-1;
|
||||
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
|
||||
FI->setMinReservedArea(MinReservedArea);
|
||||
|
||||
// If the function takes variable number of arguments, make a frame index for
|
||||
// the start of the first vararg value... for expansion of llvm.va_start.
|
||||
if (isVarArg) {
|
||||
|
@ -1720,6 +1780,131 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
|
|||
return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
|
||||
}
|
||||
|
||||
/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
/// linkage area.
///
/// @param DAG                 Used to reach the target frame info for the
///                            stack alignment of tail-call frames.
/// @param isPPC64             True for the 64-bit ABI (8-byte pointers).
/// @param isMachoABI          True for the Darwin/Macho ABI linkage layout.
/// @param isVarArg            True if the call is variadic.
/// @param CC                  Calling convention of the call.
/// @param Call                The CALL node; arguments start at operand 5,
///                            alternating value/flag pairs.
/// @param nAltivecParamsAtEnd Out: number of Altivec params placed after all
///                            other params (32-bit non-varargs case only).
/// @return Total bytes of linkage area plus parameter area, aligned as the
///         ABI (and tail calls, if enabled) require.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                     bool isPPC64,
                                     bool isMachoABI,
                                     bool isVarArg,
                                     unsigned CC,
                                     SDOperand Call,
                                     unsigned &nAltivecParamsAtEnd) {
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area. We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  // Operands 0-4 of the CALL node are not arguments; each argument is a
  // (value, flags) operand pair.
  unsigned NumOps = (Call.getNumOperands() - 5) / 2;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller. In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Call.getOperand(5+2*i);
    SDOperand Flag = Call.getOperand(5+2*i+1);
    MVT::ValueType ArgVT = Arg.getValueType();
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // Tail call needs the stack to be aligned.
  if (CC==CallingConv::Fast && PerformTailCallOpt) {
    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
      getStackAlignment();
    unsigned AlignMask = TargetAlign-1;
    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  }

  return NumBytes;
}
|
||||
|
||||
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
|
||||
/// adjusted to accomodate the arguments for the tailcall.
|
||||
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
|
||||
unsigned ParamSize) {
|
||||
|
||||
if (!IsTailCall) return 0;
|
||||
|
||||
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
|
||||
unsigned CallerMinReservedArea = FI->getMinReservedArea();
|
||||
int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
|
||||
// Remember only if the new adjustement is bigger.
|
||||
if (SPDiff < FI->getTailCallSPDelta())
|
||||
FI->setTailCallSPDelta(SPDiff);
|
||||
|
||||
return SPDiff;
|
||||
}
|
||||
|
||||
/// IsEligibleForTailCallElimination - Check to see whether the next instruction
|
||||
/// following the call is a return. A function is eligible if caller/callee
|
||||
/// calling conventions match, currently only fastcc supports tail calls, and
|
||||
/// the function CALL is immediatly followed by a RET.
|
||||
bool
|
||||
PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
|
||||
SDOperand Ret,
|
||||
SelectionDAG& DAG) const {
|
||||
// Variable argument functions are not supported.
|
||||
if (!PerformTailCallOpt ||
|
||||
cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;
|
||||
|
||||
if (CheckTailCallReturnConstraints(Call, Ret)) {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned CallerCC = MF.getFunction()->getCallingConv();
|
||||
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
|
||||
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
|
||||
// Functions containing by val parameters are not supported.
|
||||
for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) {
|
||||
ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1))
|
||||
->getArgFlags();
|
||||
if (Flags.isByVal()) return false;
|
||||
}
|
||||
|
||||
SDOperand Callee = Call.getOperand(4);
|
||||
// Non PIC/GOT tail calls are supported.
|
||||
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
|
||||
return true;
|
||||
|
||||
// At the moment we can only do local tail calls (in same module, hidden
|
||||
// or protected) if we are generating PIC.
|
||||
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
|
||||
return G->getGlobal()->hasHiddenVisibility()
|
||||
|| G->getGlobal()->hasProtectedVisibility();
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isCallCompatibleAddress - Return the immediate to use if the specified
|
||||
/// 32-bit value is representable in the immediate field of a BxA instruction.
|
||||
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
|
||||
|
@ -1735,6 +1920,102 @@ static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
|
|||
DAG.getTargetLoweringInfo().getPointerTy()).Val;
|
||||
}
|
||||
|
||||
/// TailCallArgumentInfo - Record of an outgoing tail call argument and the
/// stack slot it must eventually be stored to.
struct TailCallArgumentInfo {
  SDOperand Arg;        // The argument value to store.
  SDOperand FrameIdxOp; // FrameIndex node addressing the destination slot.
  int       FrameIdx;   // Raw frame index of the destination slot.

  TailCallArgumentInfo() : FrameIdx(0) {}
};
|
||||
|
||||
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
|
||||
static void
|
||||
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
|
||||
SDOperand Chain,
|
||||
const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
|
||||
SmallVector<SDOperand, 8> &MemOpChains) {
|
||||
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
|
||||
SDOperand Arg = TailCallArgs[i].Arg;
|
||||
SDOperand FIN = TailCallArgs[i].FrameIdxOp;
|
||||
int FI = TailCallArgs[i].FrameIdx;
|
||||
// Store relative to framepointer.
|
||||
MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN,
|
||||
PseudoSourceValue::getFixedStack(),
|
||||
FI));
|
||||
}
|
||||
}
|
||||
|
||||
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
|
||||
/// the appropriate stack slot for the tail call optimized function call.
|
||||
static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
|
||||
MachineFunction &MF,
|
||||
SDOperand Chain,
|
||||
SDOperand OldRetAddr,
|
||||
SDOperand OldFP,
|
||||
int SPDiff,
|
||||
bool isPPC64,
|
||||
bool isMachoABI) {
|
||||
if (SPDiff) {
|
||||
// Calculate the new stack slot for the return address.
|
||||
int SlotSize = isPPC64 ? 8 : 4;
|
||||
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
|
||||
isMachoABI);
|
||||
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
|
||||
NewRetAddrLoc);
|
||||
int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
|
||||
isMachoABI);
|
||||
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
|
||||
|
||||
MVT::ValueType VT = isPPC64 ? MVT::i64 : MVT::i32;
|
||||
SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
|
||||
Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx,
|
||||
PseudoSourceValue::getFixedStack(), NewRetAddr);
|
||||
SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
|
||||
Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx,
|
||||
PseudoSourceValue::getFixedStack(), NewFPIdx);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
|
||||
/// the position of the argument.
|
||||
static void
|
||||
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
|
||||
SDOperand Arg, int SPDiff, unsigned ArgOffset,
|
||||
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
|
||||
int Offset = ArgOffset + SPDiff;
|
||||
uint32_t OpSize = (MVT::getSizeInBits(Arg.getValueType())+7)/8;
|
||||
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
MVT::ValueType VT = isPPC64 ? MVT::i64 : MVT::i32;
|
||||
SDOperand FIN = DAG.getFrameIndex(FI, VT);
|
||||
TailCallArgumentInfo Info;
|
||||
Info.Arg = Arg;
|
||||
Info.FrameIdxOp = FIN;
|
||||
Info.FrameIdx = FI;
|
||||
TailCallArguments.push_back(Info);
|
||||
}
|
||||
|
||||
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
|
||||
/// stack slot. Returns the chain as result and the loaded frame pointers in
|
||||
/// LROpOut/FPOpout. Used when tail calling.
|
||||
SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
|
||||
int SPDiff,
|
||||
SDOperand Chain,
|
||||
SDOperand &LROpOut,
|
||||
SDOperand &FPOpOut) {
|
||||
if (SPDiff) {
|
||||
// Load the LR and FP stack slot for later adjusting.
|
||||
MVT::ValueType VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
|
||||
LROpOut = getReturnAddrFrameIndex(DAG);
|
||||
LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0);
|
||||
Chain = SDOperand(LROpOut.Val, 1);
|
||||
FPOpOut = getFramePointerFrameIndex(DAG);
|
||||
FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0);
|
||||
Chain = SDOperand(FPOpOut.Val, 1);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
|
||||
/// by "Src" to address "Dst" of size "Size". Alignment information is
|
||||
/// specified by the specific parameter attribute. The copy will be passed as
|
||||
|
@ -1750,11 +2031,39 @@ CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
|
|||
NULL, 0, NULL, 0);
|
||||
}
|
||||
|
||||
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
|
||||
/// tail calls.
|
||||
static void
|
||||
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain,
|
||||
SDOperand Arg, SDOperand PtrOff, int SPDiff,
|
||||
unsigned ArgOffset, bool isPPC64, bool isTailCall,
|
||||
bool isVector, SmallVector<SDOperand, 8> &MemOpChains,
|
||||
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
|
||||
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
if (!isTailCall) {
|
||||
if (isVector) {
|
||||
SDOperand StackPtr;
|
||||
if (isPPC64)
|
||||
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
|
||||
else
|
||||
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
|
||||
PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
|
||||
DAG.getConstant(ArgOffset, PtrVT));
|
||||
}
|
||||
MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
|
||||
// Calculate and remember argument location.
|
||||
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
|
||||
TailCallArguments);
|
||||
}
|
||||
|
||||
SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
||||
const PPCSubtarget &Subtarget,
|
||||
TargetMachine &TM) {
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 &&
|
||||
CC == CallingConv::Fast && PerformTailCallOpt;
|
||||
SDOperand Callee = Op.getOperand(4);
|
||||
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
|
||||
|
||||
|
@ -1765,58 +2074,32 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
bool isPPC64 = PtrVT == MVT::i64;
|
||||
unsigned PtrByteSize = isPPC64 ? 8 : 4;
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
// args_to_use will accumulate outgoing args for the PPCISD::CALL case in
|
||||
// SelectExpr to use to put the arguments in the appropriate registers.
|
||||
std::vector<SDOperand> args_to_use;
|
||||
|
||||
// Mark this function as potentially containing a function that contains a
|
||||
// tail call. As a consequence the frame pointer will be used for dynamicalloc
|
||||
// and restoring the callers stack pointer in this functions epilog. This is
|
||||
// done because by tail calling the called function might overwrite the value
|
||||
// in this function's (MF) stack pointer stack slot 0(SP).
|
||||
if (PerformTailCallOpt && CC==CallingConv::Fast)
|
||||
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
|
||||
|
||||
unsigned nAltivecParamsAtEnd = 0;
|
||||
|
||||
// Count how many bytes are to be pushed on the stack, including the linkage
|
||||
// area, and parameter passing area. We start with 24/48 bytes, which is
|
||||
// prereserved space for [SP][CR][LR][3 x unused].
|
||||
unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
|
||||
unsigned NumBytes =
|
||||
CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
|
||||
Op, nAltivecParamsAtEnd);
|
||||
|
||||
// Add up all the space actually used.
|
||||
// In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
|
||||
// they all go in registers, but we must reserve stack space for them for
|
||||
// possible use by the caller. In varargs or 64-bit calls, parameters are
|
||||
// assigned stack space in order, with padding so Altivec parameters are
|
||||
// 16-byte aligned.
|
||||
unsigned nAltivecParamsAtEnd = 0;
|
||||
for (unsigned i = 0; i != NumOps; ++i) {
|
||||
SDOperand Arg = Op.getOperand(5+2*i);
|
||||
MVT::ValueType ArgVT = Arg.getValueType();
|
||||
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
|
||||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
|
||||
if (!isVarArg && !isPPC64) {
|
||||
// Non-varargs Altivec parameters go after all the non-Altivec parameters;
|
||||
// do those last so we know how much padding we need.
|
||||
nAltivecParamsAtEnd++;
|
||||
continue;
|
||||
} else {
|
||||
// Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
|
||||
NumBytes = ((NumBytes+15)/16)*16;
|
||||
}
|
||||
}
|
||||
ISD::ArgFlagsTy Flags =
|
||||
cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
|
||||
unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
|
||||
if (Flags.isByVal())
|
||||
ArgSize = Flags.getByValSize();
|
||||
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
||||
NumBytes += ArgSize;
|
||||
}
|
||||
// Allow for Altivec parameters at the end, if needed.
|
||||
if (nAltivecParamsAtEnd) {
|
||||
NumBytes = ((NumBytes+15)/16)*16;
|
||||
NumBytes += 16*nAltivecParamsAtEnd;
|
||||
}
|
||||
|
||||
// The prolog code of the callee may store up to 8 GPR argument registers to
|
||||
// the stack, allowing va_start to index over them in memory if its varargs.
|
||||
// Because we cannot tell if this is needed on the caller side, we have to
|
||||
// conservatively assume that it is needed. As such, make sure we have at
|
||||
// least enough stack space for the caller to store the 8 GPRs.
|
||||
NumBytes = std::max(NumBytes,
|
||||
PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
|
||||
// Calculate by how many bytes the stack has to be adjusted in case of tail
|
||||
// call optimization.
|
||||
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
|
||||
|
||||
// Adjust the stack pointer for the new arguments...
|
||||
// These operations are automatically eliminated by the prolog/epilog pass
|
||||
|
@ -1824,6 +2107,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
DAG.getConstant(NumBytes, PtrVT));
|
||||
SDOperand CallSeqStart = Chain;
|
||||
|
||||
// Load the return address and frame pointer so it can be move somewhere else
|
||||
// later.
|
||||
SDOperand LROp, FPOp;
|
||||
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp);
|
||||
|
||||
// Set up a copy of the stack pointer for use loading and storing any
|
||||
// arguments that may not fit in the registers available for argument
|
||||
// passing.
|
||||
|
@ -1861,6 +2149,8 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
|
||||
|
||||
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
|
||||
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
|
||||
|
||||
SmallVector<SDOperand, 8> MemOpChains;
|
||||
for (unsigned i = 0; i != NumOps; ++i) {
|
||||
bool inMem = false;
|
||||
|
@ -1959,7 +2249,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
if (GPR_idx != NumGPRs) {
|
||||
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
|
||||
} else {
|
||||
MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
|
||||
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
||||
isPPC64, isTailCall, false, MemOpChains,
|
||||
TailCallArguments);
|
||||
inMem = true;
|
||||
}
|
||||
if (inMem || isMachoABI) {
|
||||
|
@ -2007,7 +2299,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
|
||||
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
||||
isPPC64, isTailCall, false, MemOpChains,
|
||||
TailCallArguments);
|
||||
inMem = true;
|
||||
}
|
||||
if (inMem || isMachoABI) {
|
||||
|
@ -2058,6 +2352,7 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Non-varargs Altivec params generally go in registers, but have
|
||||
// stack space allocated at the end.
|
||||
if (VR_idx != NumVRs) {
|
||||
|
@ -2065,10 +2360,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
|
||||
} else if (nAltivecParamsAtEnd==0) {
|
||||
// We are emitting Altivec params in order.
|
||||
PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
|
||||
DAG.getConstant(ArgOffset, PtrVT));
|
||||
SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
||||
MemOpChains.push_back(Store);
|
||||
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
||||
isPPC64, isTailCall, true, MemOpChains,
|
||||
TailCallArguments);
|
||||
ArgOffset += 16;
|
||||
}
|
||||
break;
|
||||
|
@ -2090,10 +2384,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
|
||||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
|
||||
if (++j > NumVRs) {
|
||||
SDOperand PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
|
||||
DAG.getConstant(ArgOffset, PtrVT));
|
||||
SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
||||
MemOpChains.push_back(Store);
|
||||
SDOperand PtrOff;
|
||||
// We are emitting Altivec params in order.
|
||||
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
|
||||
isPPC64, isTailCall, true, MemOpChains,
|
||||
TailCallArguments);
|
||||
ArgOffset += 16;
|
||||
}
|
||||
}
|
||||
|
@ -2120,6 +2415,37 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
|
||||
// might overwrite each other in case of tail call optimization.
|
||||
if (isTailCall) {
|
||||
SmallVector<SDOperand, 8> MemOpChains2;
|
||||
// Do not flag preceeding copytoreg stuff together with the following stuff.
|
||||
InFlag = SDOperand();
|
||||
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
|
||||
MemOpChains2);
|
||||
if (!MemOpChains2.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
|
||||
&MemOpChains2[0], MemOpChains2.size());
|
||||
|
||||
// Store the return address to the appropriate stack slot.
|
||||
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
|
||||
isPPC64, isMachoABI);
|
||||
}
|
||||
|
||||
// Emit callseq_end just before tailcall node.
|
||||
if (isTailCall) {
|
||||
SmallVector<SDOperand, 8> CallSeqOps;
|
||||
SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
|
||||
CallSeqOps.push_back(Chain);
|
||||
CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes));
|
||||
CallSeqOps.push_back(DAG.getIntPtrConstant(0));
|
||||
if (InFlag.Val)
|
||||
CallSeqOps.push_back(InFlag);
|
||||
Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0],
|
||||
CallSeqOps.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
std::vector<MVT::ValueType> NodeTys;
|
||||
NodeTys.push_back(MVT::Other); // Returns a chain
|
||||
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
|
||||
|
@ -2157,6 +2483,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
Ops.push_back(Chain);
|
||||
CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
|
||||
Callee.Val = 0;
|
||||
// Add CTR register as callee so a bctr can be emitted later.
|
||||
if (isTailCall)
|
||||
Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
|
||||
}
|
||||
|
||||
// If this is a direct call, pass the chain and the callee.
|
||||
|
@ -2164,29 +2493,48 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
|
|||
Ops.push_back(Chain);
|
||||
Ops.push_back(Callee);
|
||||
}
|
||||
|
||||
// If this is a tail call add stack pointer delta.
|
||||
if (isTailCall)
|
||||
Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
|
||||
|
||||
// Add argument registers to the end of the list so that they are known live
|
||||
// into the call.
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
|
||||
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
|
||||
RegsToPass[i].second.getValueType()));
|
||||
|
||||
|
||||
// When performing tail call optimization the callee pops its arguments off
|
||||
// the stack. Account for this here so these bytes can be pushed back on in
|
||||
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
|
||||
int BytesCalleePops =
|
||||
(CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
|
||||
|
||||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
|
||||
// Emit tail call.
|
||||
if (isTailCall) {
|
||||
assert(InFlag.Val &&
|
||||
"Flag must be set. Depend on flag being set in LowerRET");
|
||||
Chain = DAG.getNode(PPCISD::TAILCALL,
|
||||
Op.Val->getVTList(), &Ops[0], Ops.size());
|
||||
return SDOperand(Chain.Val, Op.ResNo);
|
||||
}
|
||||
|
||||
Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
Chain = DAG.getCALLSEQ_END(Chain,
|
||||
DAG.getConstant(NumBytes, PtrVT),
|
||||
DAG.getConstant(0, PtrVT),
|
||||
DAG.getConstant(BytesCalleePops, PtrVT),
|
||||
InFlag);
|
||||
if (Op.Val->getValueType(0) != MVT::Other)
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
SmallVector<SDOperand, 16> ResultVals;
|
||||
SmallVector<CCValAssign, 16> RVLocs;
|
||||
unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
|
||||
CCState CCInfo(CC, isVarArg, TM, RVLocs);
|
||||
unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
|
||||
CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
|
||||
CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC);
|
||||
|
||||
// Copy all of the result registers out of their specified physreg.
|
||||
|
@ -2226,6 +2574,36 @@ SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
|
||||
Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
|
||||
if (Chain.getOpcode() == PPCISD::TAILCALL) {
|
||||
SDOperand TailCall = Chain;
|
||||
SDOperand TargetAddress = TailCall.getOperand(1);
|
||||
SDOperand StackAdjustment = TailCall.getOperand(2);
|
||||
|
||||
assert(((TargetAddress.getOpcode() == ISD::Register &&
|
||||
cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
|
||||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
|
||||
isa<ConstantSDNode>(TargetAddress)) &&
|
||||
"Expecting an global address, external symbol, absolute value or register");
|
||||
|
||||
assert(StackAdjustment.getOpcode() == ISD::Constant &&
|
||||
"Expecting a const value");
|
||||
|
||||
SmallVector<SDOperand,8> Operands;
|
||||
Operands.push_back(Chain.getOperand(0));
|
||||
Operands.push_back(TargetAddress);
|
||||
Operands.push_back(StackAdjustment);
|
||||
// Copy registers used by the call. Last operand is a flag so it is not
|
||||
// copied.
|
||||
for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
|
||||
Operands.push_back(Chain.getOperand(i));
|
||||
}
|
||||
return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0],
|
||||
Operands.size());
|
||||
}
|
||||
|
||||
SDOperand Flag;
|
||||
|
||||
// Copy the result values into the output registers.
|
||||
|
@ -2268,18 +2646,44 @@ SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
|
|||
return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
|
||||
}
|
||||
|
||||
SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
|
||||
SelectionDAG &DAG,
|
||||
const PPCSubtarget &Subtarget) {
|
||||
|
||||
|
||||
SDOperand
|
||||
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
bool IsPPC64 = Subtarget.isPPC64();
|
||||
bool isMachoABI = Subtarget.isMachoABI();
|
||||
bool IsPPC64 = PPCSubTarget.isPPC64();
|
||||
bool isMachoABI = PPCSubTarget.isMachoABI();
|
||||
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
|
||||
// Get current frame pointer save index. The users of this index will be
|
||||
// primarily DYNALLOC instructions.
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
int RASI = FI->getReturnAddrSaveIndex();
|
||||
|
||||
// If the frame pointer save index hasn't been defined yet.
|
||||
if (!RASI) {
|
||||
// Find out what the fix offset of the frame pointer save area.
|
||||
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
|
||||
// Allocate the frame index for frame pointer save area.
|
||||
RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
|
||||
// Save the result.
|
||||
FI->setReturnAddrSaveIndex(RASI);
|
||||
}
|
||||
return DAG.getFrameIndex(RASI, PtrVT);
|
||||
}
|
||||
|
||||
SDOperand
|
||||
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
bool IsPPC64 = PPCSubTarget.isPPC64();
|
||||
bool isMachoABI = PPCSubTarget.isMachoABI();
|
||||
MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
|
||||
// Get current frame pointer save index. The users of this index will be
|
||||
// primarily DYNALLOC instructions.
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
int FPSI = FI->getFramePointerSaveIndex();
|
||||
|
||||
|
||||
// If the frame pointer save index hasn't been defined yet.
|
||||
if (!FPSI) {
|
||||
// Find out what the fix offset of the frame pointer save area.
|
||||
|
@ -2290,7 +2694,12 @@ SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
|
|||
// Save the result.
|
||||
FI->setFramePointerSaveIndex(FPSI);
|
||||
}
|
||||
return DAG.getFrameIndex(FPSI, PtrVT);
|
||||
}
|
||||
|
||||
SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
|
||||
SelectionDAG &DAG,
|
||||
const PPCSubtarget &Subtarget) {
|
||||
// Get the inputs.
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
SDOperand Size = Op.getOperand(1);
|
||||
|
@ -2301,7 +2710,7 @@ SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
|
|||
SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
|
||||
DAG.getConstant(0, PtrVT), Size);
|
||||
// Construct a node for the frame pointer save index.
|
||||
SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT);
|
||||
SDOperand FPSIdx = getFramePointerFrameIndex(DAG);
|
||||
// Build a DYNALLOC node.
|
||||
SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
|
||||
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
|
||||
|
@ -4099,25 +4508,13 @@ SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
|
||||
int RAIdx = FuncInfo->getReturnAddrSaveIndex();
|
||||
if (RAIdx == 0) {
|
||||
bool isPPC64 = PPCSubTarget.isPPC64();
|
||||
int Offset =
|
||||
PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI());
|
||||
|
||||
// Set up a frame object for the return address.
|
||||
RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset);
|
||||
|
||||
// Remember it for next time.
|
||||
FuncInfo->setReturnAddrSaveIndex(RAIdx);
|
||||
|
||||
// Make sure the function really does not optimize away the store of the RA
|
||||
// to the stack.
|
||||
FuncInfo->setLRStoreRequired();
|
||||
}
|
||||
|
||||
// Just load the return address off the stack.
|
||||
SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy());
|
||||
SDOperand RetAddrFI = getReturnAddrFrameIndex(DAG);
|
||||
|
||||
// Make sure the function really does not optimize away the store of the RA
|
||||
// to the stack.
|
||||
FuncInfo->setLRStoreRequired();
|
||||
return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -162,7 +162,16 @@ namespace llvm {
|
|||
|
||||
/// CMP_UNRESERVE = Test for equality and "unreserve" if not true. This
|
||||
/// is used to implement atomic operations.
|
||||
CMP_UNRESERVE
|
||||
CMP_UNRESERVE,
|
||||
|
||||
/// TAILCALL - Indicates a tail call should be taken.
|
||||
TAILCALL,
|
||||
/// TC_RETURN - A tail call return.
|
||||
/// operand #0 chain
|
||||
/// operand #1 callee (register or absolute)
|
||||
/// operand #2 stack adjustment
|
||||
/// operand #3 optional in flag
|
||||
TC_RETURN
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -308,11 +317,27 @@ namespace llvm {
|
|||
/// the offset of the target addressing mode.
|
||||
virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
|
||||
|
||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||
/// for tail call optimization. Target which want to do tail call
|
||||
/// optimization should implement this function.
|
||||
virtual bool IsEligibleForTailCallOptimization(SDOperand Call,
|
||||
SDOperand Ret,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
private:
|
||||
/// PPCAtomicLabelIndex - Keep track the number of PPC atomic labels.
|
||||
///
|
||||
unsigned PPCAtomicLabelIndex;
|
||||
|
||||
SDOperand getFramePointerFrameIndex(SelectionDAG & DAG) const;
|
||||
SDOperand getReturnAddrFrameIndex(SelectionDAG & DAG) const;
|
||||
|
||||
SDOperand EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
|
||||
int SPDiff,
|
||||
SDOperand Chain,
|
||||
SDOperand &LROpOut,
|
||||
SDOperand &FPOpOut);
|
||||
|
||||
SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG);
|
||||
|
|
|
@ -116,7 +116,6 @@ def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)),
|
|||
def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
|
||||
(BL8_ELF texternalsym:$dst)>;
|
||||
|
||||
|
||||
// Atomic operations.
|
||||
def LDARX : Pseudo<(outs G8RC:$rD), (ins memrr:$ptr, i32imm:$label),
|
||||
"\nLa${label}_entry:\n\tldarx $rD, $ptr",
|
||||
|
@ -135,6 +134,53 @@ def CMP_UNRESdi : Pseudo<(outs), (ins G8RC:$rA, s16imm64:$imm, i32imm:$label),
|
|||
[(PPCcmp_unres G8RC:$rA, immSExt16:$imm, imm:$label)]>;
|
||||
}
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNdi8 :Pseudo< (outs),
|
||||
(ins calltarget:$dst, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNd8 $dst $offset",
|
||||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNa8 $func $offset",
|
||||
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNr8 $dst $offset",
|
||||
[]>;
|
||||
|
||||
|
||||
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
|
||||
isIndirectBranch = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
|
||||
|
||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
|
||||
isBarrier = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
|
||||
"b $dst", BrB,
|
||||
[]>;
|
||||
|
||||
|
||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
|
||||
isBarrier = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
|
||||
"ba $dst", BrB,
|
||||
[]>;
|
||||
|
||||
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
|
||||
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
|
||||
|
||||
def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
|
||||
(TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
|
||||
|
||||
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
|
||||
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 64-bit SPR manipulation instrs.
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ def SDT_PPCstbrx : SDTypeProfile<0, 4, [
|
|||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
|
||||
]>;
|
||||
|
||||
|
||||
def SDT_PPClarx : SDTypeProfile<1, 2, [
|
||||
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
|
@ -52,6 +53,10 @@ def SDT_PPCcmp_unres : SDTypeProfile<0, 3, [
|
|||
SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>
|
||||
]>;
|
||||
|
||||
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
|
||||
SDTCisPtrTy<0>, SDTCisVT<1, i32>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific DAG Nodes.
|
||||
//
|
||||
|
@ -121,6 +126,12 @@ def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone,
|
|||
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInFlag]>;
|
||||
|
||||
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
|
||||
[SDNPHasChain, SDNPOptInFlag]>;
|
||||
|
||||
def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
|
||||
|
||||
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
|
||||
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
|
||||
|
||||
|
@ -453,6 +464,46 @@ let isCall = 1, PPC970_Unit = 7,
|
|||
[(PPCbctrl_ELF)]>, Requires<[In32BitMode]>;
|
||||
}
|
||||
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNdi :Pseudo< (outs),
|
||||
(ins calltarget:$dst, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNd $dst $offset",
|
||||
[]>;
|
||||
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNa $func $offset",
|
||||
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
|
||||
"#TC_RETURNr $dst $offset",
|
||||
[]>;
|
||||
|
||||
|
||||
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
|
||||
isIndirectBranch = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
|
||||
Requires<[In32BitMode]>;
|
||||
|
||||
|
||||
|
||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
|
||||
isBarrier = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
|
||||
"b $dst", BrB,
|
||||
[]>;
|
||||
|
||||
|
||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
|
||||
isBarrier = 1, isCall = 1, isReturn = 1 in
|
||||
def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
|
||||
"ba $dst", BrB,
|
||||
[]>;
|
||||
|
||||
|
||||
// DCB* instructions.
|
||||
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
|
||||
"dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
|
||||
|
@ -1211,6 +1262,18 @@ def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)),
|
|||
def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)),
|
||||
(BL_ELF texternalsym:$dst)>;
|
||||
|
||||
|
||||
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
|
||||
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
|
||||
|
||||
def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
|
||||
(TCRETURNdi texternalsym:$dst, imm:$imm)>;
|
||||
|
||||
def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
|
||||
(TCRETURNri CTRRC:$dst, imm:$imm)>;
|
||||
|
||||
|
||||
|
||||
// Hi and Lo for Darwin Global Addresses.
|
||||
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
|
||||
def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
|
||||
|
|
|
@ -43,19 +43,42 @@ private:
|
|||
/// requires that the code generator produce a store of LR to the stack on
|
||||
/// entry, even though LR may otherwise apparently not be used.
|
||||
bool LRStoreRequired;
|
||||
|
||||
/// MinReservedArea - This is the frame size that is at least reserved in a
|
||||
/// potential caller (parameter+linkage area).
|
||||
unsigned MinReservedArea;
|
||||
|
||||
/// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
|
||||
/// amount the stack pointer is adjusted to make the frame bigger for tail
|
||||
/// calls. Used for creating an area before the register spill area.
|
||||
int TailCallSPDelta;
|
||||
|
||||
/// HasFastCall - Does this function contain a fast call. Used to determine
|
||||
/// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
|
||||
bool HasFastCall;
|
||||
|
||||
public:
|
||||
PPCFunctionInfo(MachineFunction &MF)
|
||||
: FramePointerSaveIndex(0),
|
||||
ReturnAddrSaveIndex(0),
|
||||
SpillsCR(false),
|
||||
LRStoreRequired(false) {}
|
||||
LRStoreRequired(false),
|
||||
MinReservedArea(0),
|
||||
TailCallSPDelta(0),
|
||||
HasFastCall(false) {}
|
||||
|
||||
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
|
||||
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
|
||||
|
||||
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
|
||||
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
|
||||
|
||||
|
||||
unsigned getMinReservedArea() const { return MinReservedArea; }
|
||||
void setMinReservedArea(unsigned size) { MinReservedArea = size; }
|
||||
|
||||
int getTailCallSPDelta() const { return TailCallSPDelta; }
|
||||
void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
|
||||
|
||||
/// UsesLR - This is set when the prolog/epilog inserter does its initial scan
|
||||
/// of the function, it is true if the LR/LR8 register is ever explicitly
|
||||
/// accessed/clobbered in the machine function (e.g. by calls and movpctolr,
|
||||
|
@ -68,6 +91,9 @@ public:
|
|||
|
||||
void setLRStoreRequired() { LRStoreRequired = true; }
|
||||
bool isLRStoreRequired() const { return LRStoreRequired; }
|
||||
|
||||
void setHasFastCall() { HasFastCall = true; }
|
||||
bool hasFastCall() const { return HasFastCall;}
|
||||
};
|
||||
|
||||
} // end of namespace llvm
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "PPCRegisterInfo.h"
|
||||
#include "PPCFrameInfo.h"
|
||||
#include "PPCSubtarget.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Type.h"
|
||||
|
@ -332,7 +333,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
|
|||
//
|
||||
static bool needsFP(const MachineFunction &MF) {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
return NoFramePointerElim || MFI->hasVarSizedObjects();
|
||||
return NoFramePointerElim || MFI->hasVarSizedObjects() ||
|
||||
(PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
|
||||
}
|
||||
|
||||
static bool spillsCR(const MachineFunction &MF) {
|
||||
|
@ -399,9 +401,42 @@ static bool MustSaveLR(const MachineFunction &MF) {
|
|||
MF.getFrameInfo()->hasCalls();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void PPCRegisterInfo::
|
||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
|
||||
// Add (actually substract) back the amount the callee popped on return.
|
||||
if (int CalleeAmt = I->getOperand(1).getImm()) {
|
||||
MachineInstr * New = NULL;
|
||||
bool is64Bit = Subtarget.isPPC64();
|
||||
CalleeAmt *= -1;
|
||||
unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
|
||||
unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
|
||||
unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
|
||||
unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
|
||||
unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
|
||||
unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
|
||||
|
||||
if (isInt16(CalleeAmt)) {
|
||||
New = BuildMI(TII.get(ADDIInstr), StackReg).addReg(StackReg).
|
||||
addImm(CalleeAmt);
|
||||
MBB.insert(I, New);
|
||||
} else {
|
||||
MachineBasicBlock::iterator MBBI = I;
|
||||
BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg)
|
||||
.addImm(CalleeAmt >> 16);
|
||||
BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg)
|
||||
.addReg(TmpReg, false, false, true)
|
||||
.addImm(CalleeAmt & 0xFFFF);
|
||||
BuildMI(MBB, MBBI, TII.get(ADDInstr))
|
||||
.addReg(StackReg)
|
||||
.addReg(StackReg)
|
||||
.addReg(TmpReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
|
||||
MBB.erase(I);
|
||||
}
|
||||
|
@ -924,6 +959,13 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
|||
FI->setFramePointerSaveIndex(FPSI);
|
||||
}
|
||||
|
||||
// Reserve stack space to move the linkage area to in case of a tail call.
|
||||
int TCSPDelta = 0;
|
||||
if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) {
|
||||
int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
|
||||
MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta,
|
||||
AddFPOffsetAmount + TCSPDelta);
|
||||
}
|
||||
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
|
||||
// a large stack, which will require scavenging a register to materialize a
|
||||
// large offset.
|
||||
|
@ -1160,7 +1202,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
|
|||
void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator MBBI = prior(MBB.end());
|
||||
assert(MBBI->getOpcode() == PPC::BLR &&
|
||||
unsigned RetOpcode = MBBI->getOpcode();
|
||||
|
||||
assert( (RetOpcode == PPC::BLR ||
|
||||
RetOpcode == PPC::TCRETURNri ||
|
||||
RetOpcode == PPC::TCRETURNdi ||
|
||||
RetOpcode == PPC::TCRETURNai ||
|
||||
RetOpcode == PPC::TCRETURNri8 ||
|
||||
RetOpcode == PPC::TCRETURNdi8 ||
|
||||
RetOpcode == PPC::TCRETURNai8) &&
|
||||
"Can only insert epilog into returning blocks");
|
||||
|
||||
// Get alignment info so we know how to restore r1
|
||||
|
@ -1169,7 +1219,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
|
|||
unsigned MaxAlign = MFI->getMaxAlignment();
|
||||
|
||||
// Get the number of bytes allocated from the FrameInfo.
|
||||
unsigned FrameSize = MFI->getStackSize();
|
||||
int FrameSize = MFI->getStackSize();
|
||||
|
||||
// Get processor type.
|
||||
bool IsPPC64 = Subtarget.isPPC64();
|
||||
|
@ -1183,19 +1233,75 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
|
|||
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
|
||||
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
|
||||
|
||||
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
|
||||
RetOpcode == PPC::TCRETURNdi ||
|
||||
RetOpcode == PPC::TCRETURNai ||
|
||||
RetOpcode == PPC::TCRETURNri8 ||
|
||||
RetOpcode == PPC::TCRETURNdi8 ||
|
||||
RetOpcode == PPC::TCRETURNai8;
|
||||
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
|
||||
if (UsesTCRet) {
|
||||
int MaxTCRetDelta = FI->getTailCallSPDelta();
|
||||
MachineOperand &StackAdjust = MBBI->getOperand(1);
|
||||
assert( StackAdjust.isImmediate() && "Expecting immediate value.");
|
||||
// Adjust stack pointer.
|
||||
int StackAdj = StackAdjust.getImm();
|
||||
int Delta = StackAdj - MaxTCRetDelta;
|
||||
assert((Delta >= 0) && "Delta must be positive");
|
||||
if (MaxTCRetDelta>0)
|
||||
FrameSize += (StackAdj +Delta);
|
||||
else
|
||||
FrameSize += StackAdj;
|
||||
}
|
||||
|
||||
if (FrameSize) {
|
||||
// The loaded (or persistent) stack pointer value is offset by the 'stwu'
|
||||
// on entry to the function. Add this offset back now.
|
||||
if (!Subtarget.isPPC64()) {
|
||||
if (isInt16(FrameSize) && (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
|
||||
!MFI->hasVarSizedObjects()) {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
|
||||
.addReg(PPC::R1).addImm(FrameSize);
|
||||
if (!IsPPC64) {
|
||||
// If this function contained a fastcc call and PerformTailCallOpt is
|
||||
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
|
||||
// call which invalidates the stack pointer value in SP(0). So we use the
|
||||
// value of R31 in this case.
|
||||
if (FI->hasFastCall() && isInt16(FrameSize)) {
|
||||
assert(hasFP(MF) && "Expecting a valid the frame pointer.");
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
|
||||
.addReg(PPC::R31).addImm(FrameSize);
|
||||
} else if(FI->hasFastCall()) {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::LIS), PPC::R0)
|
||||
.addImm(FrameSize >> 16);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ORI), PPC::R0)
|
||||
.addReg(PPC::R0, false, false, true)
|
||||
.addImm(FrameSize & 0xFFFF);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADD4))
|
||||
.addReg(PPC::R1)
|
||||
.addReg(PPC::R31)
|
||||
.addReg(PPC::R0);
|
||||
} else if (isInt16(FrameSize) &&
|
||||
(!ALIGN_STACK || TargetAlign >= MaxAlign) &&
|
||||
!MFI->hasVarSizedObjects()) {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1)
|
||||
.addReg(PPC::R1).addImm(FrameSize);
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::LWZ),PPC::R1).addImm(0).addReg(PPC::R1);
|
||||
}
|
||||
} else {
|
||||
if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
|
||||
if (FI->hasFastCall() && isInt16(FrameSize)) {
|
||||
assert(hasFP(MF) && "Expecting a valid the frame pointer.");
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1)
|
||||
.addReg(PPC::X31).addImm(FrameSize);
|
||||
} else if(FI->hasFastCall()) {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::LIS8), PPC::X0)
|
||||
.addImm(FrameSize >> 16);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ORI8), PPC::X0)
|
||||
.addReg(PPC::X0, false, false, true)
|
||||
.addImm(FrameSize & 0xFFFF);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADD8))
|
||||
.addReg(PPC::X1)
|
||||
.addReg(PPC::X31)
|
||||
.addReg(PPC::X0);
|
||||
} else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
|
||||
!MFI->hasVarSizedObjects()) {
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1)
|
||||
.addReg(PPC::X1).addImm(FrameSize);
|
||||
|
@ -1228,6 +1334,64 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
|
|||
if (UsesLR)
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::MTLR)).addReg(PPC::R0);
|
||||
}
|
||||
|
||||
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
|
||||
// call optimization
|
||||
if (PerformTailCallOpt && RetOpcode == PPC::BLR &&
|
||||
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
|
||||
unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
|
||||
unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
|
||||
unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
|
||||
unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
|
||||
unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
|
||||
unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
|
||||
unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
|
||||
|
||||
if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
|
||||
BuildMI(MBB, MBBI, TII.get(ADDIInstr), StackReg)
|
||||
.addReg(StackReg).addImm(CallerAllocatedAmt);
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg)
|
||||
.addImm(CallerAllocatedAmt >> 16);
|
||||
BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg)
|
||||
.addReg(TmpReg, false, false, true)
|
||||
.addImm(CallerAllocatedAmt & 0xFFFF);
|
||||
BuildMI(MBB, MBBI, TII.get(ADDInstr))
|
||||
.addReg(StackReg)
|
||||
.addReg(FPReg)
|
||||
.addReg(TmpReg);
|
||||
}
|
||||
} else if (RetOpcode == PPC::TCRETURNdi) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILB)).
|
||||
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
|
||||
} else if (RetOpcode == PPC::TCRETURNri) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
assert(JumpTarget.isReg() && "Expecting register operand.");
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR));
|
||||
} else if (RetOpcode == PPC::TCRETURNai) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
|
||||
} else if (RetOpcode == PPC::TCRETURNdi8) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILB8)).
|
||||
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
|
||||
} else if (RetOpcode == PPC::TCRETURNri8) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
assert(JumpTarget.isReg() && "Expecting register operand.");
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR8));
|
||||
} else if (RetOpcode == PPC::TCRETURNai8) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
BuildMI(MBB, MBBI, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
|
||||
}
|
||||
}
|
||||
|
||||
unsigned PPCRegisterInfo::getRARegister() const {
|
||||
|
|
|
@ -346,4 +346,6 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
|
|||
let CopyCost = -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
|
||||
def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
|
||||
|
|
|
@ -788,19 +788,6 @@ SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
|
|||
|
||||
#include "X86GenCallingConv.inc"
|
||||
|
||||
/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
|
||||
/// exists skip possible ISD:TokenFactor.
|
||||
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
|
||||
if (Chain.getOpcode() == X86ISD::TAILCALL) {
|
||||
return Chain;
|
||||
} else if (Chain.getOpcode() == ISD::TokenFactor) {
|
||||
if (Chain.getNumOperands() &&
|
||||
Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
|
||||
return Chain.getOperand(0);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// LowerRET - Lower an ISD::RET node.
|
||||
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
||||
assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
|
||||
|
@ -821,7 +808,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
|||
SDOperand Chain = Op.getOperand(0);
|
||||
|
||||
// Handle tail call return.
|
||||
Chain = GetPossiblePreceedingTailCall(Chain);
|
||||
Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
|
||||
if (Chain.getOpcode() == X86ISD::TAILCALL) {
|
||||
SDOperand TailCall = Chain;
|
||||
SDOperand TargetAddress = TailCall.getOperand(1);
|
||||
|
@ -1057,27 +1044,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
|
|||
return None;
|
||||
}
|
||||
|
||||
/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
|
||||
/// possibly be overwritten when lowering the outgoing arguments in a tail
|
||||
/// call. Currently the implementation of this call is very conservative and
|
||||
/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
|
||||
/// virtual registers would be overwritten by direct lowering.
|
||||
static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
|
||||
MachineFrameInfo * MFI) {
|
||||
RegisterSDNode * OpReg = NULL;
|
||||
FrameIndexSDNode * FrameIdxNode = NULL;
|
||||
int FrameIdx = 0;
|
||||
if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
|
||||
(Op.getOpcode()== ISD::CopyFromReg &&
|
||||
(OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
|
||||
(OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
|
||||
(Op.getOpcode() == ISD::LOAD &&
|
||||
(FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op.getOperand(1))) &&
|
||||
(MFI->isFixedObjectIndex((FrameIdx = FrameIdxNode->getIndex()))) &&
|
||||
(MFI->getObjectOffset(FrameIdx) >= 0)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
|
||||
/// in a register before calling.
|
||||
|
@ -1087,7 +1053,6 @@ bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
|
|||
Subtarget->isPICStyleGOT();
|
||||
}
|
||||
|
||||
|
||||
/// CallRequiresFnAddressInReg - Check whether the call requires the function
|
||||
/// address to be loaded in a register.
|
||||
bool
|
||||
|
@ -1097,33 +1062,6 @@ X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
|
|||
Subtarget->isPICStyleGOT();
|
||||
}
|
||||
|
||||
/// CopyTailCallClobberedArgumentsToVRegs - Create virtual registers for all
|
||||
/// arguments to force loading and guarantee that arguments sourcing from
|
||||
/// incomming parameters are not overwriting each other.
|
||||
static SDOperand
|
||||
CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain,
|
||||
SmallVector<std::pair<unsigned, SDOperand>, 8> &TailCallClobberedVRegs,
|
||||
SelectionDAG &DAG,
|
||||
MachineFunction &MF,
|
||||
const TargetLowering * TL) {
|
||||
|
||||
SDOperand InFlag;
|
||||
for (unsigned i = 0, e = TailCallClobberedVRegs.size(); i != e; i++) {
|
||||
SDOperand Arg = TailCallClobberedVRegs[i].second;
|
||||
unsigned Idx = TailCallClobberedVRegs[i].first;
|
||||
unsigned VReg =
|
||||
MF.getRegInfo().
|
||||
createVirtualRegister(TL->getRegClassFor(Arg.getValueType()));
|
||||
Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
Arg = DAG.getCopyFromReg(Chain, VReg, Arg.getValueType(), InFlag);
|
||||
TailCallClobberedVRegs[i] = std::make_pair(Idx, Arg);
|
||||
Chain = Arg.getValue(1);
|
||||
InFlag = Arg.getValue(2);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
|
||||
/// by "Src" to address "Dst" with size and alignment information specified by
|
||||
/// the specific parameter attribute. The copy will be passed as a byval
|
||||
|
@ -1133,8 +1071,7 @@ CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
|
|||
ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
|
||||
SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
|
||||
return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
|
||||
/*AlwaysInline=*/true,
|
||||
NULL, 0, NULL, 0);
|
||||
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
|
||||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
|
||||
|
@ -1463,65 +1400,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
|
|||
return Chain;
|
||||
}
|
||||
|
||||
/// CopyTailCallByValClobberedRegToVirtReg - Copy arguments with register target
|
||||
/// which might be overwritten by later byval tail call lowering to a virtual
|
||||
/// register.
|
||||
bool
|
||||
X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
|
||||
SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
|
||||
SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
|
||||
std::pair<unsigned, SDOperand> &RegToPass,
|
||||
SDOperand &OutChain,
|
||||
SDOperand &OutFlag,
|
||||
MachineFunction &MF,
|
||||
SelectionDAG & DAG) {
|
||||
if (!containsByValArg) return false;
|
||||
|
||||
std::pair<unsigned, unsigned> ArgRegVReg;
|
||||
MVT::ValueType VT = RegToPass.second.getValueType();
|
||||
|
||||
ArgRegVReg.first = RegToPass.first;
|
||||
ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
|
||||
|
||||
// Copy Argument to virtual register.
|
||||
OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
|
||||
RegToPass.second, OutFlag);
|
||||
OutFlag = OutChain.getValue(1);
|
||||
// Remember virtual register and type.
|
||||
TailCallByValClobberedVRegs.push_back(ArgRegVReg);
|
||||
TailCallByValClobberedVRegTypes.push_back(VT);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// RestoreTailCallByValClobberedReg - Restore registers which were saved to
|
||||
/// virtual registers to prevent tail call byval lowering from overwriting
|
||||
/// parameter registers.
|
||||
static SDOperand
|
||||
RestoreTailCallByValClobberedRegs(SelectionDAG & DAG, SDOperand Chain,
|
||||
SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
|
||||
SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
|
||||
if (TailCallByValClobberedVRegs.size()==0) return Chain;
|
||||
|
||||
SmallVector<SDOperand, 8> RegOpChains;
|
||||
for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
|
||||
SDOperand InFlag;
|
||||
unsigned DestReg = TailCallByValClobberedVRegs[i].first;
|
||||
unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
|
||||
MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
|
||||
SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
|
||||
Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
|
||||
RegOpChains.push_back(Chain);
|
||||
}
|
||||
if (!RegOpChains.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
|
||||
&RegOpChains[0], RegOpChains.size());
|
||||
return Chain;
|
||||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo * MFI = MF.getFrameInfo();
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
|
@ -1572,17 +1452,11 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
FPDiff);
|
||||
|
||||
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
||||
SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
|
||||
|
||||
SmallVector<SDOperand, 8> MemOpChains;
|
||||
|
||||
SDOperand StackPtr;
|
||||
bool containsTailCallByValArg = false;
|
||||
SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
|
||||
SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
|
||||
|
||||
// Walk the register/memloc assignments, inserting copies/loads. For tail
|
||||
// calls, remember all arguments for later special lowering.
|
||||
// Walk the register/memloc assignments, inserting copies/loads. In the case
|
||||
// of tail call optimization arguments are handle later.
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
|
@ -1638,10 +1512,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
||||
Arg));
|
||||
// Remember fact that this call contains byval arguments.
|
||||
containsTailCallByValArg |= IsTailCall && isByVal;
|
||||
} else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
|
||||
TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1653,21 +1523,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||
// and flag operands which copy the outgoing args into registers.
|
||||
SDOperand InFlag;
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
// Tail call byval lowering might overwrite argument registers so arguments
|
||||
// passed to be copied to a virtual register for
|
||||
// later processing.
|
||||
if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
|
||||
TailCallByValClobberedVRegs,
|
||||
TailCallByValClobberedVRegTypes,
|
||||
RegsToPass[i], Chain, InFlag, MF,
|
||||
DAG))
|
||||
continue;
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
// Tail call byval lowering might overwrite argument registers so in case of
|
||||
// tail call optimization the copies to registers are lowered later.
|
||||
if (!IsTailCall)
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
// ELF / PIC requires GOT in the EBX register before function calls via PLT
|
||||
// GOT pointer.
|
||||
|
@ -1723,10 +1586,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
int FI = 0;
|
||||
// Do not flag preceeding copytoreg stuff together with the following stuff.
|
||||
InFlag = SDOperand();
|
||||
|
||||
Chain = CopyTailCallClobberedArgumentsToVRegs(Chain, TailCallClobberedVRegs,
|
||||
DAG, MF, this);
|
||||
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
if (!VA.isRegLoc()) {
|
||||
|
@ -1741,17 +1600,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
FIN = DAG.getFrameIndex(FI, getPointerTy());
|
||||
|
||||
// Find virtual register for this argument.
|
||||
bool Found=false;
|
||||
for (unsigned idx=0, e= TailCallClobberedVRegs.size(); idx < e; idx++)
|
||||
if (TailCallClobberedVRegs[idx].first==i) {
|
||||
Arg = TailCallClobberedVRegs[idx].second;
|
||||
Found=true;
|
||||
break;
|
||||
}
|
||||
assert(IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)==false ||
|
||||
(Found==true && "No corresponding Argument was found"));
|
||||
|
||||
if (Flags.isByVal()) {
|
||||
// Copy relative to framepointer.
|
||||
SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
|
||||
|
@ -1774,10 +1622,13 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
|
||||
&MemOpChains2[0], MemOpChains2.size());
|
||||
|
||||
// Restore byval lowering clobbered registers.
|
||||
Chain = RestoreTailCallByValClobberedRegs(DAG, Chain,
|
||||
TailCallByValClobberedVRegs,
|
||||
TailCallByValClobberedVRegTypes);
|
||||
// Copy arguments to their registers.
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
InFlag =SDOperand();
|
||||
|
||||
// Store the return address to the appropriate stack slot.
|
||||
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
|
||||
|
@ -1955,15 +1806,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
|
|||
if (!PerformTailCallOpt)
|
||||
return false;
|
||||
|
||||
// Check whether CALL node immediatly preceeds the RET node and whether the
|
||||
// return uses the result of the node or is a void return.
|
||||
unsigned NumOps = Ret.getNumOperands();
|
||||
if ((NumOps == 1 &&
|
||||
(Ret.getOperand(0) == SDOperand(Call.Val,1) ||
|
||||
Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
|
||||
(NumOps > 1 &&
|
||||
Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
|
||||
Ret.getOperand(1) == SDOperand(Call.Val,0))) {
|
||||
if (CheckTailCallReturnConstraints(Call, Ret)) {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned CallerCC = MF.getFunction()->getCallingConv();
|
||||
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
|
||||
|
|
|
@ -475,15 +475,6 @@ namespace llvm {
|
|||
SDOperand EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDOperand &OutRetAddr,
|
||||
SDOperand Chain, bool IsTailCall, bool Is64Bit,
|
||||
int FPDiff);
|
||||
|
||||
bool CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
|
||||
SmallVector< std::pair<unsigned, unsigned>,8> &TailCallByValClobberedVRegs,
|
||||
SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
|
||||
std::pair<unsigned, SDOperand> &RegToPass,
|
||||
SDOperand &OutChain,
|
||||
SDOperand &OutFlag,
|
||||
MachineFunction &MF,
|
||||
SelectionDAG & DAG);
|
||||
|
||||
CCAssignFn *CCAssignFnForNode(SDOperand Op) const;
|
||||
NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDOperand Op);
|
||||
|
|
|
@ -391,6 +391,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
|
|||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
|
||||
def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
|
||||
[]>;
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
; RUN: llvm-as < %s | llc -march=ppc64 -tailcallopt | grep TC_RETURNd8
|
||||
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
; RUN: llvm-as < %s | llc -march=ppc32 -tailcallopt | grep TC_RETURN
|
||||
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
; RUN: llvm-as < %s | llc -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
|
||||
|
||||
|
||||
|
||||
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
Loading…
Reference in New Issue