forked from OSchip/llvm-project
Added tail call optimization to the x86 back end. It can be
enabled by passing -tailcallopt to llc. The optimization is performed if the following conditions are satisfied: * caller/callee are fastcc * elf/pic is disabled OR elf/pic enabled + callee is in module + callee has visibility protected or hidden llvm-svn: 42870
This commit is contained in:
parent
29cfef59ff
commit
9ccea99165
|
@ -860,6 +860,15 @@ public:
|
|||
/// implement this. The default implementation of this aborts.
|
||||
virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
|
||||
|
||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible for
|
||||
/// tail call optimization. Target which want to do tail call optimization
|
||||
/// should implement this function.
|
||||
virtual bool IsEligibleForTailCallOptimization(SDOperand Call,
|
||||
SDOperand Ret,
|
||||
SelectionDAG &DAG) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// CustomPromoteOperation - This callback is invoked for operations that are
|
||||
/// unsupported by the target, are registered to use 'custom' lowering, and
|
||||
/// whose type needs to be promoted.
|
||||
|
|
|
@ -73,6 +73,11 @@ namespace llvm {
|
|||
/// ExceptionHandling - This flag indicates that exception information should
|
||||
/// be emitted.
|
||||
extern bool ExceptionHandling;
|
||||
|
||||
/// PerformTailCallOpt - This flag is enabled when the -tailcallopt is
|
||||
/// specified on the commandline. When the flag is on, the target will perform
|
||||
/// tail call optimization (pop the caller's stack) providing it supports it.
|
||||
extern bool PerformTailCallOpt;
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -4444,6 +4444,48 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
|
|||
}
|
||||
}
|
||||
|
||||
/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the
|
||||
/// DAG and fixes their tailcall attribute operand
|
||||
static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
|
||||
TargetLowering& TLI) {
|
||||
SDNode * Ret = NULL;
|
||||
SDOperand Terminator = DAG.getRoot();
|
||||
|
||||
// Find RET node.
|
||||
if (Terminator.getOpcode() == ISD::RET) {
|
||||
Ret = Terminator.Val;
|
||||
}
|
||||
|
||||
// Fix tail call attribute of CALL nodes.
|
||||
for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(),
|
||||
BI = prior(DAG.allnodes_end()); BI != BE; --BI) {
|
||||
if (BI->getOpcode() == ISD::CALL) {
|
||||
SDOperand OpRet(Ret, 0);
|
||||
SDOperand OpCall(static_cast<SDNode*>(BI), 0);
|
||||
bool isMarkedTailCall =
|
||||
cast<ConstantSDNode>(OpCall.getOperand(3))->getValue() != 0;
|
||||
// If CALL node has tail call attribute set to true and the call is not
|
||||
// eligible (no RET or the target rejects) the attribute is fixed to
|
||||
// false. The TargetLowering::IsEligibleForTailCallOptimization function
|
||||
// must correctly identify tail call optimizable calls.
|
||||
if (isMarkedTailCall &&
|
||||
(Ret==NULL ||
|
||||
!TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) {
|
||||
SmallVector<SDOperand, 32> Ops;
|
||||
unsigned idx=0;
|
||||
for(SDNode::op_iterator I =OpCall.Val->op_begin(),
|
||||
E=OpCall.Val->op_end(); I!=E; I++, idx++) {
|
||||
if (idx!=3)
|
||||
Ops.push_back(*I);
|
||||
else
|
||||
Ops.push_back(DAG.getConstant(false, TLI.getPointerTy()));
|
||||
}
|
||||
DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
|
||||
std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
|
||||
FunctionLoweringInfo &FuncInfo) {
|
||||
|
@ -4621,6 +4663,12 @@ void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
|
|||
|
||||
// Make sure the root of the DAG is up-to-date.
|
||||
DAG.setRoot(SDL.getRoot());
|
||||
|
||||
// Check whether calls in this block are real tail calls. Fix up CALL nodes
|
||||
// with correct tailcall attribute so that the target can rely on the tailcall
|
||||
// attribute indicating whether the call is really eligible for tail call
|
||||
// optimization.
|
||||
CheckDAGForTailCallsAndFixThem(DAG, TLI);
|
||||
}
|
||||
|
||||
void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
|
||||
|
|
|
@ -33,6 +33,7 @@ namespace llvm {
|
|||
bool ExceptionHandling;
|
||||
Reloc::Model RelocationModel;
|
||||
CodeModel::Model CMModel;
|
||||
bool PerformTailCallOpt;
|
||||
}
|
||||
namespace {
|
||||
cl::opt<bool, true> PrintCode("print-machineinstrs",
|
||||
|
@ -116,6 +117,12 @@ namespace {
|
|||
clEnumValN(CodeModel::Large, "large",
|
||||
" Large code model"),
|
||||
clEnumValEnd));
|
||||
|
||||
cl::opt<bool, true>
|
||||
EnablePerformTailCallOpt("tailcallopt",
|
||||
cl::desc("Turn on tail call optimization."),
|
||||
cl::location(PerformTailCallOpt),
|
||||
cl::init(false));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
|
|
@ -1368,3 +1368,83 @@ L7:
|
|||
L5:
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
Tail call optimization improvements: Tail call optimization currently
|
||||
pushes all arguments on the top of the stack (their normal place if
|
||||
that was a not tail call optimized functiong call ) before moving them
|
||||
to actual stack slot. this is done to prevent overwriting of paramters
|
||||
(see example below) that might be used, since the arguments of the
|
||||
callee overwrites callers arguments.
|
||||
|
||||
example:
|
||||
|
||||
int callee(int32, int64);
|
||||
int caller(int32 arg1, int32 arg2) {
|
||||
int64 local = arg2 * 2;
|
||||
return callee(arg2, (int64)local);
|
||||
}
|
||||
|
||||
[arg1] [!arg2 no longer valid since we moved local onto it]
|
||||
[arg2] -> [(int64)
|
||||
[RETADDR] local ]
|
||||
|
||||
moving arg1 onto the stack slot of callee function would overwrite
|
||||
arg2 of the caller.
|
||||
|
||||
Possible optimizations:
|
||||
|
||||
- only push those arguments to the top of the stack that are actual
|
||||
parameters of the caller function and have no local value in the
|
||||
caller
|
||||
|
||||
in above example local does not need to be pushed onto the top of
|
||||
the stack as it is definitetly not a caller's function parameter
|
||||
|
||||
- analyse the actual parameters of the callee to see which would
|
||||
overwrite a caller paramter which is used by the callee and only
|
||||
push them onto the top of the stack
|
||||
|
||||
int callee (int32 arg1, int32 arg2);
|
||||
int caller (int32 arg1, int32 arg2) {
|
||||
return callee(arg1,arg2);
|
||||
}
|
||||
|
||||
here we don't need to write any variables to the top of the stack
|
||||
since they don't overwrite each other
|
||||
|
||||
int callee (int32 arg1, int32 arg2);
|
||||
int caller (int32 arg1, int32 arg2) {
|
||||
return callee(arg2,arg1);
|
||||
}
|
||||
|
||||
here we need to push the arguments because they overwrite each other
|
||||
|
||||
|
||||
code for lowering directly onto callers arguments:
|
||||
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
||||
+ SmallVector<SDOperand, 8> MemOpChains;
|
||||
+
|
||||
+ SDOperand FramePtr;
|
||||
+ SDOperand PtrOff;
|
||||
+ SDOperand FIN;
|
||||
+ int FI = 0;
|
||||
+ // Walk the register/memloc assignments, inserting copies/loads.
|
||||
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
+ CCValAssign &VA = ArgLocs[i];
|
||||
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
+
|
||||
+ ....
|
||||
+
|
||||
+ if (VA.isRegLoc()) {
|
||||
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
+ } else {
|
||||
+ assert(VA.isMemLoc());
|
||||
+ // create frame index
|
||||
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
||||
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||
+ // store relative to framepointer
|
||||
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
|
||||
+ }
|
||||
+ }
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
|
@ -127,6 +127,40 @@ def CC_X86_64_C : CallingConv<[
|
|||
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
// tail call convetion (fast) one register is reserved for target address
|
||||
// namely R9
|
||||
def CC_X86_64_TailCall : CallingConv<[
|
||||
// Promote i8/i16 arguments to i32.
|
||||
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||
|
||||
CCIfStruct<CCStructAssign<[RDI, RSI, RDX, RCX, R8]>>,
|
||||
|
||||
// The first 6 integer arguments are passed in integer registers.
|
||||
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
|
||||
|
||||
// The first 8 FP/Vector arguments are passed in XMM registers.
|
||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
|
||||
|
||||
// The first 8 MMX vector arguments are passed in GPRs.
|
||||
CCIfType<[v8i8, v4i16, v2i32, v1i64],
|
||||
CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
|
||||
|
||||
// The 'nest' parameter, if any, is passed in R10.
|
||||
CCIfNest<CCAssignToReg<[R10]>>,
|
||||
|
||||
// Integer/FP values get stored in stack slots that are 8 bytes in size and
|
||||
// 8-byte aligned if there are no more registers to hold them.
|
||||
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
|
||||
|
||||
// Vectors get 16-byte stack slots that are 16-byte aligned.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
|
||||
|
||||
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
|
||||
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 C Calling Convention
|
||||
|
@ -173,6 +207,22 @@ def CC_X86_32_C : CallingConv<[
|
|||
CCDelegateTo<CC_X86_32_Common>
|
||||
]>;
|
||||
|
||||
/// Same as C calling convention up to nonfree ECX which is used for storing
|
||||
/// potential pointer to tail called function
|
||||
def CC_X86_32_TailCall : CallingConv<[
|
||||
// Promote i8/i16 arguments to i32.
|
||||
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||
|
||||
// The 'nest' parameter, if any, is passed in ECX.
|
||||
CCIfNest<CCAssignToReg<[ECX]>>,
|
||||
|
||||
// The first 3 integer arguments, if marked 'inreg' and if the call is not
|
||||
// a vararg call, are passed in integer registers.
|
||||
CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>>>,
|
||||
|
||||
// Otherwise, same as everything else.
|
||||
CCDelegateTo<CC_X86_32_Common>
|
||||
]>;
|
||||
|
||||
def CC_X86_32_FastCall : CallingConv<[
|
||||
// Promote i8/i16 arguments to i32.
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SSARegMap.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ParameterAttributes.h"
|
||||
|
@ -43,6 +45,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
X86ScalarSSEf64 = Subtarget->hasSSE2();
|
||||
X86ScalarSSEf32 = Subtarget->hasSSE1();
|
||||
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
|
||||
|
||||
|
||||
RegInfo = TM.getRegisterInfo();
|
||||
|
||||
|
@ -641,6 +644,19 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86GenCallingConv.inc"
|
||||
|
||||
/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
|
||||
/// exists skip possible ISD:TokenFactor.
|
||||
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
|
||||
if (Chain.getOpcode()==X86ISD::TAILCALL) {
|
||||
return Chain;
|
||||
} else if (Chain.getOpcode()==ISD::TokenFactor) {
|
||||
if (Chain.getNumOperands() &&
|
||||
Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
|
||||
return Chain.getOperand(0);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// LowerRET - Lower an ISD::RET node.
|
||||
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
||||
|
@ -651,8 +667,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
|||
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
|
||||
CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
|
||||
CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
|
||||
|
||||
|
||||
|
||||
// If this is the first return lowered for this function, add the regs to the
|
||||
// liveout set for the function.
|
||||
if (DAG.getMachineFunction().liveout_empty()) {
|
||||
|
@ -660,10 +675,38 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
|||
if (RVLocs[i].isRegLoc())
|
||||
DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
|
||||
}
|
||||
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
SDOperand Flag;
|
||||
|
||||
// Handle tail call return.
|
||||
Chain = GetPossiblePreceedingTailCall(Chain);
|
||||
if (Chain.getOpcode() == X86ISD::TAILCALL) {
|
||||
SDOperand TailCall = Chain;
|
||||
SDOperand TargetAddress = TailCall.getOperand(1);
|
||||
SDOperand StackAdjustment = TailCall.getOperand(2);
|
||||
assert ( ((TargetAddress.getOpcode() == ISD::Register &&
|
||||
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
|
||||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
|
||||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
|
||||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
|
||||
"Expecting an global address, external symbol, or register");
|
||||
assert( StackAdjustment.getOpcode() == ISD::Constant &&
|
||||
"Expecting a const value");
|
||||
|
||||
SmallVector<SDOperand,8> Operands;
|
||||
Operands.push_back(Chain.getOperand(0));
|
||||
Operands.push_back(TargetAddress);
|
||||
Operands.push_back(StackAdjustment);
|
||||
// Copy registers used by the call. Last operand is a flag so it is not
|
||||
// copied.
|
||||
for(unsigned i=3; i < TailCall.getNumOperands()-1;i++) {
|
||||
Operands.push_back(Chain.getOperand(i));
|
||||
}
|
||||
return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], Operands.size());
|
||||
}
|
||||
|
||||
// Regular return.
|
||||
SDOperand Flag;
|
||||
|
||||
// Copy the result values into the output registers.
|
||||
if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
|
||||
RVLocs[0].getLocReg() != X86::ST0) {
|
||||
|
@ -684,7 +727,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
|
|||
if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
|
||||
(X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
|
||||
SDOperand MemLoc;
|
||||
|
||||
|
||||
// If this is a load into a scalarsse value, don't store the loaded value
|
||||
// back to the stack, only to reload it: just replace the scalar-sse load.
|
||||
if (ISD::isNON_EXTLoad(Value.Val) &&
|
||||
|
@ -784,12 +827,14 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
|
|||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// C & StdCall Calling Convention implementation
|
||||
// C & StdCall & Fast Calling Convention implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
// StdCall calling convention seems to be standard for many Windows' API
|
||||
// routines and around. It differs from C calling convention just a little:
|
||||
// callee should clean up the stack, not caller. Symbols should be also
|
||||
// decorated in some fancy way :) It doesn't support any vector arguments.
|
||||
// For info on fast calling convention see Fast Calling Convention (tail call)
|
||||
// implementation LowerX86_32FastCCCallTo.
|
||||
|
||||
/// AddLiveIn - This helper function adds the specified physical register to the
|
||||
/// MachineFunction as a live in value. It also creates a corresponding virtual
|
||||
|
@ -802,6 +847,9 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
|
|||
return VReg;
|
||||
}
|
||||
|
||||
// align stack arguments according to platform alignment needed for tail calls
|
||||
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);
|
||||
|
||||
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
|
||||
const CCValAssign &VA,
|
||||
MachineFrameInfo *MFI,
|
||||
|
@ -826,13 +874,17 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
|
|||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
SDOperand Root = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
|
||||
unsigned CC = MF.getFunction()->getCallingConv();
|
||||
// Assign locations to all of the incoming arguments.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
|
||||
CCState CCInfo(CC, isVarArg,
|
||||
getTargetMachine(), ArgLocs);
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
|
||||
|
||||
// Check for possible tail call calling convention.
|
||||
if (CC == CallingConv::Fast && PerformTailCallOpt)
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
|
||||
else
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
|
||||
|
||||
SmallVector<SDOperand, 8> ArgValues;
|
||||
unsigned LastVal = ~0U;
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
|
@ -877,6 +929,9 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
unsigned StackSize = CCInfo.getNextStackOffset();
|
||||
// align stack specially for tail calls
|
||||
if (CC==CallingConv::Fast)
|
||||
StackSize = GetAlignedArgumentStackSize(StackSize,DAG);
|
||||
|
||||
ArgValues.push_back(Root);
|
||||
|
||||
|
@ -885,7 +940,12 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
|
|||
if (isVarArg)
|
||||
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
|
||||
|
||||
if (isStdCall && !isVarArg) {
|
||||
// Tail call calling convention (CallingConv::Fast) does not support varargs.
|
||||
assert( !(isVarArg && CC == CallingConv::Fast) &&
|
||||
"CallingConv::Fast does not support varargs.");
|
||||
|
||||
if (isStdCall && !isVarArg &&
|
||||
(CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) {
|
||||
BytesToPopOnReturn = StackSize; // Callee pops everything..
|
||||
BytesCallerReserves = 0;
|
||||
} else {
|
||||
|
@ -914,17 +974,21 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
unsigned CC) {
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
|
||||
SDOperand Callee = Op.getOperand(4);
|
||||
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
|
||||
|
||||
|
||||
// Analyze operands of the call, assigning locations to each operand.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
|
||||
if(CC==CallingConv::Fast && PerformTailCallOpt)
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
|
||||
else
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
|
||||
|
||||
// Get a count of how many bytes are to be pushed on the stack.
|
||||
unsigned NumBytes = CCInfo.getNextStackOffset();
|
||||
if (CC==CallingConv::Fast)
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
|
||||
|
||||
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
|
||||
|
||||
|
@ -1023,19 +1087,21 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
|
||||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
|
||||
Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
|
||||
NodeTys, &Ops[0], Ops.size());
|
||||
|
||||
Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
// Create the CALLSEQ_END node.
|
||||
unsigned NumBytesForCalleeToPush = 0;
|
||||
|
||||
if (CC == CallingConv::X86_StdCall) {
|
||||
if (CC == CallingConv::X86_StdCall ||
|
||||
(CC == CallingConv::Fast && PerformTailCallOpt)) {
|
||||
if (isVarArg)
|
||||
NumBytesForCalleeToPush = isSRet ? 4 : 0;
|
||||
else
|
||||
NumBytesForCalleeToPush = NumBytes;
|
||||
assert(!(isVarArg && CC==CallingConv::Fast) &&
|
||||
"CallingConv::Fast does not support varargs.");
|
||||
} else {
|
||||
// If this is is a call to a struct-return function, the callee
|
||||
// pops the hidden struct pointer, so we have to push it back.
|
||||
|
@ -1132,7 +1198,8 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
|
||||
// Make sure the instruction takes 8n+4 bytes to make sure the start of the
|
||||
// arguments and the arguments after the retaddr has been pushed are aligned.
|
||||
// arguments and the arguments after the retaddr has been pushed are
|
||||
// aligned.
|
||||
if ((StackSize & 7) == 0)
|
||||
StackSize += 4;
|
||||
}
|
||||
|
@ -1194,7 +1261,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
|
||||
if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
|
||||
// Make sure the instruction takes 8n+4 bytes to make sure the start of the
|
||||
// arguments and the arguments after the retaddr has been pushed are aligned.
|
||||
// arguments and the arguments after the retaddr has been pushed are
|
||||
// aligned.
|
||||
if ((NumBytes & 7) == 0)
|
||||
NumBytes += 4;
|
||||
}
|
||||
|
@ -1292,8 +1360,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
|
||||
// FIXME: Do not generate X86ISD::TAILCALL for now.
|
||||
Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
|
||||
assert(isTailCall==false && "no tail call here");
|
||||
Chain = DAG.getNode(X86ISD::CALL,
|
||||
NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
|
@ -1312,6 +1380,314 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fast Calling Convention (tail call) implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Like std call, callee cleans arguments, convention except that ECX is
|
||||
// reserved for storing the tail called function address. Only 2 registers are
|
||||
// free for argument passing (inreg). Tail call optimization is performed
|
||||
// provided:
|
||||
// * tailcallopt is enabled
|
||||
// * caller/callee are fastcc
|
||||
// * elf/pic is disabled OR
|
||||
// * elf/pic enabled + callee is in module + callee has
|
||||
// visibility protected or hidden
|
||||
// To ensure the stack is aligned according to platform abi pass
|
||||
// tail-call-align-stack. This makes sure that argument delta is always
|
||||
// multiples of stack alignment. (Dynamic linkers need this - darwin's dyld for
|
||||
// example)
|
||||
// If a tail called function callee has more arguments than the caller the
|
||||
// caller needs to make sure that there is room to move the RETADDR to. This is
|
||||
// achived by reserving an area the size of the argument delta right after the
|
||||
// original REtADDR, but before the saved framepointer or the spilled registers
|
||||
// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
|
||||
// stack layout:
|
||||
// arg1
|
||||
// arg2
|
||||
// RETADDR
|
||||
// [ new RETADDR
|
||||
// move area ]
|
||||
// (possible EBP)
|
||||
// ESI
|
||||
// EDI
|
||||
// local1 ..
|
||||
|
||||
/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
|
||||
/// for a 16 byte align requirement.
|
||||
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
|
||||
SelectionDAG& DAG) {
|
||||
if (PerformTailCallOpt) {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
const TargetMachine &TM = MF.getTarget();
|
||||
const TargetFrameInfo &TFI = *TM.getFrameInfo();
|
||||
unsigned StackAlignment = TFI.getStackAlignment();
|
||||
uint64_t AlignMask = StackAlignment - 1;
|
||||
int64_t Offset = StackSize;
|
||||
unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
|
||||
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
|
||||
// Number smaller than 12 so just add the difference.
|
||||
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
|
||||
} else {
|
||||
// Mask out lower bits, add stackalignment once plus the 12 bytes.
|
||||
Offset = ((~AlignMask) & Offset) + StackAlignment +
|
||||
(StackAlignment-SlotSize);
|
||||
}
|
||||
StackSize = Offset;
|
||||
}
|
||||
return StackSize;
|
||||
}
|
||||
|
||||
/// IsEligibleForTailCallElimination - Check to see whether the next instruction
|
||||
// following the call is a return. A function is eligible if caller/callee
|
||||
// calling conventions match, currently only fastcc supports tail calls, and the
|
||||
// function CALL is immediatly followed by a RET.
|
||||
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
|
||||
SDOperand Ret,
|
||||
SelectionDAG& DAG) const {
|
||||
bool IsEligible = false;
|
||||
|
||||
// Check whether CALL node immediatly preceeds the RET node and whether the
|
||||
// return uses the result of the node or is a void return.
|
||||
if ((Ret.getNumOperands() == 1 &&
|
||||
(Ret.getOperand(0)== SDOperand(Call.Val,1) ||
|
||||
Ret.getOperand(0)== SDOperand(Call.Val,0))) ||
|
||||
(Ret.getOperand(0)== SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
|
||||
Ret.getOperand(1)== SDOperand(Call.Val,0))) {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned CallerCC = MF.getFunction()->getCallingConv();
|
||||
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
|
||||
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
|
||||
SDOperand Callee = Call.getOperand(4);
|
||||
// On elf/pic %ebx needs to be livein.
|
||||
if(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
|
||||
Subtarget->isPICStyleGOT()) {
|
||||
// Can only do local tail calls with PIC.
|
||||
GlobalValue * GV = 0;
|
||||
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
|
||||
if(G != 0 &&
|
||||
(GV = G->getGlobal()) &&
|
||||
(GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
|
||||
IsEligible=true;
|
||||
} else {
|
||||
IsEligible=true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return IsEligible;
|
||||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
|
||||
SelectionDAG &DAG,
|
||||
unsigned CC) {
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
|
||||
SDOperand Callee = Op.getOperand(4);
|
||||
bool is64Bit = Subtarget->is64Bit();
|
||||
|
||||
assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");
|
||||
|
||||
// Analyze operands of the call, assigning locations to each operand.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
|
||||
if (is64Bit)
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
|
||||
else
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
|
||||
|
||||
|
||||
// Lower arguments at fp - stackoffset + fpdiff.
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
unsigned NumBytesToBePushed =
|
||||
GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);
|
||||
|
||||
unsigned NumBytesCallerPushed =
|
||||
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
|
||||
int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;
|
||||
|
||||
// Set the delta of movement of the returnaddr stackslot.
|
||||
// But only set if delta is greater than previous delta.
|
||||
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
|
||||
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
|
||||
|
||||
// Adjust the ret address stack slot.
|
||||
if (FPDiff) {
|
||||
MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
|
||||
SDOperand RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
|
||||
RetAddrFrIdx =
|
||||
DAG.getLoad(VT, DAG.getEntryNode(),RetAddrFrIdx, NULL, 0);
|
||||
// Emit a store of the saved ret value to the new location.
|
||||
int SlotSize = is64Bit ? 8 : 4;
|
||||
int NewReturnAddrFI =
|
||||
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
|
||||
SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
|
||||
Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
|
||||
}
|
||||
|
||||
Chain = DAG.
|
||||
getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy()));
|
||||
|
||||
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
||||
SmallVector<SDOperand, 8> MemOpChains;
|
||||
SmallVector<SDOperand, 8> MemOpChains2;
|
||||
SDOperand FramePtr, StackPtr;
|
||||
SDOperand PtrOff;
|
||||
SDOperand FIN;
|
||||
int FI = 0;
|
||||
|
||||
// Walk the register/memloc assignments, inserting copies/loads. Lower
|
||||
// arguments first to the stack slot where they would normally - in case of a
|
||||
// normal function call - be.
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
|
||||
// Promote the value if needed.
|
||||
switch (VA.getLocInfo()) {
|
||||
default: assert(0 && "Unknown loc info!");
|
||||
case CCValAssign::Full: break;
|
||||
case CCValAssign::SExt:
|
||||
Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
|
||||
break;
|
||||
case CCValAssign::ZExt:
|
||||
Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
|
||||
break;
|
||||
case CCValAssign::AExt:
|
||||
Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
|
||||
break;
|
||||
}
|
||||
|
||||
if (VA.isRegLoc()) {
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
} else {
|
||||
assert(VA.isMemLoc());
|
||||
if (StackPtr.Val == 0)
|
||||
StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
|
||||
|
||||
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
||||
Arg));
|
||||
}
|
||||
}
|
||||
|
||||
if (!MemOpChains.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
|
||||
&MemOpChains[0], MemOpChains.size());
|
||||
|
||||
// Build a sequence of copy-to-reg nodes chained together with token chain
|
||||
// and flag operands which copy the outgoing args into registers.
|
||||
SDOperand InFlag;
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
|
||||
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
|
||||
InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
InFlag = SDOperand();
|
||||
// Copy from stack slots to stack slot of a tail called function. This needs
|
||||
// to be done because if we would lower the arguments directly to their real
|
||||
// stack slot we might end up overwriting each other.
|
||||
// TODO: To make this more efficient (sometimes saving a store/load) we could
|
||||
// analyse the arguments and emit this store/load/store sequence only for
|
||||
// arguments which would be overwritten otherwise.
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
if (!VA.isRegLoc()) {
|
||||
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
||||
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
||||
|
||||
// Get source stack slot.
|
||||
SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
|
||||
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
|
||||
// Create frame index.
|
||||
int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
||||
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||
if (Flags & ISD::ParamFlags::ByVal) {
|
||||
// Copy relative to framepointer.
|
||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
||||
ISD::ParamFlags::ByValAlignOffs);
|
||||
|
||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
||||
ISD::ParamFlags::ByValSizeOffs;
|
||||
|
||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||
// Copy relative to framepointer.
|
||||
MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
|
||||
PtrOff, SizeNode, AlignNode));
|
||||
} else {
|
||||
SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
|
||||
// Store relative to framepointer.
|
||||
MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!MemOpChains2.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
|
||||
&MemOpChains2[0], MemOpChains.size());
|
||||
|
||||
// ELF / PIC requires GOT in the EBX register before function calls via PLT
|
||||
// GOT pointer.
|
||||
// Does not work with tail call since ebx is not restored correctly by
|
||||
// tailcaller. TODO: at least for x86 - verify for x86-64
|
||||
|
||||
// If the callee is a GlobalAddress node (quite common, every direct call is)
|
||||
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
|
||||
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
|
||||
// We should use extra load for direct calls to dllimported functions in
|
||||
// non-JIT mode.
|
||||
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
|
||||
getTargetMachine(), true))
|
||||
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
|
||||
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
|
||||
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
|
||||
else {
|
||||
assert(Callee.getOpcode() == ISD::LOAD &&
|
||||
"Function destination must be loaded into virtual register");
|
||||
unsigned Opc = is64Bit ? X86::R9 : X86::ECX;
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain,
|
||||
DAG.getRegister(Opc, getPointerTy()) ,
|
||||
Callee,InFlag);
|
||||
Callee = DAG.getRegister(Opc, getPointerTy());
|
||||
// Add register as live out.
|
||||
DAG.getMachineFunction().addLiveOut(Opc);
|
||||
}
|
||||
|
||||
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
|
||||
SmallVector<SDOperand, 8> Ops;
|
||||
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
|
||||
Ops.push_back(DAG.getConstant(0, getPointerTy()));
|
||||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
// Returns a chain & a flag for retval copy to use.
|
||||
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
|
||||
Ops.clear();
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(Callee);
|
||||
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
|
||||
// Add argument registers to the end of the list so that they are known live
|
||||
// into the call.
|
||||
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
|
||||
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
|
||||
RegsToPass[i].second.getValueType()));
|
||||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
assert(InFlag.Val &&
|
||||
"Flag must be set. Depend on flag being set in LowerRET");
|
||||
Chain = DAG.getNode(X86ISD::TAILCALL,
|
||||
Op.Val->getVTList(), &Ops[0], Ops.size());
|
||||
|
||||
return SDOperand(Chain.Val, Op.ResNo);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86-64 C Calling Convention implementation
|
||||
|
@ -1323,6 +1699,7 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
|
|||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
SDOperand Root = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
unsigned CC= MF.getFunction()->getCallingConv();
|
||||
|
||||
static const unsigned GPR64ArgRegs[] = {
|
||||
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
|
||||
|
@ -1335,9 +1712,12 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
|
|||
|
||||
// Assign locations to all of the incoming arguments.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
|
||||
CCState CCInfo(CC, isVarArg,
|
||||
getTargetMachine(), ArgLocs);
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
|
||||
if (CC == CallingConv::Fast && PerformTailCallOpt)
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
|
||||
else
|
||||
CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
|
||||
|
||||
SmallVector<SDOperand, 8> ArgValues;
|
||||
unsigned LastVal = ~0U;
|
||||
|
@ -1398,10 +1778,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
unsigned StackSize = CCInfo.getNextStackOffset();
|
||||
if (CC==CallingConv::Fast)
|
||||
StackSize =GetAlignedArgumentStackSize(StackSize, DAG);
|
||||
|
||||
// If the function takes variable number of arguments, make a frame index for
|
||||
// the start of the first vararg value... for expansion of llvm.va_start.
|
||||
if (isVarArg) {
|
||||
assert(CC!=CallingConv::Fast
|
||||
&& "Var arg not supported with calling convention fastcc");
|
||||
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
|
||||
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
|
||||
|
||||
|
@ -1446,10 +1830,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
ArgValues.push_back(Root);
|
||||
|
||||
BytesToPopOnReturn = 0; // Callee pops nothing.
|
||||
BytesCallerReserves = StackSize;
|
||||
|
||||
// Tail call convention (fastcc) needs callee pop.
|
||||
if (CC == CallingConv::Fast && PerformTailCallOpt){
|
||||
BytesToPopOnReturn = StackSize; // Callee pops everything.
|
||||
BytesCallerReserves = 0;
|
||||
} else {
|
||||
BytesToPopOnReturn = 0; // Callee pops nothing.
|
||||
BytesCallerReserves = StackSize;
|
||||
}
|
||||
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
|
||||
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
|
||||
|
||||
|
@ -1463,16 +1851,21 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
unsigned CC) {
|
||||
SDOperand Chain = Op.getOperand(0);
|
||||
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
|
||||
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
|
||||
SDOperand Callee = Op.getOperand(4);
|
||||
|
||||
// Analyze operands of the call, assigning locations to each operand.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
|
||||
if (CC==CallingConv::Fast)
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
|
||||
else
|
||||
CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
|
||||
|
||||
// Get a count of how many bytes are to be pushed on the stack.
|
||||
unsigned NumBytes = CCInfo.getNextStackOffset();
|
||||
if (CC == CallingConv::Fast)
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG);
|
||||
|
||||
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
|
||||
|
||||
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
||||
|
@ -1526,6 +1919,9 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
if (isVarArg) {
|
||||
assert ( CallingConv::Fast != CC &&
|
||||
"Var args not supported with calling convention fastcc");
|
||||
|
||||
// From AMD64 ABI document:
|
||||
// For calls that may call functions that use varargs or stdargs
|
||||
// (prototype-less calls or calls to functions containing ellipsis (...) in
|
||||
|
@ -1574,17 +1970,22 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||
if (InFlag.Val)
|
||||
Ops.push_back(InFlag);
|
||||
|
||||
// FIXME: Do not generate X86ISD::TAILCALL for now.
|
||||
Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
|
||||
Chain = DAG.getNode(X86ISD::CALL,
|
||||
NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
int NumBytesForCalleeToPush = 0;
|
||||
if (CC==CallingConv::Fast) {
|
||||
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
|
||||
|
||||
} else {
|
||||
NumBytesForCalleeToPush = 0; // Callee pops nothing.
|
||||
}
|
||||
// Returns a flag for retval copy to use.
|
||||
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
|
||||
Ops.clear();
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
|
||||
Ops.push_back(DAG.getConstant(0, getPointerTy()));
|
||||
Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
|
||||
Ops.push_back(InFlag);
|
||||
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
|
||||
InFlag = Chain.getValue(1);
|
||||
|
@ -3106,10 +3507,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
|
|||
// SHUFPS the element to the lowest double word, then movss.
|
||||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
SmallVector<SDOperand, 8> IdxVec;
|
||||
IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.
|
||||
push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.
|
||||
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.
|
||||
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.
|
||||
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
|
||||
&IdxVec[0], IdxVec.size());
|
||||
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
|
||||
|
@ -3128,7 +3533,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
|
|||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
SmallVector<SDOperand, 8> IdxVec;
|
||||
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
IdxVec.
|
||||
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
|
||||
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
|
||||
&IdxVec[0], IdxVec.size());
|
||||
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
|
||||
|
@ -3777,17 +4183,23 @@ SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
|
|||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
||||
unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
|
||||
|
||||
if (Subtarget->is64Bit())
|
||||
return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
|
||||
if (Subtarget->is64Bit())
|
||||
if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt)
|
||||
return LowerX86_TailCallTo(Op, DAG, CallingConv);
|
||||
else
|
||||
return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
|
||||
else
|
||||
switch (CallingConv) {
|
||||
default:
|
||||
assert(0 && "Unsupported calling convention");
|
||||
case CallingConv::Fast:
|
||||
// TODO: Implement fastcc
|
||||
// Falls through
|
||||
if (isTailCall && PerformTailCallOpt)
|
||||
return LowerX86_TailCallTo(Op, DAG, CallingConv);
|
||||
else
|
||||
return LowerCCCCallTo(Op,DAG, CallingConv);
|
||||
case CallingConv::C:
|
||||
case CallingConv::X86_StdCall:
|
||||
return LowerCCCCallTo(Op, DAG, CallingConv);
|
||||
|
@ -3855,8 +4267,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
|
|||
default:
|
||||
assert(0 && "Unsupported calling convention");
|
||||
case CallingConv::Fast:
|
||||
// TODO: implement fastcc.
|
||||
|
||||
return LowerCCCArguments(Op,DAG, true);
|
||||
// Falls through
|
||||
case CallingConv::C:
|
||||
return LowerCCCArguments(Op, DAG);
|
||||
|
@ -4176,7 +4587,8 @@ X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
|
|||
SDOperand TheOp = Op.getOperand(0);
|
||||
SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
|
||||
if (Subtarget->is64Bit()) {
|
||||
SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
|
||||
SDOperand Copy1 =
|
||||
DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
|
||||
SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
|
||||
MVT::i64, Copy1.getValue(2));
|
||||
SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
|
||||
|
@ -4612,6 +5024,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
|
||||
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
|
||||
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
|
||||
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4885,7 +5298,7 @@ static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
|
|||
i %= NumElems;
|
||||
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
return (i == 0)
|
||||
? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
||||
? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
||||
} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||
SDOperand Idx = PermMask.getOperand(i);
|
||||
if (Idx.getOpcode() == ISD::UNDEF)
|
||||
|
|
|
@ -181,7 +181,14 @@ namespace llvm {
|
|||
TLSADDR, THREAD_POINTER,
|
||||
|
||||
// Exception Handling helpers
|
||||
EH_RETURN
|
||||
EH_RETURN,
|
||||
|
||||
// tail call return
|
||||
// oeprand #0 chain
|
||||
// operand #1 callee (register or absolute)
|
||||
// operand #2 stack adjustment
|
||||
// operand #3 optional in flag
|
||||
TC_RETURN
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -285,6 +292,7 @@ namespace llvm {
|
|||
unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
|
||||
int BytesToPopOnReturn; // Number of arg bytes ret should pop.
|
||||
int BytesCallerReserves; // Number of arg bytes caller makes.
|
||||
|
||||
public:
|
||||
explicit X86TargetLowering(TargetMachine &TM);
|
||||
|
||||
|
@ -364,6 +372,14 @@ namespace llvm {
|
|||
virtual bool isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
|
||||
MVT::ValueType EVT,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||
/// for tail call optimization. Target which want to do tail call
|
||||
/// optimization should implement this function.
|
||||
virtual bool IsEligibleForTailCallOptimization(SDOperand Call,
|
||||
SDOperand Ret,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
private:
|
||||
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
|
@ -372,7 +388,7 @@ namespace llvm {
|
|||
|
||||
/// X86StackPtr - X86 physical register used as stack ptr.
|
||||
unsigned X86StackPtr;
|
||||
|
||||
|
||||
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
|
||||
/// floating point ops.
|
||||
/// When SSE is available, use it for f32 operations.
|
||||
|
@ -402,6 +418,10 @@ namespace llvm {
|
|||
SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,unsigned CC);
|
||||
|
||||
// fast calling convention (tail call) implementation for 32/64bit
|
||||
SDOperand LowerX86_TailCallTo(SDOperand Op,
|
||||
SelectionDAG & DAG, unsigned CC);
|
||||
unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
|
||||
// Fast and FastCall Calling Convention implementation.
|
||||
SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC);
|
||||
|
|
|
@ -706,6 +706,8 @@ bool X86InstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
|
|||
if (MBB.empty()) return false;
|
||||
|
||||
switch (MBB.back().getOpcode()) {
|
||||
case X86::TCRETURNri:
|
||||
case X86::TCRETURNdi:
|
||||
case X86::RET: // Return.
|
||||
case X86::RETI:
|
||||
case X86::TAILJMPd:
|
||||
|
|
|
@ -55,6 +55,8 @@ def SDT_X86TLSTP : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
|
|||
|
||||
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
|
||||
|
||||
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
|
||||
def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
|
||||
|
||||
|
@ -73,7 +75,7 @@ def X86callseq_start :
|
|||
[SDNPHasChain, SDNPOutFlag]>;
|
||||
def X86callseq_end :
|
||||
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
|
||||
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
|
||||
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
|
||||
|
||||
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
|
||||
[SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
|
||||
|
@ -99,6 +101,8 @@ def X86TLStp : SDNode<"X86ISD::THREAD_POINTER", SDT_X86TLSTP, []>;
|
|||
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
|
||||
[SDNPHasChain, SDNPOptInFlag]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Operand Definitions.
|
||||
|
@ -356,15 +360,30 @@ let isCall = 1 in
|
|||
}
|
||||
|
||||
// Tail call stuff.
|
||||
|
||||
def TAILCALL : I<0, Pseudo, (outs), (ins ),
|
||||
"#TAILCALL",
|
||||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAIL CALL",
|
||||
def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset),
|
||||
"#TC_RETURN $dst $offset",
|
||||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset),
|
||||
"#TC_RETURN $dst $offset",
|
||||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
|
||||
[]>;
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp\t{*}$dst # TAIL CALL",
|
||||
[]>;
|
||||
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
|
||||
[]>;
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
|
||||
"jmp\t{*}$dst # TAIL CALL", []>;
|
||||
"jmp\t{*}$dst # TAILCALL", []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Miscellaneous Instructions...
|
||||
|
@ -2507,13 +2526,23 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
|
|||
(MOV32mi addr:$dst, texternalsym:$src)>;
|
||||
|
||||
// Calls
|
||||
// tailcall stuff
|
||||
def : Pat<(X86tailcall GR32:$dst),
|
||||
(CALL32r GR32:$dst)>;
|
||||
(TAILCALL)>;
|
||||
|
||||
def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
|
||||
(CALLpcrel32 tglobaladdr:$dst)>;
|
||||
(TAILCALL)>;
|
||||
def : Pat<(X86tailcall (i32 texternalsym:$dst)),
|
||||
(CALLpcrel32 texternalsym:$dst)>;
|
||||
(TAILCALL)>;
|
||||
|
||||
def : Pat<(X86tcret GR32:$dst, imm:$off),
|
||||
(TCRETURNri GR32:$dst, imm:$off)>;
|
||||
|
||||
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
|
||||
(TCRETURNdi texternalsym:$dst, imm:$off)>;
|
||||
|
||||
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
|
||||
(TCRETURNdi texternalsym:$dst, imm:$off)>;
|
||||
|
||||
def : Pat<(X86call (i32 tglobaladdr:$dst)),
|
||||
(CALLpcrel32 tglobaladdr:$dst)>;
|
||||
|
|
|
@ -102,6 +102,23 @@ let isCall = 1 in
|
|||
"call\t{*}$dst", []>;
|
||||
}
|
||||
|
||||
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset),
|
||||
"#TC_RETURN $dst $offset",
|
||||
[]>;
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset),
|
||||
"#TC_RETURN $dst $offset",
|
||||
[]>;
|
||||
|
||||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
|
||||
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst # TAILCALL",
|
||||
[]>;
|
||||
|
||||
// Branches
|
||||
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
|
||||
|
@ -1105,6 +1122,24 @@ def : Pat<(X86tailcall (i64 texternalsym:$dst)),
|
|||
def : Pat<(X86tailcall GR64:$dst),
|
||||
(CALL64r GR64:$dst)>;
|
||||
|
||||
|
||||
// tailcall stuff
|
||||
def : Pat<(X86tailcall GR32:$dst),
|
||||
(TAILCALL)>;
|
||||
def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
|
||||
(TAILCALL)>;
|
||||
def : Pat<(X86tailcall (i64 texternalsym:$dst)),
|
||||
(TAILCALL)>;
|
||||
|
||||
def : Pat<(X86tcret GR64:$dst, imm:$off),
|
||||
(TCRETURNri64 GR64:$dst, imm:$off)>;
|
||||
|
||||
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
|
||||
(TCRETURNdi64 texternalsym:$dst, imm:$off)>;
|
||||
|
||||
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
|
||||
(TCRETURNdi64 texternalsym:$dst, imm:$off)>;
|
||||
|
||||
// Comparisons.
|
||||
|
||||
// TEST R,R is smaller than CMP R,0
|
||||
|
|
|
@ -47,18 +47,26 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
|
||||
// FrameIndex for return slot.
|
||||
int ReturnAddrIndex;
|
||||
|
||||
// Delta the ReturnAddr stack slot is moved
|
||||
// Used for creating an area before the register spill area on the stack
|
||||
// the returnaddr can be savely move to this area
|
||||
int TailCallReturnAddrDelta;
|
||||
|
||||
public:
|
||||
X86MachineFunctionInfo() : ForceFramePointer(false),
|
||||
CalleeSavedFrameSize(0),
|
||||
BytesToPopOnReturn(0),
|
||||
DecorationStyle(None),
|
||||
ReturnAddrIndex(0) {}
|
||||
ReturnAddrIndex(0),
|
||||
TailCallReturnAddrDelta(0){}
|
||||
|
||||
X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
|
||||
CalleeSavedFrameSize(0),
|
||||
BytesToPopOnReturn(0),
|
||||
DecorationStyle(None),
|
||||
ReturnAddrIndex(0) {}
|
||||
ReturnAddrIndex(0),
|
||||
TailCallReturnAddrDelta(0) {}
|
||||
|
||||
bool getForceFramePointer() const { return ForceFramePointer;}
|
||||
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
|
||||
|
@ -74,6 +82,9 @@ public:
|
|||
|
||||
int getRAIndex() const { return ReturnAddrIndex; }
|
||||
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
|
||||
|
||||
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
|
||||
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
|
|
|
@ -1436,18 +1436,42 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
|
||||
if (!hasFP(MF))
|
||||
Offset += MF.getFrameInfo()->getStackSize();
|
||||
else
|
||||
else {
|
||||
Offset += SlotSize; // Skip the saved EBP
|
||||
|
||||
// Skip the RETADDR move area
|
||||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
||||
if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
|
||||
}
|
||||
|
||||
MI.getOperand(i+3).ChangeToImmediate(Offset);
|
||||
}
|
||||
|
||||
void
|
||||
X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
|
||||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
||||
if (TailCallReturnAddrDelta < 0) {
|
||||
// create RETURNADDR area
|
||||
// arg
|
||||
// arg
|
||||
// RETADDR
|
||||
// { ...
|
||||
// RETADDR area
|
||||
// ...
|
||||
// }
|
||||
// [EBP]
|
||||
MF.getFrameInfo()->
|
||||
CreateFixedObject(-TailCallReturnAddrDelta,
|
||||
(-1*SlotSize)+TailCallReturnAddrDelta);
|
||||
}
|
||||
if (hasFP(MF)) {
|
||||
assert((TailCallReturnAddrDelta <= 0) &&
|
||||
"The Delta should always be zero or negative");
|
||||
// Create a frame entry for the EBP register that must be saved.
|
||||
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
|
||||
(int)SlotSize * -2);
|
||||
(int)SlotSize * -2+
|
||||
TailCallReturnAddrDelta);
|
||||
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
|
||||
"Slot for EBP register must be last in order to be found!");
|
||||
}
|
||||
|
@ -1530,6 +1554,41 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
/// mergeSPUpdates - Checks the instruction before/after the passed
|
||||
/// instruction. If it is an ADD/SUB instruction it is deleted
|
||||
/// argument and the stack adjustment is returned as a positive value for ADD
|
||||
/// and a negative for SUB.
|
||||
static int mergeSPUpdates(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
unsigned StackPtr,
|
||||
bool doMergeWithPrevious) {
|
||||
|
||||
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
|
||||
(!doMergeWithPrevious && MBBI == MBB.end()))
|
||||
return 0;
|
||||
|
||||
int Offset = 0;
|
||||
|
||||
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
|
||||
MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
|
||||
unsigned Opc = PI->getOpcode();
|
||||
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
|
||||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
|
||||
PI->getOperand(0).getReg() == StackPtr){
|
||||
Offset += PI->getOperand(2).getImm();
|
||||
MBB.erase(PI);
|
||||
if (!doMergeWithPrevious) MBBI = NI;
|
||||
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
|
||||
Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
|
||||
PI->getOperand(0).getReg() == StackPtr) {
|
||||
Offset -= PI->getOperand(2).getImm();
|
||||
MBB.erase(PI);
|
||||
if (!doMergeWithPrevious) MBBI = NI;
|
||||
}
|
||||
|
||||
return Offset;
|
||||
}
|
||||
|
||||
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
|
||||
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
@ -1543,10 +1602,23 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
|
|||
// Prepare for frame info.
|
||||
unsigned FrameLabelId = 0;
|
||||
|
||||
// Get the number of bytes to allocate from the FrameInfo
|
||||
// Get the number of bytes to allocate from the FrameInfo.
|
||||
uint64_t StackSize = MFI->getStackSize();
|
||||
// Add RETADDR move area to callee saved frame size.
|
||||
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
||||
if (TailCallReturnAddrDelta < 0)
|
||||
X86FI->setCalleeSavedFrameSize(
|
||||
X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
|
||||
uint64_t NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
|
||||
|
||||
// Insert stack pointer adjustment for later moving of return addr. Only
|
||||
// applies to tail call optimized functions where the callee argument stack
|
||||
// size is bigger than the callers.
|
||||
if (TailCallReturnAddrDelta < 0) {
|
||||
BuildMI(MBB, MBBI, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
|
||||
StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
|
||||
}
|
||||
|
||||
if (hasFP(MF)) {
|
||||
// Get the offset of the stack slot for the EBP register... which is
|
||||
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
|
||||
|
@ -1615,6 +1687,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
|
|||
MBB.insert(MBBI, MI);
|
||||
}
|
||||
} else {
|
||||
// If there is an SUB32ri of ESP immediately before this instruction,
|
||||
// merge the two. This can be the case when tail call elimination is
|
||||
// enabled and the callee has more arguments then the caller.
|
||||
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
|
||||
// If there is an ADD32ri or SUB32ri of ESP immediately after this
|
||||
// instruction, merge the two instructions.
|
||||
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
|
||||
|
@ -1711,6 +1787,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
|
|||
switch (RetOpcode) {
|
||||
case X86::RET:
|
||||
case X86::RETI:
|
||||
case X86::TCRETURNdi:
|
||||
case X86::TCRETURNri:
|
||||
case X86::TCRETURNri64:
|
||||
case X86::TCRETURNdi64:
|
||||
case X86::EH_RETURN:
|
||||
case X86::TAILJMPd:
|
||||
case X86::TAILJMPr:
|
||||
|
@ -1773,7 +1853,46 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
|
|||
MachineOperand &DestAddr = MBBI->getOperand(0);
|
||||
assert(DestAddr.isRegister() && "Offset should be in register!");
|
||||
BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr).
|
||||
addReg(DestAddr.getReg());
|
||||
addReg(DestAddr.getReg());
|
||||
// Tail call return: adjust the stack pointer and jump to callee
|
||||
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
|
||||
RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
|
||||
MBBI = prior(MBB.end());
|
||||
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
||||
MachineOperand &StackAdjust = MBBI->getOperand(1);
|
||||
assert( StackAdjust.isImmediate() && "Expecting immediate value.");
|
||||
|
||||
// Adjust stack pointer.
|
||||
int StackAdj = StackAdjust.getImm();
|
||||
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
|
||||
int Offset = 0;
|
||||
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
|
||||
// Incoporate the retaddr area.
|
||||
Offset = StackAdj-MaxTCDelta;
|
||||
assert(Offset >= 0 && "Offset should never be negative");
|
||||
if (Offset) {
|
||||
// Check for possible merge with preceeding ADD instruction.
|
||||
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
|
||||
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
|
||||
}
|
||||
// Jump to label or value in register.
|
||||
if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64)
|
||||
BuildMI(MBB, MBBI, TII.get(X86::TAILJMPd)).
|
||||
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
|
||||
else if (RetOpcode== X86::TCRETURNri64) {
|
||||
BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
|
||||
} else
|
||||
BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr), JumpTarget.getReg());
|
||||
// Delete the pseudo instruction TCRETURN.
|
||||
MBB.erase(MBBI);
|
||||
} else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
|
||||
(X86FI->getTCReturnAddrDelta() < 0)) {
|
||||
// Add the return addr area delta back since we are not tail calling.
|
||||
int delta = -1*X86FI->getTCReturnAddrDelta();
|
||||
MBBI = prior(MBB.end());
|
||||
// Check for possible merge with preceeding ADD instruction.
|
||||
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
|
||||
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
; RUN: llvm-as < %s | llc -tailcallopt | grep TAILCALL
|
||||
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
|
||||
|
||||
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
|
||||
|
||||
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
}
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
ret i32 %tmp11
|
||||
}
|
Loading…
Reference in New Issue