Implement proper tail calls in the X86 backend for all fastcc->fastcc

tail calls.

llvm-svn: 22046
This commit is contained in:
Chris Lattner 2005-05-15 05:46:45 +00:00
parent 406bddfb3a
commit dd66a41e0e
1 changed files with 295 additions and 10 deletions

View File

@ -84,7 +84,8 @@ namespace {
class X86TargetLowering : public TargetLowering {
int VarArgsFrameIndex; // FrameIndex for start of varargs area.
int ReturnAddrIndex; // FrameIndex for return slot.
int BytesToPopOnReturn; // Number of bytes ret should pop.
int BytesToPopOnReturn; // Number of arg bytes ret should pop.
int BytesCallerReserves; // Number of arg bytes caller makes.
public:
X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) {
// Set up the TargetLowering object.
@ -154,6 +155,10 @@ namespace {
//
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
// Return the number of bytes that the caller reserves for arguments passed
// to this function.
unsigned getBytesCallerReserves() const { return BytesCallerReserves; }
/// LowerOperation - Provide custom lowering hooks for some operations.
///
virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
@ -180,6 +185,9 @@ namespace {
virtual std::pair<SDOperand, SDOperand>
LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain, unsigned Depth,
SelectionDAG &DAG);
SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG);
private:
// C Calling Convention implementation.
std::vector<SDOperand> LowerCCCArguments(Function &F, SelectionDAG &DAG);
@ -279,6 +287,7 @@ X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
ReturnAddrIndex = 0; // No return address slot generated yet.
BytesToPopOnReturn = 0; // Callee pops nothing.
BytesCallerReserves = ArgOffset;
// Finally, inform the code generator which regs we return values in.
switch (getValueType(F.getReturnType())) {
@ -631,6 +640,7 @@ X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
ReturnAddrIndex = 0; // No return address slot generated yet.
BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
BytesCallerReserves = 0;
// Finally, inform the code generator which regs we return values in.
switch (getValueType(F.getReturnType())) {
@ -838,6 +848,15 @@ X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
return std::make_pair(ResultVal, Chain);
}
SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
MachineFunction &MF = DAG.getMachineFunction();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
}
return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}
@ -848,14 +867,7 @@ LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
if (Depth) // Depths > 0 not supported yet!
Result = DAG.getConstant(0, getPointerTy());
else {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
MachineFunction &MF = DAG.getMachineFunction();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
}
SDOperand RetAddrFI = DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
if (!isFrameAddress)
// Just load the return address
Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
@ -951,6 +963,8 @@ namespace {
/// tree.
std::map<SDOperand, unsigned> ExprMap;
/// TheDAG - The DAG being selected during Select* operations.
SelectionDAG *TheDAG;
public:
ISel(TargetMachine &TM) : SelectionDAGISel(X86Lowering), X86Lowering(TM) {
}
@ -974,7 +988,6 @@ namespace {
bool FloatPromoteOk = false);
void EmitFoldedLoad(SDOperand Op, X86AddressMode &AM);
bool TryToFoldLoadOpStore(SDNode *Node);
bool EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg);
void EmitCMP(SDOperand LHS, SDOperand RHS, bool isOnlyUse);
bool EmitBranchCC(MachineBasicBlock *Dest, SDOperand Chain, SDOperand Cond);
@ -985,6 +998,8 @@ namespace {
X86AddressMode SelectAddrExprs(const X86ISelAddressMode &IAM);
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM);
void SelectAddress(SDOperand N, X86AddressMode &AM);
bool EmitPotentialTailCall(SDNode *Node);
void EmitFastCCToFastCCTailCall(SDNode *TailCallNode);
void Select(SDOperand N);
};
}
@ -1063,9 +1078,13 @@ void ISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
// registers required to compute each node.
ComputeRegPressure(DAG.getRoot());
TheDAG = &DAG;
// Codegen the basic block.
Select(DAG.getRoot());
TheDAG = 0;
// Finally, look at all of the successors of this block. If any contain a PHI
// node of FP type, we need to insert an FP_REG_KILL in this block.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
@ -3645,6 +3664,266 @@ bool ISel::TryToFoldLoadOpStore(SDNode *Node) {
return true;
}
/// If node is a ret(tailcall) node, emit the specified tail call and return
/// true, otherwise return false.
///
/// FIXME: This whole thing should be a post-legalize optimization pass which
/// recognizes and transforms the dag. We don't want the selection phase doing
/// this stuff!!
///
bool ISel::EmitPotentialTailCall(SDNode *RetNode) {
assert(RetNode->getOpcode() == ISD::RET && "Not a return");
SDOperand Chain = RetNode->getOperand(0);
// If this is a token factor node where one operand is a call, dig into it.
SDOperand TokFactor;
unsigned TokFactorOperand = 0;
if (Chain.getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
if (Chain.getOperand(i).getOpcode() == ISD::CALLSEQ_END ||
Chain.getOperand(i).getOpcode() == X86ISD::TAILCALL) {
TokFactorOperand = i;
TokFactor = Chain;
Chain = Chain.getOperand(i);
break;
}
if (TokFactor.Val == 0) return false; // No call operand.
}
// Skip the CALLSEQ_END node if present.
if (Chain.getOpcode() == ISD::CALLSEQ_END)
Chain = Chain.getOperand(0);
// Is a tailcall the last control operation that occurs before the return?
if (Chain.getOpcode() != X86ISD::TAILCALL)
return false;
// If we return a value, is it the value produced by the call?
if (RetNode->getNumOperands() > 1) {
// Not returning the ret val of the call?
if (Chain.Val->getNumValues() == 1 ||
RetNode->getOperand(1) != Chain.getValue(1))
return false;
if (RetNode->getNumOperands() > 2) {
if (Chain.Val->getNumValues() == 2 ||
RetNode->getOperand(2) != Chain.getValue(2))
return false;
}
assert(RetNode->getNumOperands() <= 3);
}
// CalleeCallArgAmt - The total number of bytes used for the callee arg area.
// For FastCC, this will always be > 0.
unsigned CalleeCallArgAmt =
cast<ConstantSDNode>(Chain.getOperand(2))->getValue();
// CalleeCallArgPopAmt - The number of bytes in the call area popped by the
// callee. For FastCC this will always be > 0, for CCC this is always 0.
unsigned CalleeCallArgPopAmt =
cast<ConstantSDNode>(Chain.getOperand(3))->getValue();
// There are several cases we can handle here. First, if the caller and
// callee are both CCC functions, we can tailcall if the callee takes <= the
// number of argument bytes that the caller does.
if (CalleeCallArgPopAmt == 0 && // Callee is C CallingConv?
X86Lowering.getBytesToPopOnReturn() == 0) { // Caller is C CallingConv?
// Check to see if caller arg area size >= callee arg area size.
if (X86Lowering.getBytesCallerReserves() >= CalleeCallArgAmt) {
//std::cerr << "CCC TAILCALL UNIMP!\n";
// If TokFactor is non-null, emit all operands.
//EmitCCCToCCCTailCall(Chain.Val);
//return true;
}
return false;
}
// Second, if both are FastCC functions, we can always perform the tail call.
if (CalleeCallArgPopAmt && X86Lowering.getBytesToPopOnReturn()) {
// If TokFactor is non-null, emit all operands before the call.
if (TokFactor.Val) {
for (unsigned i = 0, e = TokFactor.getNumOperands(); i != e; ++i)
if (i != TokFactorOperand)
Select(TokFactor.getOperand(i));
}
EmitFastCCToFastCCTailCall(Chain.Val);
return true;
}
// We don't support mixed calls, due to issues with alignment. We could in
// theory handle some mixed calls from CCC -> FastCC if the stack is properly
// aligned (which depends on the number of arguments to the callee). TODO.
return false;
}
static SDOperand GetAdjustedArgumentStores(SDOperand Chain, int Offset,
SelectionDAG &DAG) {
MVT::ValueType StoreVT;
switch (Chain.getOpcode()) {
case ISD::CALLSEQ_START:
// If we found the start of the call sequence, we're done.
return Chain;
case ISD::TokenFactor: {
std::vector<SDOperand> Ops;
Ops.reserve(Chain.getNumOperands());
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
Ops.push_back(GetAdjustedArgumentStores(Chain.getOperand(i), Offset,DAG));
return DAG.getNode(ISD::TokenFactor, MVT::Other, Ops);
}
case ISD::STORE: // Normal store
StoreVT = Chain.getOperand(1).getValueType();
break;
case ISD::TRUNCSTORE: // FLOAT store
StoreVT = cast<MVTSDNode>(Chain)->getExtraValueType();
break;
}
SDOperand OrigDest = Chain.getOperand(2);
unsigned OrigOffset;
if (OrigDest.getOpcode() == ISD::CopyFromReg) {
OrigOffset = 0;
assert(cast<RegSDNode>(OrigDest)->getReg() == X86::ESP);
} else {
// We expect only (ESP+C)
assert(OrigDest.getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(OrigDest.getOperand(1)) &&
OrigDest.getOperand(0).getOpcode() == ISD::CopyFromReg &&
cast<RegSDNode>(OrigDest.getOperand(0))->getReg() == X86::ESP);
OrigOffset = cast<ConstantSDNode>(OrigDest.getOperand(1))->getValue();
}
// Compute the new offset from the incoming ESP value we wish to use.
unsigned NewOffset = OrigOffset + Offset;
unsigned OpSize = (MVT::getSizeInBits(StoreVT)+7)/8; // Bits -> Bytes
MachineFunction &MF = DAG.getMachineFunction();
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, NewOffset);
SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
SDOperand InChain = GetAdjustedArgumentStores(Chain.getOperand(0), Offset,
DAG);
if (Chain.getOpcode() == ISD::STORE)
return DAG.getNode(ISD::STORE, MVT::Other, InChain, Chain.getOperand(1),
FIN);
assert(Chain.getOpcode() == ISD::TRUNCSTORE);
return DAG.getNode(ISD::TRUNCSTORE, MVT::Other, InChain, Chain.getOperand(1),
FIN, DAG.getSrcValue(NULL), StoreVT);
}
/// EmitFastCCToFastCCTailCall - Given a tailcall in the tail position to a
/// fastcc function from a fastcc function, emit the code to emit a 'proper'
/// tail call.
void ISel::EmitFastCCToFastCCTailCall(SDNode *TailCallNode) {
unsigned CalleeCallArgSize =
cast<ConstantSDNode>(TailCallNode->getOperand(2))->getValue();
unsigned CallerArgSize = X86Lowering.getBytesToPopOnReturn();
//std::cerr << "****\n*** EMITTING TAIL CALL!\n****\n";
// Adjust argument stores. Instead of storing to [ESP], f.e., store to frame
// indexes that are relative to the incoming ESP. If the incoming and
// outgoing arg sizes are the same we will store to [InESP] instead of
// [CurESP] and the ESP referenced will be relative to the incoming function
// ESP.
int ESPOffset = CallerArgSize-CalleeCallArgSize;
SDOperand AdjustedArgStores =
GetAdjustedArgumentStores(TailCallNode->getOperand(0), ESPOffset, *TheDAG);
// Copy the return address of the caller into a virtual register so we don't
// clobber it.
SDOperand RetVal;
if (ESPOffset) {
SDOperand RetValAddr = X86Lowering.getReturnAddressFrameIndex(*TheDAG);
RetVal = TheDAG->getLoad(MVT::i32, TheDAG->getEntryNode(),
RetValAddr, TheDAG->getSrcValue(NULL));
SelectExpr(RetVal);
}
// Codegen all of the argument stores.
Select(AdjustedArgStores);
if (RetVal.Val) {
// Emit a store of the saved ret value to the new location.
MachineFunction &MF = TheDAG->getMachineFunction();
int ReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(4, ESPOffset-4);
SDOperand RetValAddr = TheDAG->getFrameIndex(ReturnAddrFI, MVT::i32);
Select(TheDAG->getNode(ISD::STORE, MVT::Other, TheDAG->getEntryNode(),
RetVal, RetValAddr));
}
// Get the destination value.
SDOperand Callee = TailCallNode->getOperand(1);
bool isDirect = isa<GlobalAddressSDNode>(Callee) ||
isa<ExternalSymbolSDNode>(Callee);
unsigned CalleeReg;
if (!isDirect) CalleeReg = SelectExpr(Callee);
unsigned RegOp1 = 0;
unsigned RegOp2 = 0;
if (TailCallNode->getNumOperands() > 4) {
// The first value is passed in (a part of) EAX, the second in EDX.
RegOp1 = SelectExpr(TailCallNode->getOperand(4));
if (TailCallNode->getNumOperands() > 5)
RegOp2 = SelectExpr(TailCallNode->getOperand(5));
switch (TailCallNode->getOperand(4).getValueType()) {
default: assert(0 && "Bad thing to pass in regs");
case MVT::i1:
case MVT::i8:
BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(RegOp1);
RegOp1 = X86::AL;
break;
case MVT::i16:
BuildMI(BB, X86::MOV16rr, 1,X86::AX).addReg(RegOp1);
RegOp1 = X86::AX;
break;
case MVT::i32:
BuildMI(BB, X86::MOV32rr, 1,X86::EAX).addReg(RegOp1);
RegOp1 = X86::EAX;
break;
}
if (RegOp2)
switch (TailCallNode->getOperand(5).getValueType()) {
default: assert(0 && "Bad thing to pass in regs");
case MVT::i1:
case MVT::i8:
BuildMI(BB, X86::MOV8rr, 1, X86::DL).addReg(RegOp2);
RegOp2 = X86::DL;
break;
case MVT::i16:
BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(RegOp2);
RegOp2 = X86::DX;
break;
case MVT::i32:
BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RegOp2);
RegOp2 = X86::EDX;
break;
}
}
// Adjust ESP.
if (ESPOffset)
BuildMI(BB, X86::ADJSTACKPTRri, 2,
X86::ESP).addReg(X86::ESP).addImm(ESPOffset);
// TODO: handle jmp [mem]
if (!isDirect) {
BuildMI(BB, X86::TAILJMPr, 1).addReg(CalleeReg);
} else if (GlobalAddressSDNode *GASD = dyn_cast<GlobalAddressSDNode>(Callee)){
BuildMI(BB, X86::TAILJMPd, 1).addGlobalAddress(GASD->getGlobal(),true);
} else {
ExternalSymbolSDNode *ESSDN = cast<ExternalSymbolSDNode>(Callee);
BuildMI(BB, X86::TAILJMPd, 1).addExternalSymbol(ESSDN->getSymbol(), true);
}
// ADD IMPLICIT USE RegOp1/RegOp2's
}
void ISel::Select(SDOperand N) {
unsigned Tmp1, Tmp2, Opc;
@ -3698,6 +3977,12 @@ void ISel::Select(SDOperand N) {
}
return;
case ISD::RET:
if (N.getOperand(0).getOpcode() == ISD::CALLSEQ_END ||
N.getOperand(0).getOpcode() == X86ISD::TAILCALL ||
N.getOperand(0).getOpcode() == ISD::TokenFactor)
if (EmitPotentialTailCall(Node))
return;
switch (N.getNumOperands()) {
default:
assert(0 && "Unknown return instruction!");