diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 9b4c8af95dc3..5f8c1c2ae4fb 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -1033,6 +1033,36 @@ public:
     return false;
   }
 
+  /// CheckTailCallReturnConstraints - Check whether the CALL node immediately
+  /// precedes the RET node and whether the return uses the result of the node
+  /// or is a void return. This function can be used by the target to determine
+  /// eligibility of tail call optimization.
+  static bool CheckTailCallReturnConstraints(SDOperand Call, SDOperand Ret) {
+    unsigned NumOps = Ret.getNumOperands();
+    if ((NumOps == 1 &&
+         (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+          Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+        (NumOps > 1 &&
+         Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+         Ret.getOperand(1) == SDOperand(Call.Val,0)))
+      return true;
+    return false;
+  }
+
+  /// GetPossiblePreceedingTailCall - Get the preceding TailCallNodeOpCode node
+  /// if it exists, skipping a possible ISD::TokenFactor.
+  static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain,
+                                                 unsigned TailCallNodeOpCode) {
+    if (Chain.getOpcode() == TailCallNodeOpCode) {
+      return Chain;
+    } else if (Chain.getOpcode() == ISD::TokenFactor) {
+      if (Chain.getNumOperands() &&
+          Chain.getOperand(0).getOpcode() == TailCallNodeOpCode)
+        return Chain.getOperand(0);
+    }
+    return Chain;
+  }
+
   /// CustomPromoteOperation - This callback is invoked for operations that are
   /// unsupported by the target, are registered to use 'custom' lowering, and
   /// whose type needs to be promoted.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 530ffd754b43..c5911e5d660a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -4612,6 +4612,40 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
   }
 }
 
+/// IsFixedFrameObjectWithPosOffset - Check whether the object is a fixed frame
+/// object and whether its offset is >= 0.
+static bool
+IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDOperand Op) {
+  if (!isa<FrameIndexSDNode>(Op)) return false;
+
+  FrameIndexSDNode *FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
+  int FrameIdx = FrameIdxNode->getIndex();
+  return MFI->isFixedObjectIndex(FrameIdx) &&
+         MFI->getObjectOffset(FrameIdx) >= 0;
+}
+
+/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
+/// possibly be overwritten when lowering the outgoing arguments in a tail
+/// call. Currently the implementation of this call is very conservative and
+/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
+/// virtual registers would be overwritten by direct lowering.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
+                                                    MachineFrameInfo *MFI) {
+  RegisterSDNode *OpReg = NULL;
+  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+      (Op.getOpcode() == ISD::CopyFromReg &&
+       (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+       (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+      (Op.getOpcode() == ISD::LOAD &&
+       IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) ||
+      (Op.getOpcode() == ISD::MERGE_VALUES &&
+       Op.getOperand(Op.ResNo).getOpcode() == ISD::LOAD &&
+       IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.ResNo).
+                                       getOperand(1))))
+    return true;
+  return false;
+}
+
 /// CheckDAGForTailCallsAndFixThem - This function looks for CALL nodes in the
 /// DAG and fixes their tailcall attribute operand.
 static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
@@ -4636,19 +4670,51 @@ static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG,
     // eligible (no RET or the target rejects) the attribute is fixed to
     // false. The TargetLowering::IsEligibleForTailCallOptimization function
     // must correctly identify tail call optimizable calls.
-    if (isMarkedTailCall &&
-        (Ret==NULL ||
-         !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) {
+    if (!isMarkedTailCall) continue;
+    if (Ret==NULL ||
+        !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG)) {
+      // Not eligible. Mark the CALL node as a non-tail call.
       SmallVector<SDOperand, 8> Ops;
       unsigned idx=0;
-      for(SDNode::op_iterator I =OpCall.Val->op_begin(),
-          E=OpCall.Val->op_end(); I!=E; I++, idx++) {
+      for(SDNode::op_iterator I = OpCall.Val->op_begin(),
+          E = OpCall.Val->op_end(); I != E; I++, idx++) {
         if (idx!=3)
           Ops.push_back(*I);
-        else
+        else
           Ops.push_back(DAG.getConstant(false, TLI.getPointerTy()));
       }
       DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size());
+    } else {
+      // Look for tail-call-clobbered arguments. Emit a series of
+      // CopyToReg/CopyFromReg virtual register nodes to protect them.
+      SmallVector<SDOperand, 8> Ops;
+      SDOperand Chain = OpCall.getOperand(0), InFlag;
+      unsigned idx=0;
+      for(SDNode::op_iterator I = OpCall.Val->op_begin(),
+          E = OpCall.Val->op_end(); I != E; I++, idx++) {
+        SDOperand Arg = *I;
+        if (idx > 4 && (idx % 2)) {
+          bool isByVal = cast<ARG_FLAGSSDNode>(OpCall.getOperand(idx+1))->
+            getArgFlags().isByVal();
+          MachineFunction &MF = DAG.getMachineFunction();
+          MachineFrameInfo *MFI = MF.getFrameInfo();
+          if (!isByVal &&
+              IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+            MVT::ValueType VT = Arg.getValueType();
+            unsigned VReg = MF.getRegInfo().
+              createVirtualRegister(TLI.getRegClassFor(VT));
+            Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
+            InFlag = Chain.getValue(1);
+            Arg = DAG.getCopyFromReg(Chain, VReg, VT, InFlag);
+            Chain = Arg.getValue(1);
+            InFlag = Arg.getValue(2);
+          }
+        }
+        Ops.push_back(Arg);
+      }
+      // Link in the chain of CopyTo/CopyFromReg nodes.
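The CopyToReg/CopyFromReg protection above exists because a tail call reuses the caller's own argument area: outgoing argument i may be stored into the very stack slot that still feeds outgoing argument j. A minimal standalone sketch of the hazard and of staging through temporaries (plain C++, modeling stack slots as an array and virtual registers as temporaries; all names are illustrative, not LLVM API):

```cpp
#include <cassert>
#include <vector>

// Model: a frame whose incoming-argument slots are reused for the outgoing
// arguments of a tail call. Plan[i] = index of the incoming slot whose value
// must end up in outgoing slot i.
static void naiveLowering(std::vector<int> &Slots, const std::vector<int> &Plan) {
  for (unsigned i = 0; i < Plan.size(); ++i)
    Slots[i] = Slots[Plan[i]];           // may read a slot already overwritten
}

static void protectedLowering(std::vector<int> &Slots, const std::vector<int> &Plan) {
  // First copy every source into a temporary ("virtual register"), then store.
  std::vector<int> VRegs;
  for (unsigned i = 0; i < Plan.size(); ++i)
    VRegs.push_back(Slots[Plan[i]]);
  for (unsigned i = 0; i < Plan.size(); ++i)
    Slots[i] = VRegs[i];
}

int main() {
  // Swap the two incoming arguments: f(a, b) tail-calls g(b, a).
  std::vector<int> A = {1, 2}, B = {1, 2};
  std::vector<int> Plan = {1, 0};
  naiveLowering(A, Plan);      // A becomes {2, 2}: slot 0 was clobbered early
  protectedLowering(B, Plan);  // B becomes {2, 1}: the intended swap
  assert(A[1] == 2 && B[0] == 2 && B[1] == 1);
  return 0;
}
```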
+ Ops[0] = Chain; + DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); } } } diff --git a/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp index 50c9664fbc98..c0621a074f34 100644 --- a/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -143,7 +143,8 @@ int PPCCodeEmitter::getMachineOpValue(MachineInstr &MI, MachineOperand &MO) { MO.isConstantPoolIndex() || MO.isJumpTableIndex()) { unsigned Reloc = 0; if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho || - MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF) + MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF || + MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) Reloc = PPC::reloc_pcrel_bx; else { if (TM.getRelocationModel() == Reloc::PIC_) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d3f410ed6781..37b8e7bb114e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -26,9 +26,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" +#include "llvm/ParameterAttributes.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -412,6 +414,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; + case PPCISD::TAILCALL: return "PPCISD::TAILCALL"; + case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; } } @@ -1317,6 +1321,20 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { return FPR; } +/// CalculateStackSlotSize - Calculates the size reserved for this argument on +/// the stack. +static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag, + bool isVarArg, unsigned PtrByteSize) { + MVT::ValueType ArgVT = Arg.getValueType(); + ISD::ArgFlagsTy Flags = cast(Flag)->getArgFlags(); + unsigned ArgSize =MVT::getSizeInBits(ArgVT)/8; + if (Flags.isByVal()) + ArgSize = Flags.getByValSize(); + ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + + return ArgSize; +} + SDOperand PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, @@ -1338,10 +1356,15 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, bool isPPC64 = PtrVT == MVT::i64; bool isMachoABI = Subtarget.isMachoABI(); bool isELF32_ABI = Subtarget.isELF32_ABI(); + // Potential tail calls could cause overwriting of argument stack slots. + unsigned CC = MF.getFunction()->getCallingConv(); + bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); - + // Area that is at least reserved in caller of this function. + unsigned MinReservedArea = ArgOffset; + static const unsigned GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1426,7 +1449,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, // even GPR_idx value or to an even ArgOffset value. 
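CalculateStackSlotSize above reduces to a small rounding rule: a byval argument occupies its aggregate size, anything else the size of its type, and the result is rounded up to a multiple of the pointer size. A standalone restatement with spot checks (the sizes are examples, not ABI constants):

```cpp
#include <cassert>

// Standalone model of the slot-size rule used by CalculateStackSlotSize.
static unsigned StackSlotSize(unsigned TypeBytes, bool IsByVal,
                              unsigned ByValBytes, unsigned PtrByteSize) {
  unsigned ArgSize = IsByVal ? ByValBytes : TypeBytes;
  return ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize;
}

int main() {
  assert(StackSlotSize(4, false, 0, 4) == 4);   // i32 on a 32-bit target
  assert(StackSlotSize(4, false, 0, 8) == 8);   // i32 padded on a 64-bit target
  assert(StackSlotSize(0, true, 13, 4) == 16);  // 13-byte byval rounds to 16
  return 0;
}
```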
SmallVector MemOps; - + unsigned nAltivecParamsAtEnd = 0; for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { SDOperand ArgVal; bool needsLoad = false; @@ -1440,6 +1463,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, unsigned CurArgOffset = ArgOffset; + // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. + if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + if (isVarArg || isPPC64) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), + Op.getOperand(ArgNo+3), + isVarArg, + PtrByteSize); + } else nAltivecParamsAtEnd++; + } else + // Calculate min reserved area. + MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), + Op.getOperand(ArgNo+3), + isVarArg, + PtrByteSize); + // FIXME alignment for ELF may not be right // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. @@ -1614,7 +1654,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, // that we ran out of physical registers of the appropriate type. if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, - CurArgOffset + (ArgSize - ObjSize)); + CurArgOffset + (ArgSize - ObjSize), + isImmutable); SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); } @@ -1622,6 +1663,25 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, ArgValues.push_back(ArgVal); } + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo(); + // Add the Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + MinReservedArea = + std::max(MinReservedArea, + PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + FI->setMinReservedArea(MinReservedArea); + // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { @@ -1720,6 +1780,131 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); } +/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus +/// linkage area. +static unsigned +CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, + bool isPPC64, + bool isMachoABI, + bool isVarArg, + unsigned CC, + SDOperand Call, + unsigned &nAltivecParamsAtEnd) { + // Count how many bytes are to be pushed on the stack, including the linkage + // area, and parameter passing area. We start with 24/48 bytes, which is + // prereserved space for [SP][CR][LR][3 x unused]. + unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned NumOps = (Call.getNumOperands() - 5) / 2; + unsigned PtrByteSize = isPPC64 ? 8 : 4; + + // Add up all the space actually used. 
+  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+  // they all go in registers, but we must reserve stack space for them for
+  // possible use by the caller. In varargs or 64-bit calls, parameters are
+  // assigned stack space in order, with padding so Altivec parameters are
+  // 16-byte aligned.
+  nAltivecParamsAtEnd = 0;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDOperand Arg = Call.getOperand(5+2*i);
+    SDOperand Flag = Call.getOperand(5+2*i+1);
+    MVT::ValueType ArgVT = Arg.getValueType();
+    // Varargs Altivec parameters are padded to a 16 byte boundary.
+    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+      if (!isVarArg && !isPPC64) {
+        // Non-varargs Altivec parameters go after all the non-Altivec
+        // parameters; handle those later so we know how much padding we need.
+        nAltivecParamsAtEnd++;
+        continue;
+      }
+      // Varargs and 64-bit Altivec parameters are padded to a 16 byte boundary.
+      NumBytes = ((NumBytes+15)/16)*16;
+    }
+    NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
+  }
+
+  // Allow for Altivec parameters at the end, if needed.
+  if (nAltivecParamsAtEnd) {
+    NumBytes = ((NumBytes+15)/16)*16;
+    NumBytes += 16*nAltivecParamsAtEnd;
+  }
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is
+  // varargs. Because we cannot tell if this is needed on the caller side, we
+  // have to conservatively assume that it is needed. As such, make sure we
+  // have at least enough stack space for the caller to store the 8 GPRs.
+  NumBytes = std::max(NumBytes,
+                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+  // A tail call needs the stack to be aligned.
+  if (CC==CallingConv::Fast && PerformTailCallOpt) {
+    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+      getStackAlignment();
+    unsigned AlignMask = TargetAlign-1;
+    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+  }
+
+  return NumBytes;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tail call.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+                                   unsigned ParamSize) {
+
+  if (!IsTailCall) return 0;
+
+  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+  unsigned CallerMinReservedArea = FI->getMinReservedArea();
+  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+  // Remember it only if the new adjustment is bigger.
+  if (SPDiff < FI->getTailCallSPDelta())
+    FI->setTailCallSPDelta(SPDiff);
+
+  return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return. A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+                                                     SDOperand Ret,
+                                                     SelectionDAG& DAG) const {
+  // Variable argument functions are not supported.
+  if (!PerformTailCallOpt ||
+      cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;
+
+  if (CheckTailCallReturnConstraints(Call, Ret)) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    unsigned CallerCC = MF.getFunction()->getCallingConv();
+    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+      // Functions containing byval parameters are not supported.
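CalculateTailCallSPDiff compares the area this function's own callers reserved with what the upcoming tail call needs, and keeps the most negative delta so the prologue can reserve that much extra room. A standalone model of that bookkeeping (the 64-byte reserved area is an arbitrary example, not an ABI value):

```cpp
#include <algorithm>
#include <cassert>

// Standalone model of CalculateTailCallSPDiff: the callee may need more (or
// less) argument space than the caller reserved; the difference is the amount
// the stack pointer must move before the tail call jump.
struct FuncInfoModel {
  unsigned MinReservedArea = 64;  // what this function's callers reserved
  int TailCallSPDelta = 0;        // most negative SPDiff seen so far
};

static int TailCallSPDiff(FuncInfoModel &FI, bool IsTailCall, unsigned ParamSize) {
  if (!IsTailCall) return 0;
  int SPDiff = (int)FI.MinReservedArea - (int)ParamSize;
  // Record the worst (most negative) delta; the prologue reserves that much
  // extra room so the enlarged argument area never overwrites saved state.
  FI.TailCallSPDelta = std::min(FI.TailCallSPDelta, SPDiff);
  return SPDiff;
}

int main() {
  FuncInfoModel FI;
  assert(TailCallSPDiff(FI, true, 64) == 0);    // same-size frame: no motion
  assert(TailCallSPDiff(FI, true, 96) == -32);  // callee needs 32 more bytes
  assert(FI.TailCallSPDelta == -32);            // remembered for the prologue
  return 0;
}
```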
+ for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) { + ISD::ArgFlagsTy Flags = cast(Call.getOperand(5+2*i+1)) + ->getArgFlags(); + if (Flags.isByVal()) return false; + } + + SDOperand Callee = Call.getOperand(4); + // Non PIC/GOT tail calls are supported. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return true; + + // At the moment we can only do local tail calls (in same module, hidden + // or protected) if we are generating PIC. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + return G->getGlobal()->hasHiddenVisibility() + || G->getGlobal()->hasProtectedVisibility(); + } + } + + return false; +} + /// isCallCompatibleAddress - Return the immediate to use if the specified /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) { @@ -1735,6 +1920,102 @@ static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) { DAG.getTargetLoweringInfo().getPointerTy()).Val; } +struct TailCallArgumentInfo { + SDOperand Arg; + SDOperand FrameIdxOp; + int FrameIdx; + + TailCallArgumentInfo() : FrameIdx(0) {} +}; + +/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. +static void +StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, + SDOperand Chain, + const SmallVector &TailCallArgs, + SmallVector &MemOpChains) { + for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { + SDOperand Arg = TailCallArgs[i].Arg; + SDOperand FIN = TailCallArgs[i].FrameIdxOp; + int FI = TailCallArgs[i].FrameIdx; + // Store relative to framepointer. + MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, + PseudoSourceValue::getFixedStack(), + FI)); + } +} + +/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to +/// the appropriate stack slot for the tail call optimized function call. +static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, + MachineFunction &MF, + SDOperand Chain, + SDOperand OldRetAddr, + SDOperand OldFP, + int SPDiff, + bool isPPC64, + bool isMachoABI) { + if (SPDiff) { + // Calculate the new stack slot for the return address. + int SlotSize = isPPC64 ? 8 : 4; + int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, + isMachoABI); + int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, + NewRetAddrLoc); + int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, + isMachoABI); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + + MVT::ValueType VT = isPPC64 ? MVT::i64 : MVT::i32; + SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); + Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx, + PseudoSourceValue::getFixedStack(), NewRetAddr); + SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); + Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx, + PseudoSourceValue::getFixedStack(), NewFPIdx); + } + return Chain; +} + +/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate +/// the position of the argument. +static void +CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, + SDOperand Arg, int SPDiff, unsigned ArgOffset, + SmallVector& TailCallArguments) { + int Offset = ArgOffset + SPDiff; + uint32_t OpSize = (MVT::getSizeInBits(Arg.getValueType())+7)/8; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + MVT::ValueType VT = isPPC64 ? 
MVT::i64 : MVT::i32; + SDOperand FIN = DAG.getFrameIndex(FI, VT); + TailCallArgumentInfo Info; + Info.Arg = Arg; + Info.FrameIdxOp = FIN; + Info.FrameIdx = FI; + TailCallArguments.push_back(Info); +} + +/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address +/// stack slot. Returns the chain as result and the loaded frame pointers in +/// LROpOut/FPOpout. Used when tail calling. +SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, + int SPDiff, + SDOperand Chain, + SDOperand &LROpOut, + SDOperand &FPOpOut) { + if (SPDiff) { + // Load the LR and FP stack slot for later adjusting. + MVT::ValueType VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; + LROpOut = getReturnAddrFrameIndex(DAG); + LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0); + Chain = SDOperand(LROpOut.Val, 1); + FPOpOut = getFramePointerFrameIndex(DAG); + FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0); + Chain = SDOperand(FPOpOut.Val, 1); + } + return Chain; +} + /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as @@ -1750,11 +2031,39 @@ CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, NULL, 0, NULL, 0); } +/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of +/// tail calls. +static void +LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain, + SDOperand Arg, SDOperand PtrOff, int SPDiff, + unsigned ArgOffset, bool isPPC64, bool isTailCall, + bool isVector, SmallVector &MemOpChains, + SmallVector& TailCallArguments) { + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + if (!isTailCall) { + if (isVector) { + SDOperand StackPtr; + if (isPPC64) + StackPtr = DAG.getRegister(PPC::X1, MVT::i64); + else + StackPtr = DAG.getRegister(PPC::R1, MVT::i32); + PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, + DAG.getConstant(ArgOffset, PtrVT)); + } + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + // Calculate and remember argument location. + } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, + TailCallArguments); +} + SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget, TargetMachine &TM) { SDOperand Chain = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + unsigned CC = cast(Op.getOperand(1))->getValue(); + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0 && + CC == CallingConv::Fast && PerformTailCallOpt; SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; @@ -1765,58 +2074,32 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; + MachineFunction &MF = DAG.getMachineFunction(); + // args_to_use will accumulate outgoing args for the PPCISD::CALL case in // SelectExpr to use to put the arguments in the appropriate registers. std::vector args_to_use; + // Mark this function as potentially containing a function that contains a + // tail call. As a consequence the frame pointer will be used for dynamicalloc + // and restoring the callers stack pointer in this functions epilog. This is + // done because by tail calling the called function might overwrite the value + // in this function's (MF) stack pointer stack slot 0(SP). 
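As an aside before the call lowering continues: EmitTailCallStoreFPAndRetAddr above relocates the saved return address and frame pointer by placing each at SPDiff plus its usual linkage offset. A standalone sketch, with hypothetical offsets standing in for PPCFrameInfo::getReturnSaveOffset and getFramePointerSaveOffset:

```cpp
#include <cassert>

// Standalone model of where the saved return address and frame pointer land
// when the frame grows by -SPDiff bytes (SPDiff <= 0 when growing): the
// linkage slots keep their offsets relative to the new stack pointer.
static int NewSlot(int SPDiff, int LinkageOffset) {
  return SPDiff + LinkageOffset;
}

int main() {
  const int ReturnSaveOffset = 8;  // hypothetical, stands in for the ABI value
  const int FPSaveOffset = -4;     // hypothetical
  int SPDiff = -32;                // callee needs 32 bytes more than caller
  assert(NewSlot(SPDiff, ReturnSaveOffset) == -24); // LR slot moves down by 32
  assert(NewSlot(SPDiff, FPSaveOffset) == -36);     // FP slot moves down by 32
  return 0;
}
```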
+ if (PerformTailCallOpt && CC==CallingConv::Fast) + MF.getInfo()->setHasFastCall(); + + unsigned nAltivecParamsAtEnd = 0; + // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned NumBytes = + CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC, + Op, nAltivecParamsAtEnd); - // Add up all the space actually used. - // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually - // they all go in registers, but we must reserve stack space for them for - // possible use by the caller. In varargs or 64-bit calls, parameters are - // assigned stack space in order, with padding so Altivec parameters are - // 16-byte aligned. - unsigned nAltivecParamsAtEnd = 0; - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - MVT::ValueType ArgVT = Arg.getValueType(); - if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || - ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { - if (!isVarArg && !isPPC64) { - // Non-varargs Altivec parameters go after all the non-Altivec parameters; - // do those last so we know how much padding we need. - nAltivecParamsAtEnd++; - continue; - } else { - // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. - NumBytes = ((NumBytes+15)/16)*16; - } - } - ISD::ArgFlagsTy Flags = - cast(Op.getOperand(5+2*i+1))->getArgFlags(); - unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8; - if (Flags.isByVal()) - ArgSize = Flags.getByValSize(); - ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; - NumBytes += ArgSize; - } - // Allow for Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - NumBytes = ((NumBytes+15)/16)*16; - NumBytes += 16*nAltivecParamsAtEnd; - } - - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. - NumBytes = std::max(NumBytes, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + // Calculate by how many bytes the stack has to be adjusted in case of tail + // call optimization. + int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass @@ -1824,6 +2107,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, DAG.getConstant(NumBytes, PtrVT)); SDOperand CallSeqStart = Chain; + // Load the return address and frame pointer so it can be move somewhere else + // later. + SDOperand LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp); + // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. @@ -1861,6 +2149,8 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, const unsigned *GPR = isPPC64 ? 
GPR_64 : GPR_32; std::vector > RegsToPass; + SmallVector TailCallArguments; + SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { bool inMem = false; @@ -1959,7 +2249,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, if (GPR_idx != NumGPRs) { RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, false, MemOpChains, + TailCallArguments); inMem = true; } if (inMem || isMachoABI) { @@ -2007,7 +2299,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, } } } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, false, MemOpChains, + TailCallArguments); inMem = true; } if (inMem || isMachoABI) { @@ -2058,6 +2352,7 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, } break; } + // Non-varargs Altivec params generally go in registers, but have // stack space allocated at the end. if (VR_idx != NumVRs) { @@ -2065,10 +2360,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); } else if (nAltivecParamsAtEnd==0) { // We are emitting Altivec params in order. - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, - DAG.getConstant(ArgOffset, PtrVT)); - SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0); - MemOpChains.push_back(Store); + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, true, MemOpChains, + TailCallArguments); ArgOffset += 16; } break; @@ -2090,10 +2384,11 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { - SDOperand PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, - DAG.getConstant(ArgOffset, PtrVT)); - SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0); - MemOpChains.push_back(Store); + SDOperand PtrOff; + // We are emitting Altivec params in order. + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, true, MemOpChains, + TailCallArguments); ArgOffset += 16; } } @@ -2120,6 +2415,37 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, InFlag = Chain.getValue(1); } + // Emit a sequence of copyto/copyfrom virtual registers for arguments that + // might overwrite each other in case of tail call optimization. + if (isTailCall) { + SmallVector MemOpChains2; + // Do not flag preceeding copytoreg stuff together with the following stuff. + InFlag = SDOperand(); + StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, + MemOpChains2); + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains2[0], MemOpChains2.size()); + + // Store the return address to the appropriate stack slot. + Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, + isPPC64, isMachoABI); + } + + // Emit callseq_end just before tailcall node. 
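StoreTailCallArgumentsToStackSlot above runs as a second phase: destinations are recorded by CalculateTailCallArgDest while the arguments are walked, and the stores are emitted only after every argument value has been produced, so no store can clobber a pending load. A standalone model of that record-then-flush pattern:

```cpp
#include <cassert>
#include <vector>

// Model of deferred tail-call argument stores: record (slot, value) pairs in
// phase 1, flush them in phase 2 once all reads have happened.
struct PendingStore { int Slot; int Value; };

int main() {
  std::vector<int> Frame = {10, 20, 30};
  std::vector<PendingStore> Pending;

  // Phase 1: compute outgoing values (reads) and record destinations.
  Pending.push_back({0, Frame[2]});   // arg0 <- old slot 2
  Pending.push_back({2, Frame[0]});   // arg2 <- old slot 0

  // Phase 2: flush all stores; reads are already done, so order is safe.
  for (const PendingStore &S : Pending)
    Frame[S.Slot] = S.Value;

  assert(Frame[0] == 30 && Frame[2] == 10);
  return 0;
}
```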
+ if (isTailCall) { + SmallVector CallSeqOps; + SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + CallSeqOps.push_back(Chain); + CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes)); + CallSeqOps.push_back(DAG.getIntPtrConstant(0)); + if (InFlag.Val) + CallSeqOps.push_back(InFlag); + Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0], + CallSeqOps.size()); + InFlag = Chain.getValue(1); + } + std::vector NodeTys; NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. @@ -2157,6 +2483,9 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, Ops.push_back(Chain); CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF; Callee.Val = 0; + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy())); } // If this is a direct call, pass the chain and the callee. @@ -2164,29 +2493,48 @@ SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, Ops.push_back(Chain); Ops.push_back(Callee); } - + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - + + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCRegisterInfo::eliminateCallFramePseudoInstr. + int BytesCalleePops = + (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + if (InFlag.Val) Ops.push_back(InFlag); + + // Emit tail call. + if (isTailCall) { + assert(InFlag.Val && + "Flag must be set. Depend on flag being set in LowerRET"); + Chain = DAG.getNode(PPCISD::TAILCALL, + Op.Val->getVTList(), &Ops[0], Ops.size()); + return SDOperand(Chain.Val, Op.ResNo); + } + Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, PtrVT), - DAG.getConstant(0, PtrVT), + DAG.getConstant(BytesCalleePops, PtrVT), InFlag); if (Op.Val->getValueType(0) != MVT::Other) InFlag = Chain.getValue(1); SmallVector ResultVals; SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); + unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); + CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC); // Copy all of the result registers out of their specified physreg. 
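The BytesCalleePops accounting above pairs with the ADJCALLSTACKUP fix-up added in PPCRegisterInfo.cpp later in this patch. A standalone ledger showing why the two cancel (the byte counts are examples):

```cpp
#include <cassert>

// With fastcc and -tailcallopt the callee pops its argument bytes on return,
// and because PPC pre-allocates the call frame in the prologue (SP must not
// move across a call), eliminateCallFramePseudoInstr re-extends the stack by
// the same amount.
int main() {
  const unsigned NumBytes = 96;           // argument + linkage area of the call
  const bool FastCCAndTailCallOpt = true; // the condition used in LowerCALL
  unsigned BytesCalleePops = FastCCAndTailCallOpt ? NumBytes : 0;

  int SP = -1024;                         // caller's SP after its prologue
  int SPBefore = SP;
  SP += BytesCalleePops;                  // callee popped its arguments
  SP -= BytesCalleePops;                  // ADJCALLSTACKUP fix-up re-extends
  assert(SP == SPBefore && "caller frame must be intact after the call");
  return 0;
}
```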
@@ -2226,6 +2574,36 @@ SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG, } SDOperand Chain = Op.getOperand(0); + + Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL); + if (Chain.getOpcode() == PPCISD::TAILCALL) { + SDOperand TailCall = Chain; + SDOperand TargetAddress = TailCall.getOperand(1); + SDOperand StackAdjustment = TailCall.getOperand(2); + + assert(((TargetAddress.getOpcode() == ISD::Register && + cast(TargetAddress)->getReg() == PPC::CTR) || + TargetAddress.getOpcode() == ISD::TargetExternalSymbol || + TargetAddress.getOpcode() == ISD::TargetGlobalAddress || + isa(TargetAddress)) && + "Expecting an global address, external symbol, absolute value or register"); + + assert(StackAdjustment.getOpcode() == ISD::Constant && + "Expecting a const value"); + + SmallVector Operands; + Operands.push_back(Chain.getOperand(0)); + Operands.push_back(TargetAddress); + Operands.push_back(StackAdjustment); + // Copy registers used by the call. Last operand is a flag so it is not + // copied. + for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { + Operands.push_back(Chain.getOperand(i)); + } + return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0], + Operands.size()); + } + SDOperand Flag; // Copy the result values into the output registers. @@ -2268,18 +2646,44 @@ SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0); } -SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, - SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + + +SDOperand +PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool IsPPC64 = Subtarget.isPPC64(); - bool isMachoABI = Subtarget.isMachoABI(); + bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isMachoABI = PPCSubTarget.isMachoABI(); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Get current frame pointer save index. The users of this index will be + // primarily DYNALLOC instructions. + PPCFunctionInfo *FI = MF.getInfo(); + int RASI = FI->getReturnAddrSaveIndex(); + + // If the frame pointer save index hasn't been defined yet. + if (!RASI) { + // Find out what the fix offset of the frame pointer save area. + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI); + // Allocate the frame index for frame pointer save area. + RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); + // Save the result. + FI->setReturnAddrSaveIndex(RASI); + } + return DAG.getFrameIndex(RASI, PtrVT); +} + +SDOperand +PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isMachoABI = PPCSubTarget.isMachoABI(); + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int FPSI = FI->getFramePointerSaveIndex(); - + // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. @@ -2290,7 +2694,12 @@ SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, // Save the result. 
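getReturnAddrFrameIndex and getFramePointerFrameIndex share a create-once shape: allocate the fixed stack object on first use, cache the index in PPCFunctionInfo, and return the cached index thereafter. A standalone model of that memoization (sizes, offsets, and the index scheme are placeholders):

```cpp
#include <cassert>

// Model of the lazily created, cached frame index.
struct FrameModel {
  int NextIndex = -1;               // fixed objects get negative indices here
  int CreateFixedObject(int /*Size*/, int /*Offset*/) { return NextIndex--; }
};

static int getReturnAddrIndex(FrameModel &MFI, int &CachedRASI) {
  if (!CachedRASI)                  // 0 means "not created yet"
    CachedRASI = MFI.CreateFixedObject(4, /*LROffset=*/8);
  return CachedRASI;
}

int main() {
  FrameModel MFI;
  int RASI = 0;
  int First = getReturnAddrIndex(MFI, RASI);
  int Second = getReturnAddrIndex(MFI, RASI);
  assert(First == Second && "the slot must be allocated exactly once");
  return 0;
}
```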
FI->setFramePointerSaveIndex(FPSI); } + return DAG.getFrameIndex(FPSI, PtrVT); +} +SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, + SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { // Get the inputs. SDOperand Chain = Op.getOperand(0); SDOperand Size = Op.getOperand(1); @@ -2301,7 +2710,7 @@ SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT, DAG.getConstant(0, PtrVT), Size); // Construct a node for the frame pointer save index. - SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT); + SDOperand FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNALLOC node. SDOperand Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); @@ -4099,25 +4508,13 @@ SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); - int RAIdx = FuncInfo->getReturnAddrSaveIndex(); - if (RAIdx == 0) { - bool isPPC64 = PPCSubTarget.isPPC64(); - int Offset = - PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI()); - // Set up a frame object for the return address. - RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset); - - // Remember it for next time. - FuncInfo->setReturnAddrSaveIndex(RAIdx); - - // Make sure the function really does not optimize away the store of the RA - // to the stack. - FuncInfo->setLRStoreRequired(); - } - // Just load the return address off the stack. - SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy()); + SDOperand RetAddrFI = getReturnAddrFrameIndex(DAG); + + // Make sure the function really does not optimize away the store of the RA + // to the stack. + FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 8ea419d0ea58..d28799ad582e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -162,7 +162,16 @@ namespace llvm { /// CMP_UNRESERVE = Test for equality and "unreserve" if not true. This /// is used to implement atomic operations. - CMP_UNRESERVE + CMP_UNRESERVE, + + /// TAILCALL - Indicates a tail call should be taken. + TAILCALL, + /// TC_RETURN - A tail call return. + /// operand #0 chain + /// operand #1 callee (register or absolute) + /// operand #2 stack adjustment + /// operand #3 optional in flag + TC_RETURN }; } @@ -308,11 +317,27 @@ namespace llvm { /// the offset of the target addressing mode. virtual bool isLegalAddressImmediate(GlobalValue *GV) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Target which want to do tail call + /// optimization should implement this function. + virtual bool IsEligibleForTailCallOptimization(SDOperand Call, + SDOperand Ret, + SelectionDAG &DAG) const; + private: /// PPCAtomicLabelIndex - Keep track the number of PPC atomic labels. 
/// unsigned PPCAtomicLabelIndex; + SDOperand getFramePointerFrameIndex(SelectionDAG & DAG) const; + SDOperand getReturnAddrFrameIndex(SelectionDAG & DAG) const; + + SDOperand EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, + int SPDiff, + SDOperand Chain, + SDOperand &LROpOut, + SDOperand &FPOpOut); + SDOperand LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG); SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG); diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 0b23c67f94b2..1c47bfbc29e7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -116,7 +116,6 @@ def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)), def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; - // Atomic operations. def LDARX : Pseudo<(outs G8RC:$rD), (ins memrr:$ptr, i32imm:$label), "\nLa${label}_entry:\n\tldarx $rD, $ptr", @@ -135,6 +134,53 @@ def CMP_UNRESdi : Pseudo<(outs), (ins G8RC:$rA, s16imm64:$imm, i32imm:$label), [(PPCcmp_unres G8RC:$rA, immSExt16:$imm, imm:$label)]>; } +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi8 :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNd8 $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), + "#TC_RETURNa8 $func $offset", + [(PPCtc_return (i64 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNr8 $dst $offset", + []>; + + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1 in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; + + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1 in +def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", BrB, + []>; + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1 in +def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), + "ba $dst", BrB, + []>; + +def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), + (TCRETURNdi8 texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), + (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; + + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. 
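These TCRETURN* pseudo-instructions survive until emitEpilogue, which rewrites each one into the matching TAIL* branch (see the PPCRegisterInfo.cpp hunk below). A standalone sketch of that mapping for the 64-bit forms:

```cpp
#include <cassert>

// Toy enum modeling the pseudo-to-branch expansion done in emitEpilogue.
enum Opcode { TCRETURNdi8, TCRETURNri8, TCRETURNai8,
              TAILB8, TAILBCTR8, TAILBA8, INVALID };

static Opcode expandTailCallPseudo(Opcode RetOpcode) {
  switch (RetOpcode) {
  case TCRETURNdi8: return TAILB8;     // direct: b <target>
  case TCRETURNri8: return TAILBCTR8;  // register: bctr (target already in CTR)
  case TCRETURNai8: return TAILBA8;    // absolute: ba <imm>
  default:          return INVALID;
  }
}

int main() {
  assert(expandTailCallPseudo(TCRETURNdi8) == TAILB8);
  assert(expandTailCallPseudo(TCRETURNri8) == TAILBCTR8);
  assert(expandTailCallPseudo(TCRETURNai8) == TAILBA8);
  return 0;
}
```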
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a765494eec8e..04968edf34de 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -42,6 +42,7 @@ def SDT_PPCstbrx : SDTypeProfile<0, 4, [ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> ]>; + def SDT_PPClarx : SDTypeProfile<1, 2, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32> ]>; @@ -52,6 +53,10 @@ def SDT_PPCcmp_unres : SDTypeProfile<0, 3, [ SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32> ]>; +def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -121,6 +126,12 @@ def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone, def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; +def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, + [SDNPHasChain, SDNPOptInFlag]>; + +def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall, + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; @@ -453,6 +464,46 @@ let isCall = 1, PPC970_Unit = 7, [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>; } + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNd $dst $offset", + []>; + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), + "#TC_RETURNa $func $offset", + [(PPCtc_return (i32 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNr $dst $offset", + []>; + + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1 in +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In32BitMode]>; + + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1 in +def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", BrB, + []>; + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1 in +def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), + "ba $dst", BrB, + []>; + + // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, @@ -1211,6 +1262,18 @@ def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)), def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)), (BL_ELF texternalsym:$dst)>; + +def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), + (TCRETURNdi texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), + (TCRETURNri CTRRC:$dst, imm:$imm)>; + + + // Hi and Lo for Darwin Global Addresses. 
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>; diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 5478455ed49e..230ea0b0e046 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -43,19 +43,42 @@ private: /// requires that the code generator produce a store of LR to the stack on /// entry, even though LR may otherwise apparently not be used. bool LRStoreRequired; + + /// MinReservedArea - This is the frame size that is at least reserved in a + /// potential caller (parameter+linkage area). + unsigned MinReservedArea; + + /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum + /// amount the stack pointer is adjusted to make the frame bigger for tail + /// calls. Used for creating an area before the register spill area. + int TailCallSPDelta; + + /// HasFastCall - Does this function contain a fast call. Used to determine + /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). + bool HasFastCall; + public: PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), SpillsCR(false), - LRStoreRequired(false) {} + LRStoreRequired(false), + MinReservedArea(0), + TailCallSPDelta(0), + HasFastCall(false) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } - + + unsigned getMinReservedArea() const { return MinReservedArea; } + void setMinReservedArea(unsigned size) { MinReservedArea = size; } + + int getTailCallSPDelta() const { return TailCallSPDelta; } + void setTailCallSPDelta(int size) { TailCallSPDelta = size; } + /// UsesLR - This is set when the prolog/epilog inserter does its initial scan /// of the function, it is true if the LR/LR8 register is ever explicitly /// accessed/clobbered in the machine function (e.g. 
by calls and movpctolr,
@@ -68,6 +91,9 @@ public:
 
   void setLRStoreRequired() { LRStoreRequired = true; }
   bool isLRStoreRequired() const { return LRStoreRequired; }
+
+  void setHasFastCall() { HasFastCall = true; }
+  bool hasFastCall() const { return HasFastCall; }
 };
 
 } // end of namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 984021068b2f..d8ea207ecab8 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -19,6 +19,7 @@
 #include "PPCRegisterInfo.h"
 #include "PPCFrameInfo.h"
 #include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Type.h"
@@ -332,7 +333,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
 //
 static bool needsFP(const MachineFunction &MF) {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return NoFramePointerElim || MFI->hasVarSizedObjects();
+  return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+    (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
 }
 
 static bool spillsCR(const MachineFunction &MF) {
@@ -399,9 +401,42 @@ static bool MustSaveLR(const MachineFunction &MF) {
          MF.getFrameInfo()->hasCalls();
 }
 
+
+
 void PPCRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
+  if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+    // Add (actually subtract) back the amount the callee popped on return.
+    if (int CalleeAmt = I->getOperand(1).getImm()) {
+      MachineInstr *New = NULL;
+      bool is64Bit = Subtarget.isPPC64();
+      CalleeAmt *= -1;
+      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+
+      if (isInt16(CalleeAmt)) {
+        New = BuildMI(TII.get(ADDIInstr), StackReg).addReg(StackReg).
+          addImm(CalleeAmt);
+        MBB.insert(I, New);
+      } else {
+        MachineBasicBlock::iterator MBBI = I;
+        BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg)
+          .addImm(CalleeAmt >> 16);
+        BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg)
+          .addReg(TmpReg, false, false, true)
+          .addImm(CalleeAmt & 0xFFFF);
+        BuildMI(MBB, MBBI, TII.get(ADDInstr))
+          .addReg(StackReg)
+          .addReg(StackReg)
+          .addReg(TmpReg);
+      }
+    }
+  }
   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
   MBB.erase(I);
 }
@@ -924,6 +959,13 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     FI->setFramePointerSaveIndex(FPSI);
   }
 
+  // Reserve stack space to move the linkage area to in case of a tail call.
+  int TCSPDelta = 0;
+  if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+    int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
+    MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta,
+                                         AddFPOffsetAmount + TCSPDelta);
+  }
   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
   // a large stack, which will require scavenging a register to materialize a
   // large offset.
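The lis/ori/add sequence above is the standard split for stack adjustments that do not fit addi's signed 16-bit immediate: lis loads the high half shifted into place and ori merges the low half. A standalone check that the split reconstructs the original amount, including negative ones:

```cpp
#include <cassert>
#include <cstdint>

static bool isInt16(int V) { return V >= -32768 && V <= 32767; }

// Model of materializing a 32-bit stack adjustment: one addi when it fits,
// otherwise the lis/ori pair reassembled here arithmetically.
static int32_t materialize(int32_t Amt) {
  if (isInt16(Amt))
    return Amt;                          // single addi is enough
  uint16_t Hi = (uint16_t)(Amt >> 16);   // arithmetic shift keeps the sign bits
  uint16_t Lo = (uint16_t)(Amt & 0xFFFF);
  return (int32_t)(((uint32_t)Hi << 16) | Lo);  // lis Hi; ori Lo
}

int main() {
  assert(materialize(1024) == 1024);
  assert(materialize(70000) == 70000);    // needs the lis/ori pair
  assert(materialize(-70000) == -70000);  // works for negative amounts too
  return 0;
}
```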
@@ -1160,7 +1202,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert(MBBI->getOpcode() == PPC::BLR && + unsigned RetOpcode = MBBI->getOpcode(); + + assert( (RetOpcode == PPC::BLR || + RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8) && "Can only insert epilog into returning blocks"); // Get alignment info so we know how to restore r1 @@ -1169,7 +1219,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, unsigned MaxAlign = MFI->getMaxAlignment(); // Get the number of bytes allocated from the FrameInfo. - unsigned FrameSize = MFI->getStackSize(); + int FrameSize = MFI->getStackSize(); // Get processor type. bool IsPPC64 = Subtarget.isPPC64(); @@ -1183,19 +1233,75 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8; + + PPCFunctionInfo *FI = MF.getInfo(); + + if (UsesTCRet) { + int MaxTCRetDelta = FI->getTailCallSPDelta(); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert( StackAdjust.isImmediate() && "Expecting immediate value."); + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int Delta = StackAdj - MaxTCRetDelta; + assert((Delta >= 0) && "Delta must be positive"); + if (MaxTCRetDelta>0) + FrameSize += (StackAdj +Delta); + else + FrameSize += StackAdj; + } + if (FrameSize) { // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. - if (!Subtarget.isPPC64()) { - if (isInt16(FrameSize) && (!ALIGN_STACK || TargetAlign >= MaxAlign) && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R1).addImm(FrameSize); + if (!IsPPC64) { + // If this function contained a fastcc call and PerformTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case. 
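Continuing the epilogue: for a TCRETURN* block, the amount added back to the stack pointer is the function's own FrameSize plus the stack adjustment carried by the tail call as an immediate operand. A simplified standalone model (it ignores the MaxTCRetDelta > 0 branch, which the surrounding asserts keep out of the normal path):

```cpp
#include <cassert>

// Model of the epilogue SP-restore arithmetic for tail-call returns: the
// caller also pops the adjustment the tail call expects, so the branched-to
// function sees the stack layout it was compiled for.
static int epilogueSPRestore(int FrameSize, bool UsesTCRet, int StackAdj) {
  if (UsesTCRet)
    FrameSize += StackAdj;   // fold the tail call's adjustment into the pop
  return FrameSize;          // amount added back to SP before the branch
}

int main() {
  assert(epilogueSPRestore(128, false, 0) == 128);  // plain blr return
  assert(epilogueSPRestore(128, true, 32) == 160);  // tail call pops 32 more
  return 0;
}
```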
+ if (FI->hasFastCall() && isInt16(FrameSize)) { + assert(hasFP(MF) && "Expecting a valid the frame pointer."); + BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1) + .addReg(PPC::R31).addImm(FrameSize); + } else if(FI->hasFastCall()) { + BuildMI(MBB, MBBI, TII.get(PPC::LIS), PPC::R0) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, TII.get(PPC::ORI), PPC::R0) + .addReg(PPC::R0, false, false, true) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, TII.get(PPC::ADD4)) + .addReg(PPC::R1) + .addReg(PPC::R31) + .addReg(PPC::R0); + } else if (isInt16(FrameSize) && + (!ALIGN_STACK || TargetAlign >= MaxAlign) && + !MFI->hasVarSizedObjects()) { + BuildMI(MBB, MBBI, TII.get(PPC::ADDI), PPC::R1) + .addReg(PPC::R1).addImm(FrameSize); } else { BuildMI(MBB, MBBI, TII.get(PPC::LWZ),PPC::R1).addImm(0).addReg(PPC::R1); } } else { - if (isInt16(FrameSize) && TargetAlign >= MaxAlign && + if (FI->hasFastCall() && isInt16(FrameSize)) { + assert(hasFP(MF) && "Expecting a valid the frame pointer."); + BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1) + .addReg(PPC::X31).addImm(FrameSize); + } else if(FI->hasFastCall()) { + BuildMI(MBB, MBBI, TII.get(PPC::LIS8), PPC::X0) + .addImm(FrameSize >> 16); + BuildMI(MBB, MBBI, TII.get(PPC::ORI8), PPC::X0) + .addReg(PPC::X0, false, false, true) + .addImm(FrameSize & 0xFFFF); + BuildMI(MBB, MBBI, TII.get(PPC::ADD8)) + .addReg(PPC::X1) + .addReg(PPC::X31) + .addReg(PPC::X0); + } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, TII.get(PPC::ADDI8), PPC::X1) .addReg(PPC::X1).addImm(FrameSize); @@ -1228,6 +1334,64 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, if (UsesLR) BuildMI(MBB, MBBI, TII.get(PPC::MTLR)).addReg(PPC::R0); } + + // Callee pop calling convention. Pop parameter/linkage area. Used for tail + // call optimization + if (PerformTailCallOpt && RetOpcode == PPC::BLR && + MF.getFunction()->getCallingConv() == CallingConv::Fast) { + PPCFunctionInfo *FI = MF.getInfo(); + unsigned CallerAllocatedAmt = FI->getMinReservedArea(); + unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1; + unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31; + unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI; + + if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) { + BuildMI(MBB, MBBI, TII.get(ADDIInstr), StackReg) + .addReg(StackReg).addImm(CallerAllocatedAmt); + } else { + BuildMI(MBB, MBBI, TII.get(LISInstr), TmpReg) + .addImm(CallerAllocatedAmt >> 16); + BuildMI(MBB, MBBI, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, false, false, true) + .addImm(CallerAllocatedAmt & 0xFFFF); + BuildMI(MBB, MBBI, TII.get(ADDInstr)) + .addReg(StackReg) + .addReg(FPReg) + .addReg(TmpReg); + } + } else if (RetOpcode == PPC::TCRETURNdi) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, TII.get(PPC::TAILB)). 
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + assert(JumpTarget.isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR)); + } else if (RetOpcode == PPC::TCRETURNai) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); + } else if (RetOpcode == PPC::TCRETURNdi8) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri8) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + assert(JumpTarget.isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, TII.get(PPC::TAILBCTR8)); + } else if (RetOpcode == PPC::TCRETURNai8) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } } unsigned PPCRegisterInfo::getRARegister() const { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index dba17fa9f58a..86a8c5cef852 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -346,4 +346,6 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, let CopyCost = -1; } - + +def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; +def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 799ba007fa19..8560dfe463c8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -788,19 +788,6 @@ SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table, #include "X86GenCallingConv.inc" -/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it -/// exists skip possible ISD:TokenFactor. -static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) { - if (Chain.getOpcode() == X86ISD::TAILCALL) { - return Chain; - } else if (Chain.getOpcode() == ISD::TokenFactor) { - if (Chain.getNumOperands() && - Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL) - return Chain.getOperand(0); - } - return Chain; -} - /// LowerRET - Lower an ISD::RET node. SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args"); @@ -821,7 +808,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { SDOperand Chain = Op.getOperand(0); // Handle tail call return. - Chain = GetPossiblePreceedingTailCall(Chain); + Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL); if (Chain.getOpcode() == X86ISD::TAILCALL) { SDOperand TailCall = Chain; SDOperand TargetAddress = TailCall.getOperand(1); @@ -1057,27 +1044,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) { return None; } -/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could -/// possibly be overwritten when lowering the outgoing arguments in a tail -/// call. Currently the implementation of this call is very conservative and -/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with -/// virtual registers would be overwritten by direct lowering. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 799ba007fa19..8560dfe463c8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -788,19 +788,6 @@ SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
 
 #include "X86GenCallingConv.inc"
 
-/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
-/// exists skip possible ISD:TokenFactor.
-static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
-  if (Chain.getOpcode() == X86ISD::TAILCALL) {
-    return Chain;
-  } else if (Chain.getOpcode() == ISD::TokenFactor) {
-    if (Chain.getNumOperands() &&
-        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
-      return Chain.getOperand(0);
-  }
-  return Chain;
-}
-
 /// LowerRET - Lower an ISD::RET node.
 SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
   assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
@@ -821,7 +808,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
   SDOperand Chain = Op.getOperand(0);
 
   // Handle tail call return.
-  Chain = GetPossiblePreceedingTailCall(Chain);
+  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
   if (Chain.getOpcode() == X86ISD::TAILCALL) {
     SDOperand TailCall = Chain;
     SDOperand TargetAddress = TailCall.getOperand(1);
@@ -1057,27 +1044,6 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
   return None;
 }
 
-/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
-/// possibly be overwritten when lowering the outgoing arguments in a tail
-/// call. Currently the implementation of this call is very conservative and
-/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
-/// virtual registers would be overwritten by direct lowering.
-static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
-                                                    MachineFrameInfo * MFI) {
-  RegisterSDNode * OpReg = NULL;
-  FrameIndexSDNode * FrameIdxNode = NULL;
-  int FrameIdx = 0;
-  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
-      (Op.getOpcode()== ISD::CopyFromReg &&
-       (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
-       (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
-      (Op.getOpcode() == ISD::LOAD &&
-       (FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op.getOperand(1))) &&
-       (MFI->isFixedObjectIndex((FrameIdx = FrameIdxNode->getIndex()))) &&
-       (MFI->getObjectOffset(FrameIdx) >= 0)))
-    return true;
-  return false;
-}
 
 /// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
 /// in a register before calling.
@@ -1087,7 +1053,6 @@ bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
          Subtarget->isPICStyleGOT();
 }
 
-
 /// CallRequiresFnAddressInReg - Check whether the call requires the function
 /// address to be loaded in a register.
 bool
@@ -1097,33 +1062,6 @@ X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
          Subtarget->isPICStyleGOT();
 }
 
-/// CopyTailCallClobberedArgumentsToVRegs - Create virtual registers for all
-/// arguments to force loading and guarantee that arguments sourcing from
-/// incomming parameters are not overwriting each other.
-static SDOperand
-CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain,
-    SmallVector<std::pair<unsigned, SDOperand>, 8> &TailCallClobberedVRegs,
-    SelectionDAG &DAG,
-    MachineFunction &MF,
-    const TargetLowering * TL) {
-
-  SDOperand InFlag;
-  for (unsigned i = 0, e = TailCallClobberedVRegs.size(); i != e; i++) {
-    SDOperand Arg = TailCallClobberedVRegs[i].second;
-    unsigned Idx = TailCallClobberedVRegs[i].first;
-    unsigned VReg =
-      MF.getRegInfo().
-      createVirtualRegister(TL->getRegClassFor(Arg.getValueType()));
-    Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
-    InFlag = Chain.getValue(1);
-    Arg = DAG.getCopyFromReg(Chain, VReg, Arg.getValueType(), InFlag);
-    TailCallClobberedVRegs[i] = std::make_pair(Idx, Arg);
-    Chain = Arg.getValue(1);
-    InFlag = Arg.getValue(2);
-  }
-  return Chain;
-}
-
 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 /// by "Src" to address "Dst" with size and alignment information specified by
 /// the specific parameter attribute. The copy will be passed as a byval
@@ -1133,8 +1071,7 @@ CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
   SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       /*AlwaysInline=*/true,
-                       NULL, 0, NULL, 0);
+                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
 }
 
 SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
@@ -1463,65 +1400,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
   return Chain;
 }
 
-/// CopyTailCallByValClobberedRegToVirtReg - Copy arguments with register target
-/// which might be overwritten by later byval tail call lowering to a virtual
-/// register.
-bool
-X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
-    SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
-    SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
-    std::pair<unsigned, SDOperand> &RegToPass,
-    SDOperand &OutChain,
-    SDOperand &OutFlag,
-    MachineFunction &MF,
-    SelectionDAG & DAG) {
-  if (!containsByValArg) return false;
-
-  std::pair<unsigned, unsigned> ArgRegVReg;
-  MVT::ValueType VT = RegToPass.second.getValueType();
-
-  ArgRegVReg.first = RegToPass.first;
-  ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
-
-  // Copy Argument to virtual register.
-  OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
-                              RegToPass.second, OutFlag);
-  OutFlag = OutChain.getValue(1);
-  // Remember virtual register and type.
-  TailCallByValClobberedVRegs.push_back(ArgRegVReg);
-  TailCallByValClobberedVRegTypes.push_back(VT);
-  return true;
-}
-
-
-/// RestoreTailCallByValClobberedReg - Restore registers which were saved to
-/// virtual registers to prevent tail call byval lowering from overwriting
-/// parameter registers.
-static SDOperand
-RestoreTailCallByValClobberedRegs(SelectionDAG & DAG, SDOperand Chain,
-    SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
-    SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
-  if (TailCallByValClobberedVRegs.size()==0) return Chain;
-
-  SmallVector<SDOperand, 8> RegOpChains;
-  for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
-    SDOperand InFlag;
-    unsigned DestReg = TailCallByValClobberedVRegs[i].first;
-    unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
-    MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
-    SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
-    Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
-    RegOpChains.push_back(Chain);
-  }
-  if (!RegOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
-                        &RegOpChains[0], RegOpChains.size());
-  return Chain;
-}
-
 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo * MFI = MF.getFrameInfo();
   SDOperand Chain = Op.getOperand(0);
   unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
@@ -1572,17 +1452,11 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
                                     FPDiff);
 
   SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
-  SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
-
   SmallVector<SDOperand, 8> MemOpChains;
-
   SDOperand StackPtr;
-  bool containsTailCallByValArg = false;
-  SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
-  SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
 
-  // Walk the register/memloc assignments, inserting copies/loads.  For tail
-  // calls, remember all arguments for later special lowering.
+  // Walk the register/memloc assignments, inserting copies/loads.  In the case
+  // of tail call optimization, arguments are handled later.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
@@ -1638,10 +1512,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
       MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                              Arg));
-      // Remember fact that this call contains byval arguments.
-      containsTailCallByValArg |= IsTailCall && isByVal;
-    } else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
-      TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
     }
   }
 }
@@ -1653,21 +1523,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into registers.
   SDOperand InFlag;
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    // Tail call byval lowering might overwrite argument registers so arguments
-    // passed to be copied to a virtual register for
-    // later processing.
-    if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
-                                               TailCallByValClobberedVRegs,
-                                               TailCallByValClobberedVRegTypes,
-                                               RegsToPass[i], Chain, InFlag, MF,
-                                               DAG))
-      continue;
-
-    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
-                             InFlag);
-    InFlag = Chain.getValue(1);
-  }
+  // Tail call byval lowering might overwrite argument registers, so in the
+  // case of tail call optimization the copies to registers are lowered later.
+  if (!IsTailCall)
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    }
 
   // ELF / PIC requires GOT in the EBX register before function calls via PLT
   // GOT pointer.
@@ -1723,10 +1586,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
     int FI = 0;
     // Do not flag preceeding copytoreg stuff together with the following stuff.
     InFlag = SDOperand();
-
-    Chain = CopyTailCallClobberedArgumentsToVRegs(Chain, TailCallClobberedVRegs,
-                                                  DAG, MF, this);
-
     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
       CCValAssign &VA = ArgLocs[i];
       if (!VA.isRegLoc()) {
@@ -1741,17 +1600,6 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
         FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
         FIN = DAG.getFrameIndex(FI, getPointerTy());
 
-        // Find virtual register for this argument.
-        bool Found=false;
-        for (unsigned idx=0, e= TailCallClobberedVRegs.size(); idx < e; idx++)
-          if (TailCallClobberedVRegs[idx].first==i) {
-            Arg = TailCallClobberedVRegs[idx].second;
-            Found=true;
-            break;
-          }
-        assert(IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)==false ||
-               (Found==true && "No corresponding Argument was found"));
-
         if (Flags.isByVal()) {
           // Copy relative to framepointer.
           SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
@@ -1774,10 +1622,13 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
       Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOpChains2[0], MemOpChains2.size());
 
-    // Restore byval lowering clobbered registers.
-    Chain = RestoreTailCallByValClobberedRegs(DAG, Chain,
-                                              TailCallByValClobberedVRegs,
-                                              TailCallByValClobberedVRegTypes);
+    // Copy arguments to their registers.
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    }
+    InFlag = SDOperand();
 
     // Store the return address to the appropriate stack slot.
     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
@@ -1955,15 +1806,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
   if (!PerformTailCallOpt)
     return false;
 
-  // Check whether CALL node immediatly preceeds the RET node and whether the
-  // return uses the result of the node or is a void return.
-  unsigned NumOps = Ret.getNumOperands();
-  if ((NumOps == 1 &&
-       (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
-        Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
-      (NumOps > 1 &&
-       Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
-       Ret.getOperand(1) == SDOperand(Call.Val,0))) {
+  if (CheckTailCallReturnConstraints(Call, Ret)) {
     MachineFunction &MF = DAG.getMachineFunction();
     unsigned CallerCC = MF.getFunction()->getCallingConv();
     unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 987cf73102f3..55ad70e64269 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -475,15 +475,6 @@ namespace llvm {
     SDOperand EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDOperand &OutRetAddr,
                                       SDOperand Chain, bool IsTailCall,
                                       bool Is64Bit, int FPDiff);
-
-    bool CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
-        SmallVector< std::pair<unsigned, unsigned>,8> &TailCallByValClobberedVRegs,
-        SmallVector<MVT::ValueType,8> &TailCallByValClobberedVRegTypes,
-        std::pair<unsigned, SDOperand> &RegToPass,
-        SDOperand &OutChain,
-        SDOperand &OutFlag,
-        MachineFunction &MF,
-        SelectionDAG & DAG);
 
     CCAssignFn *CCAssignFnForNode(SDOperand Op) const;
     NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDOperand Op);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 37a56ba74326..bea4798f52d3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -391,6 +391,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
                  []>;
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+
 def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
                  []>;
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
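Taken together, the X86 changes show the usage pattern intended for the two helpers this patch hoists into TargetLowering: LowerRET looks through a possible TokenFactor for the target's tail-call node, and IsEligibleForTailCallOptimization delegates the CALL/RET adjacency check to the shared code. A minimal sketch for some other backend follows; MyTargetLowering and MyISD::TAILCALL are hypothetical placeholders, the elided lowering is only indicated by comments, and nothing here is code from the patch itself:

// Sketch of a hypothetical target adopting the shared helpers; mirrors the
// X86 hunks above rather than reproducing any target verbatim.
SDOperand MyTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  // A tail call may be hidden behind an ISD::TokenFactor; look through it.
  Chain = GetPossiblePreceedingTailCall(Chain, MyISD::TAILCALL);
  if (Chain.getOpcode() == MyISD::TAILCALL) {
    // ...emit the target's TC_RETURN-style node instead of a plain return...
  }
  // ...normal return lowering...
  return SDOperand();  // placeholder
}

bool MyTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                         SDOperand Ret,
                                                         SelectionDAG &DAG) {
  if (!PerformTailCallOpt)
    return false;
  // The structural check (CALL immediately precedes RET and the return
  // forwards its result or is void) now lives in the base class.
  if (!CheckTailCallReturnConstraints(Call, Ret))
    return false;
  // The remaining checks stay target-specific; as on X86 and PPC here,
  // require caller and callee to agree on fastcc so callee-pop applies.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned CallerCC = MF.getFunction()->getCallingConv();
  unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
  return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
}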
diff --git a/llvm/test/CodeGen/PowerPC/tailcall1-64.ll b/llvm/test/CodeGen/PowerPC/tailcall1-64.ll
new file mode 100644
index 000000000000..f39b40bdab81
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=ppc64 -tailcallopt | grep TC_RETURNd8
+define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+  ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+  %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )  ; <i32> [#uses=1]
+  ret i32 %tmp11
+}
diff --git a/llvm/test/CodeGen/PowerPC/tailcall1.ll b/llvm/test/CodeGen/PowerPC/tailcall1.ll
new file mode 100644
index 000000000000..1fc4b94ddcf9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tailcall1.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -tailcallopt | grep TC_RETURN
+define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+  ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+  %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )  ; <i32> [#uses=1]
+  ret i32 %tmp11
+}
diff --git a/llvm/test/CodeGen/PowerPC/tailcallpic1.ll b/llvm/test/CodeGen/PowerPC/tailcallpic1.ll
new file mode 100644
index 000000000000..678d366fb6a6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tailcallpic1.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -tailcallopt -mtriple=powerpc-apple-darwin -relocation-model=pic | grep TC_RETURN
+
+
+
+define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+  ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+  %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )  ; <i32> [#uses=1]
+  ret i32 %tmp11
+}
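The new tests pin down both requirements end to end: caller and callee are fastcc, llc runs with -tailcallopt, and the grep succeeds only if the call survives lowering as a TC_RETURN pseudo, which in turn requires the RET to directly forward the preceding call's result. That structural requirement, enforced by CheckTailCallReturnConstraints, can be modeled in isolation as follows; this is a self-contained toy in plain C++, not LLVM code, with node results numbered so the scalar result comes first and the chain last:

#include <cassert>
#include <cstddef>

// Toy model of CheckTailCallReturnConstraints: a RET is acceptable if it
// consumes the call's chain alone (void return) or the call's chain plus
// the call's first value ("ret i32 %tmp11" in the tests above).
struct Node { int NumValues; };
struct Use { const Node *N; int ResNo; };  // like SDOperand(Val, ResNo)

static bool checkTailCallReturnConstraints(const Node &Call,
                                           const Use *RetOps, size_t NumOps) {
  if (NumOps == 1)  // void return: operand must be one of the call's values
    return RetOps[0].N == &Call &&
           (RetOps[0].ResNo == 0 || RetOps[0].ResNo == 1);
  return NumOps > 1 &&
         RetOps[0].N == &Call && RetOps[0].ResNo == Call.NumValues - 1 &&
         RetOps[1].N == &Call && RetOps[1].ResNo == 0;
}

int main() {
  Node Call = {2};                        // one i32 result (0) plus chain (1)
  Use Good[] = {{&Call, 1}, {&Call, 0}};  // ret i32 %tmp11: chain + value
  assert(checkTailCallReturnConstraints(Call, Good, 2));
  Node Other = {2};
  Use Bad[] = {{&Call, 1}, {&Other, 0}};  // value produced by another node
  assert(!checkTailCallReturnConstraints(Call, Bad, 2));
  return 0;
}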