forked from OSchip/llvm-project
[mips] Add code to do tail call optimization.
Currently, it is enabled only if the option "enable-mips-tail-calls" is given and all of the callee's arguments are passed in registers. llvm-svn: 166342
This commit is contained in:
parent
59a32e91f9
commit
90131ac26c
|
@ -25,6 +25,7 @@
|
||||||
#include "llvm/GlobalVariable.h"
|
#include "llvm/GlobalVariable.h"
|
||||||
#include "llvm/Intrinsics.h"
|
#include "llvm/Intrinsics.h"
|
||||||
#include "llvm/CallingConv.h"
|
#include "llvm/CallingConv.h"
|
||||||
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/CodeGen/CallingConvLower.h"
|
#include "llvm/CodeGen/CallingConvLower.h"
|
||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
@ -32,12 +33,19 @@
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||||
#include "llvm/CodeGen/ValueTypes.h"
|
#include "llvm/CodeGen/ValueTypes.h"
|
||||||
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
STATISTIC(NumTailCalls, "Number of tail calls");
|
||||||
|
|
||||||
|
// Hidden command-line switch "enable-mips-tail-calls"; it defaults to false,
// and IsEligibleForTailCallOptimization rejects every call while it is unset.
static cl::opt<bool>
|
||||||
|
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
|
||||||
|
cl::desc("MIPS: Enable tail calls."), cl::init(false));
|
||||||
|
|
||||||
// If I is a shifted mask, set the size (Size) and the first bit of the
|
// If I is a shifted mask, set the size (Size) and the first bit of the
|
||||||
// mask (Pos), and return true.
|
// mask (Pos), and return true.
|
||||||
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
|
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
|
||||||
|
@ -2871,9 +2879,26 @@ PassByValArg64(SDValue Chain, DebugLoc dl,
|
||||||
MemOpChains.push_back(Chain);
|
MemOpChains.push_back(Chain);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||||
|
/// for tail call optimization.
/// Returns false unless the "enable-mips-tail-calls" option is set, and false
/// whenever NextStackOffset shows that an argument is passed on the stack.
/// \param CalleeCC        calling convention of the callee.
/// \param NextStackOffset stack offset computed for the outgoing arguments;
///                        LowerCall passes its own NextStackOffset here.
/// \returns true if the call may be lowered as a tail call.
|
||||||
|
bool MipsTargetLowering::
|
||||||
|
IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC,
|
||||||
|
unsigned NextStackOffset) const {
|
||||||
|
if (!EnableMipsTailCalls)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Do not tail-call optimize if there is an argument passed on stack.
// For O32 with a non-fast calling convention, LowerCall raises NextStackOffset
// to at least 16 before calling this function, so only offsets above 16 are
// rejected there; in every other case any non-zero offset is rejected.
// NOTE(review): the 16 bytes are presumably the O32 area reserved for the
// register-passed arguments - confirm against the ABI.
|
||||||
|
if (IsO32 && (CalleeCC != CallingConv::Fast)) {
|
||||||
|
if (NextStackOffset > 16)
|
||||||
|
return false;
|
||||||
|
} else if (NextStackOffset)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// LowerCall - function arguments are copied from virtual regs to
|
/// LowerCall - function arguments are copied from virtual regs to
|
||||||
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
|
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
|
||||||
/// TODO: isTailCall.
|
|
||||||
SDValue
|
SDValue
|
||||||
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
SmallVectorImpl<SDValue> &InVals) const {
|
SmallVectorImpl<SDValue> &InVals) const {
|
||||||
|
@ -2888,9 +2913,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
CallingConv::ID CallConv = CLI.CallConv;
|
CallingConv::ID CallConv = CLI.CallConv;
|
||||||
bool isVarArg = CLI.IsVarArg;
|
bool isVarArg = CLI.IsVarArg;
|
||||||
|
|
||||||
// MIPs target does not yet support tail call optimization.
|
|
||||||
isTailCall = false;
|
|
||||||
|
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
|
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
|
||||||
|
@ -2921,11 +2943,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
if (IsO32 && (CallConv != CallingConv::Fast))
|
if (IsO32 && (CallConv != CallingConv::Fast))
|
||||||
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
|
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
|
||||||
|
|
||||||
|
// Check if it's really possible to do a tail call.
|
||||||
|
if (isTailCall)
|
||||||
|
isTailCall = IsEligibleForTailCallOptimization(CallConv, NextStackOffset);
|
||||||
|
|
||||||
|
if (isTailCall)
|
||||||
|
++NumTailCalls;
|
||||||
|
|
||||||
// Chain is the output chain of the last Load/Store or CopyToReg node.
|
// Chain is the output chain of the last Load/Store or CopyToReg node.
|
||||||
// ByValChain is the output chain of the last Memcpy node created for copying
|
// ByValChain is the output chain of the last Memcpy node created for copying
|
||||||
// byval arguments to the stack.
|
// byval arguments to the stack.
|
||||||
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
|
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
|
||||||
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
|
|
||||||
|
if (!isTailCall)
|
||||||
|
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
|
||||||
|
|
||||||
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
|
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
|
||||||
IsN64 ? Mips::SP_64 : Mips::SP,
|
IsN64 ? Mips::SP_64 : Mips::SP,
|
||||||
|
@ -3135,6 +3166,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
if (InFlag.getNode())
|
if (InFlag.getNode())
|
||||||
Ops.push_back(InFlag);
|
Ops.push_back(InFlag);
|
||||||
|
|
||||||
|
if (isTailCall)
|
||||||
|
return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size());
|
||||||
|
|
||||||
Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
|
Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
|
||||||
InFlag = Chain.getValue(1);
|
InFlag = Chain.getValue(1);
|
||||||
|
|
||||||
|
|
|
@ -208,6 +208,11 @@ namespace llvm {
|
||||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||||
|
/// for tail call optimization.
/// \param CalleeCC        calling convention of the callee.
/// \param NextStackOffset stack offset computed for the outgoing arguments.
/// \returns true if the call may be lowered as a tail call.
|
||||||
|
bool IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC,
|
||||||
|
unsigned NextStackOffset) const;
|
||||||
|
|
||||||
virtual SDValue
|
virtual SDValue
|
||||||
LowerFormalArguments(SDValue Chain,
|
LowerFormalArguments(SDValue Chain,
|
||||||
CallingConv::ID CallConv, bool isVarArg,
|
CallingConv::ID CallConv, bool isVarArg,
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
; RUN: llc -march=mipsel -relocation-model=pic -enable-mips-tail-calls < %s | \
|
||||||
|
; RUN: FileCheck %s -check-prefix=PIC32
|
||||||
|
; RUN: llc -march=mipsel -relocation-model=static \
|
||||||
|
; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32
|
||||||
|
; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \
|
||||||
|
; RUN: < %s | FileCheck %s -check-prefix=N64
|
||||||
|
|
||||||
|
@g0 = common global i32 0, align 4
|
||||||
|
@g1 = common global i32 0, align 4
|
||||||
|
@g2 = common global i32 0, align 4
|
||||||
|
@g3 = common global i32 0, align 4
|
||||||
|
@g4 = common global i32 0, align 4
|
||||||
|
@g5 = common global i32 0, align 4
|
||||||
|
@g6 = common global i32 0, align 4
|
||||||
|
@g7 = common global i32 0, align 4
|
||||||
|
@g8 = common global i32 0, align 4
|
||||||
|
@g9 = common global i32 0, align 4
|
||||||
|
|
||||||
|
; Tail call with four i32 arguments. The checks below expect no linking jump
; (jal / jalr) in any of the three run configurations.
define i32 @caller1(i32 %a0) nounwind {
|
||||||
|
entry:
|
||||||
|
; PIC32-NOT: jalr
|
||||||
|
; STATIC32-NOT: jal
|
||||||
|
; N64-NOT: jalr
|
||||||
|
|
||||||
|
%call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @callee1(i32, i32, i32, i32)
|
||||||
|
|
||||||
|
; Tail call with five i32 arguments. The two 32-bit runs expect an ordinary
; call (jalr for PIC, jal for static) while the 64-bit run still expects no
; jalr. Presumably the fifth argument is passed on the stack in the 32-bit
; runs, which makes the call ineligible - confirm against the ABI.
define i32 @caller2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
|
||||||
|
entry:
|
||||||
|
; PIC32: jalr
|
||||||
|
; STATIC32: jal
|
||||||
|
; N64-NOT: jalr
|
||||||
|
|
||||||
|
%call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @callee2(i32, i32, i32, i32, i32)
|
||||||
|
|
||||||
|
; Tail call with eight i32 arguments. The two 32-bit runs expect an ordinary
; call (jalr for PIC, jal for static) while the 64-bit run expects no jalr.
define i32 @caller3(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind {
|
||||||
|
entry:
|
||||||
|
; PIC32: jalr
|
||||||
|
; STATIC32: jal
|
||||||
|
; N64-NOT: jalr
|
||||||
|
|
||||||
|
%call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @callee3(i32, i32, i32, i32, i32, i32, i32, i32)
|
||||||
|
|
||||||
|
; Tail call with nine i32 arguments. All three run configurations expect an
; ordinary call here (jalr, jal and jalr respectively).
define i32 @caller4(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
|
||||||
|
entry:
|
||||||
|
; PIC32: jalr
|
||||||
|
; STATIC32: jal
|
||||||
|
; N64: jalr
|
||||||
|
|
||||||
|
%call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32)
|
||||||
|
|
||||||
|
; fastcc tail call: loads the ten globals g0..g9 and passes them to callee5.
; The checks below expect no linking jump (jal / jalr) in any of the three
; run configurations.
define i32 @caller5() nounwind readonly {
|
||||||
|
entry:
|
||||||
|
; PIC32-NOT: jalr
|
||||||
|
; STATIC32-NOT: jal
|
||||||
|
; N64-NOT: jalr
|
||||||
|
|
||||||
|
%0 = load i32* @g0, align 4
|
||||||
|
%1 = load i32* @g1, align 4
|
||||||
|
%2 = load i32* @g2, align 4
|
||||||
|
%3 = load i32* @g3, align 4
|
||||||
|
%4 = load i32* @g4, align 4
|
||||||
|
%5 = load i32* @g5, align 4
|
||||||
|
%6 = load i32* @g6, align 4
|
||||||
|
%7 = load i32* @g7, align 4
|
||||||
|
%8 = load i32* @g8, align 4
|
||||||
|
%9 = load i32* @g9, align 4
|
||||||
|
%call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
|
||||||
|
ret i32 %call
|
||||||
|
}
|
||||||
|
|
||||||
|
; Internal fastcc callee used by caller5: returns the sum of its ten i32
; arguments. It is marked noinline, so the call in caller5 is not inlined away.
define internal fastcc i32 @callee5(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind readnone noinline {
|
||||||
|
entry:
|
||||||
|
%add = add nsw i32 %a1, %a0
|
||||||
|
%add1 = add nsw i32 %add, %a2
|
||||||
|
%add2 = add nsw i32 %add1, %a3
|
||||||
|
%add3 = add nsw i32 %add2, %a4
|
||||||
|
%add4 = add nsw i32 %add3, %a5
|
||||||
|
%add5 = add nsw i32 %add4, %a6
|
||||||
|
%add6 = add nsw i32 %add5, %a7
|
||||||
|
%add7 = add nsw i32 %add6, %a8
|
||||||
|
%add8 = add nsw i32 %add7, %a9
|
||||||
|
ret i32 %add8
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue