forked from OSchip/llvm-project
musttail: Forward regparms of variadic functions on x86_64
Summary: If a variadic function body contains a musttail call, then we copy all of the remaining register parameters into virtual registers in the function prologue. We track the virtual registers through the function body, and add them as additional registers to pass to the call. Because this is all done in virtual registers, the register allocator usually gives us good code. If the function does a call, however, it will have to spill and reload all argument registers (ew). Forwarding regparms on x86_32 is not implemented because most compilers don't support varargs in 32-bit with regparms. Reviewers: majnemer Subscribers: aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D5060 llvm-svn: 216780
This commit is contained in:
parent
329d4a2b29
commit
16e5541211
|
@ -239,6 +239,9 @@ class MachineFrameInfo {
|
|||
/// True if the function contains a call to the llvm.vastart intrinsic.
|
||||
bool HasVAStart;
|
||||
|
||||
/// True if this is a varargs function that contains a musttail call.
|
||||
bool HasMustTailInVarArgFunc;
|
||||
|
||||
const TargetFrameLowering *getFrameLowering() const;
|
||||
public:
|
||||
explicit MachineFrameInfo(const TargetMachine &TM, bool RealignOpt)
|
||||
|
@ -260,6 +263,7 @@ public:
|
|||
UseLocalStackAllocationBlock = false;
|
||||
HasInlineAsmWithSPAdjust = false;
|
||||
HasVAStart = false;
|
||||
HasMustTailInVarArgFunc = false;
|
||||
}
|
||||
|
||||
/// hasStackObjects - Return true if there are any stack objects in this
|
||||
|
@ -483,6 +487,10 @@ public:
|
|||
bool hasVAStart() const { return HasVAStart; }
|
||||
void setHasVAStart(bool B) { HasVAStart = B; }
|
||||
|
||||
/// Returns true if the function is variadic and contains a musttail call.
|
||||
bool hasMustTailInVarArgFunc() const { return HasMustTailInVarArgFunc; }
|
||||
/// Records whether the function is variadic and contains a musttail call.
void setHasMustTailInVarArgFunc(bool B) { HasMustTailInVarArgFunc = B; }
|
||||
|
||||
/// getMaxCallFrameSize - Return the maximum size of a call frame that must be
|
||||
/// allocated for an outgoing function call. This is only available if
|
||||
/// CallFrameSetup/Destroy pseudo instructions are used by the target, and
|
||||
|
|
|
@ -146,6 +146,13 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
|
|||
MF->getFrameInfo()->setHasVAStart(true);
|
||||
}
|
||||
|
||||
// If we have a musttail call in a variadic function, we need to ensure we
|
||||
// forward implicit register parameters.
|
||||
if (auto *CI = dyn_cast<CallInst>(I)) {
|
||||
if (CI->isMustTailCall() && Fn->isVarArg())
|
||||
MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
|
||||
}
|
||||
|
||||
// Mark values used outside their block as exported, by allocating
|
||||
// a virtual register for them.
|
||||
if (isUsedOutsideOfDefiningBlock(I))
|
||||
|
|
|
@ -2326,6 +2326,52 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the list of 64-bit general purpose registers used to pass
/// arguments under \p CallConv. Asserts that \p Subtarget is 64-bit.
///
/// For Win64 calling conventions the list is RCX, RDX, R8, R9; for every
/// other calling convention it is RDI, RSI, RDX, RCX, R8, R9. The returned
/// ArrayRef points at function-local static arrays.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
|
||||
const X86Subtarget *Subtarget) {
|
||||
assert(Subtarget->is64Bit());
|
||||
|
||||
// Win64 calling conventions use a four-register list.
if (Subtarget->isCallingConvWin64(CallConv)) {
|
||||
static const MCPhysReg GPR64ArgRegsWin64[] = {
|
||||
X86::RCX, X86::RDX, X86::R8, X86::R9
|
||||
};
|
||||
return GPR64ArgRegsWin64;
|
||||
}
|
||||
|
||||
// All remaining 64-bit calling conventions use the six-register list.
static const MCPhysReg GPR64ArgRegs64Bit[] = {
|
||||
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
|
||||
};
|
||||
return GPR64ArgRegs64Bit;
|
||||
}
|
||||
|
||||
/// Returns the list of XMM registers that may carry arguments under
/// \p CallConv for the function in \p MF. Asserts that \p Subtarget is
/// 64-bit.
///
/// The result is empty (None) for Win64 calling conventions, and also when
/// soft-float is enabled, the function carries the NoImplicitFloat
/// attribute, or the subtarget lacks SSE1. Otherwise it is XMM0 through XMM7.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
|
||||
CallingConv::ID CallConv,
|
||||
const X86Subtarget *Subtarget) {
|
||||
assert(Subtarget->is64Bit());
|
||||
if (Subtarget->isCallingConvWin64(CallConv)) {
|
||||
// The XMM registers which might contain var arg parameters are shadowed
|
||||
// in their paired GPR. So we only need to save the GPR to their home
|
||||
// slots.
|
||||
return None;
|
||||
}
|
||||
|
||||
// Look up the function-level NoImplicitFloat attribute.
const Function *Fn = MF.getFunction();
|
||||
bool NoImplicitFloatOps = Fn->getAttributes().
|
||||
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
|
||||
// Soft-float and NoImplicitFloat are asserted not to be set together.
assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
|
||||
!Subtarget->hasSSE1())
|
||||
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
|
||||
// registers.
|
||||
return None;
|
||||
|
||||
// Otherwise return the eight-register XMM list.
static const MCPhysReg XMMArgRegs64Bit[] = {
|
||||
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
|
||||
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
||||
};
|
||||
return XMMArgRegs64Bit;
|
||||
}
|
||||
|
||||
SDValue
|
||||
X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
|
@ -2469,57 +2515,49 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
// If the function takes variable number of arguments, make a frame index for
|
||||
// the start of the first vararg value... for expansion of llvm.va_start. We
|
||||
// can skip this if there are no va_start calls.
|
||||
if (isVarArg && MFI->hasVAStart()) {
|
||||
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
|
||||
CallConv != CallingConv::X86_ThisCall)) {
|
||||
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
|
||||
if (MFI->hasVAStart() &&
|
||||
(Is64Bit || (CallConv != CallingConv::X86_FastCall &&
|
||||
CallConv != CallingConv::X86_ThisCall))) {
|
||||
FuncInfo->setVarArgsFrameIndex(
|
||||
MFI->CreateFixedObject(1, StackSize, true));
|
||||
}
|
||||
|
||||
// 64-bit calling conventions support varargs and register parameters, so we
|
||||
// have to do extra work to spill them in the prologue or forward them to
|
||||
// musttail calls.
|
||||
if (Is64Bit && isVarArg &&
|
||||
(MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
|
||||
// Find the first unallocated argument registers.
|
||||
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
|
||||
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
|
||||
unsigned NumIntRegs =
|
||||
CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
|
||||
unsigned NumXMMRegs =
|
||||
CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
|
||||
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
|
||||
// Gather all the live in physical registers.
|
||||
SmallVector<SDValue, 6> LiveGPRs;
|
||||
SmallVector<SDValue, 8> LiveXMMRegs;
|
||||
SDValue ALVal;
|
||||
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
|
||||
unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
|
||||
LiveGPRs.push_back(
|
||||
DAG.getCopyFromReg(DAG.getEntryNode(), dl, GPR, MVT::i64));
|
||||
}
|
||||
if (Is64Bit) {
|
||||
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
|
||||
|
||||
// FIXME: We should really autogenerate these arrays
|
||||
static const MCPhysReg GPR64ArgRegsWin64[] = {
|
||||
X86::RCX, X86::RDX, X86::R8, X86::R9
|
||||
};
|
||||
static const MCPhysReg GPR64ArgRegs64Bit[] = {
|
||||
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
|
||||
};
|
||||
static const MCPhysReg XMMArgRegs64Bit[] = {
|
||||
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
|
||||
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
||||
};
|
||||
const MCPhysReg *GPR64ArgRegs;
|
||||
unsigned NumXMMRegs = 0;
|
||||
|
||||
if (IsWin64) {
|
||||
// The XMM registers which might contain var arg parameters are shadowed
|
||||
// in their paired GPR. So we only need to save the GPR to their home
|
||||
// slots.
|
||||
TotalNumIntRegs = 4;
|
||||
GPR64ArgRegs = GPR64ArgRegsWin64;
|
||||
} else {
|
||||
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
|
||||
GPR64ArgRegs = GPR64ArgRegs64Bit;
|
||||
|
||||
NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
|
||||
TotalNumXMMRegs);
|
||||
if (!ArgXMMs.empty()) {
|
||||
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
|
||||
ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
|
||||
for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
|
||||
unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
|
||||
LiveXMMRegs.push_back(
|
||||
DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
|
||||
}
|
||||
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
|
||||
TotalNumIntRegs);
|
||||
|
||||
bool NoImplicitFloatOps = Fn->getAttributes().
|
||||
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
|
||||
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
|
||||
NoImplicitFloatOps) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
|
||||
!Subtarget->hasSSE1())
|
||||
// Kernel mode asks for SSE to be disabled, so don't push them
|
||||
// on the stack.
|
||||
TotalNumXMMRegs = 0;
|
||||
}
|
||||
|
||||
// Store them to the va_list returned by va_start.
|
||||
if (MFI->hasVAStart()) {
|
||||
if (IsWin64) {
|
||||
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
|
||||
// Get to the caller-allocated home save location. Add 8 to account
|
||||
|
@ -2535,10 +2573,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
// registers, then we must store them to their spots on the stack so
|
||||
// they may be loaded by dereferencing the result of va_next.
|
||||
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
|
||||
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
|
||||
FuncInfo->setRegSaveFrameIndex(
|
||||
MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
|
||||
false));
|
||||
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
|
||||
FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
|
||||
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
|
||||
}
|
||||
|
||||
// Store the integer parameter registers.
|
||||
|
@ -2546,12 +2583,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
|
||||
getPointerTy());
|
||||
unsigned Offset = FuncInfo->getVarArgsGPOffset();
|
||||
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
|
||||
for (SDValue Val : LiveGPRs) {
|
||||
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
|
||||
DAG.getIntPtrConstant(Offset));
|
||||
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
|
||||
&X86::GR64RegClass);
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
|
||||
SDValue Store =
|
||||
DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo::getFixedStack(
|
||||
|
@ -2561,32 +2595,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
Offset += 8;
|
||||
}
|
||||
|
||||
if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
|
||||
if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
|
||||
// Now store the XMM (fp + vector) parameter registers.
|
||||
SmallVector<SDValue, 12> SaveXMMOps;
|
||||
SaveXMMOps.push_back(Chain);
|
||||
|
||||
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
|
||||
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
|
||||
SaveXMMOps.push_back(ALVal);
|
||||
|
||||
SaveXMMOps.push_back(DAG.getIntPtrConstant(
|
||||
FuncInfo->getRegSaveFrameIndex()));
|
||||
SaveXMMOps.push_back(DAG.getIntPtrConstant(
|
||||
FuncInfo->getVarArgsFPOffset()));
|
||||
|
||||
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
|
||||
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
|
||||
&X86::VR128RegClass);
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
|
||||
SaveXMMOps.push_back(Val);
|
||||
}
|
||||
SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
|
||||
LiveXMMRegs.end());
|
||||
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
|
||||
MVT::Other, SaveXMMOps));
|
||||
}
|
||||
|
||||
if (!MemOps.empty())
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
|
||||
} else {
|
||||
// TODO: Save virtual registers away somewhere so we can do
|
||||
// getCopyFromReg in the musttail call lowering bb.
|
||||
assert(MFI->hasMustTailInVarArgFunc());
|
||||
auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
|
||||
typedef X86MachineFunctionInfo::Forward Forward;
|
||||
|
||||
// Add all GPRs, al, and XMMs to the list of forwards.
|
||||
for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
|
||||
unsigned VReg =
|
||||
MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
|
||||
Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
|
||||
}
|
||||
|
||||
if (!ArgXMMs.empty()) {
|
||||
unsigned ALVReg =
|
||||
MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
|
||||
Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
|
||||
|
||||
for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
|
||||
unsigned VReg =
|
||||
MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
|
||||
Forwards.push_back(
|
||||
Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2689,6 +2743,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
|
||||
StructReturnType SR = callIsStructReturn(Outs);
|
||||
bool IsSibcall = false;
|
||||
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
|
||||
|
||||
if (MF.getTarget().Options.DisableTailCalls)
|
||||
isTailCall = false;
|
||||
|
@ -2741,7 +2796,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
int FPDiff = 0;
|
||||
if (isTailCall && !IsSibcall && !IsMustTail) {
|
||||
// Lower arguments at fp - stackoffset + fpdiff.
|
||||
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
|
||||
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
|
||||
|
||||
FPDiff = NumBytesCallerPushed - NumBytes;
|
||||
|
@ -2884,7 +2938,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
}
|
||||
}
|
||||
|
||||
if (Is64Bit && isVarArg && !IsWin64) {
|
||||
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
|
||||
// From AMD64 ABI document:
|
||||
// For calls that may call functions that use varargs or stdargs
|
||||
// (prototype-less calls or calls to functions containing ellipsis (...) in
|
||||
|
@ -2906,6 +2960,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
DAG.getConstant(NumXMMRegs, MVT::i8)));
|
||||
}
|
||||
|
||||
if (Is64Bit && isVarArg && IsMustTail) {
|
||||
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
|
||||
for (const auto &F : Forwards) {
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
|
||||
RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
|
||||
}
|
||||
}
|
||||
|
||||
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
|
||||
// don't need this because the eligibility check rejects calls that require
|
||||
// shuffling arguments passed in memory.
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineValueType.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -69,6 +71,22 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
|
||||
unsigned NumLocalDynamics;
|
||||
|
||||
public:
|
||||
/// Describes a register that needs to be forwarded from the prologue to a
|
||||
/// musttail call.
|
||||
struct Forward {
|
||||
Forward(unsigned VReg, MCPhysReg PReg, MVT VT)
|
||||
: VReg(VReg), PReg(PReg), VT(VT) {}
|
||||
/// Virtual register that holds the incoming value; it is written in the
/// prologue and read back when a musttail call is lowered.
unsigned VReg;
|
||||
/// Physical register the value is passed in at the musttail call.
MCPhysReg PReg;
|
||||
/// Value type used when copying the value out of VReg.
MVT VT;
|
||||
};
|
||||
|
||||
private:
|
||||
/// ForwardedMustTailRegParms - A list of virtual and physical registers
|
||||
/// that must be forwarded to every musttail call.
|
||||
std::vector<Forward> ForwardedMustTailRegParms;
|
||||
|
||||
public:
|
||||
X86MachineFunctionInfo() : ForceFramePointer(false),
|
||||
CalleeSavedFrameSize(0),
|
||||
|
@ -138,6 +156,9 @@ public:
|
|||
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
|
||||
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
|
||||
|
||||
/// Returns a mutable reference to the list of registers that must be
/// forwarded to every musttail call; callers may append to it.
std::vector<Forward> &getForwardedMustTailRegParms() {
|
||||
return ForwardedMustTailRegParms;
|
||||
}
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
|
||||
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
|
||||
|
||||
; Test that we actually spill and reload all arguments in the variadic argument
|
||||
; pack. Doing a normal call will clobber all argument registers, and we will
|
||||
; spill around it. A simple adjustment should not require any XMM spills.
|
||||
|
||||
declare void(i8*, ...)* @get_f(i8* %this)
|
||||
|
||||
define void @f_thunk(i8* %this, ...) {
|
||||
%fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
|
||||
musttail call void (i8*, ...)* %fptr(i8* %this, ...)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Save and restore 6 GPRs, 8 XMMs, and AL around the call.
|
||||
|
||||
; LINUX-LABEL: f_thunk:
|
||||
; LINUX-DAG: movq %rdi, {{.*}}
|
||||
; LINUX-DAG: movq %rsi, {{.*}}
|
||||
; LINUX-DAG: movq %rdx, {{.*}}
|
||||
; LINUX-DAG: movq %rcx, {{.*}}
|
||||
; LINUX-DAG: movq %r8, {{.*}}
|
||||
; LINUX-DAG: movq %r9, {{.*}}
|
||||
; LINUX-DAG: movb %al, {{.*}}
|
||||
; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
|
||||
; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
|
||||
; LINUX: callq get_f
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
|
||||
; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
|
||||
; LINUX-DAG: movq {{.*}}, %rdi
|
||||
; LINUX-DAG: movq {{.*}}, %rsi
|
||||
; LINUX-DAG: movq {{.*}}, %rdx
|
||||
; LINUX-DAG: movq {{.*}}, %rcx
|
||||
; LINUX-DAG: movq {{.*}}, %r8
|
||||
; LINUX-DAG: movq {{.*}}, %r9
|
||||
; LINUX-DAG: movb {{.*}}, %al
|
||||
; LINUX: jmpq *{{.*}} # TAILCALL
|
||||
|
||||
; WINDOWS-LABEL: f_thunk:
|
||||
; WINDOWS-NOT: mov{{.}}ps
|
||||
; WINDOWS-DAG: movq %rdx, {{.*}}
|
||||
; WINDOWS-DAG: movq %rcx, {{.*}}
|
||||
; WINDOWS-DAG: movq %r8, {{.*}}
|
||||
; WINDOWS-DAG: movq %r9, {{.*}}
|
||||
; WINDOWS-NOT: mov{{.}}ps
|
||||
; WINDOWS: callq get_f
|
||||
; WINDOWS-NOT: mov{{.}}ps
|
||||
; WINDOWS-DAG: movq {{.*}}, %rdx
|
||||
; WINDOWS-DAG: movq {{.*}}, %rcx
|
||||
; WINDOWS-DAG: movq {{.*}}, %r8
|
||||
; WINDOWS-DAG: movq {{.*}}, %r9
|
||||
; WINDOWS-NOT: mov{{.}}ps
|
||||
; WINDOWS: jmpq *{{.*}} # TAILCALL
|
||||
|
||||
; This thunk shouldn't require any spills and reloads, assuming the register
|
||||
; allocator knows what it's doing.
|
||||
|
||||
define void @g_thunk(i8* %fptr_i8, ...) {
|
||||
%fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
|
||||
musttail call void (i8*, ...)* %fptr(i8* %fptr_i8, ...)
|
||||
ret void
|
||||
}
|
||||
|
||||
; LINUX-LABEL: g_thunk:
|
||||
; LINUX-NOT: movq
|
||||
; LINUX: jmpq *%rdi # TAILCALL
|
||||
|
||||
; WINDOWS-LABEL: g_thunk:
|
||||
; WINDOWS-NOT: movq
|
||||
; WINDOWS: jmpq *%rcx # TAILCALL
|
||||
|
||||
; Do a simple multi-exit multi-bb test.
|
||||
|
||||
%struct.Foo = type { i1, i8*, i8* }
|
||||
|
||||
@g = external global i32
|
||||
|
||||
define void @h_thunk(%struct.Foo* %this, ...) {
|
||||
%cond_p = getelementptr %struct.Foo* %this, i32 0, i32 0
|
||||
%cond = load i1* %cond_p
|
||||
br i1 %cond, label %then, label %else
|
||||
|
||||
then:
|
||||
%a_p = getelementptr %struct.Foo* %this, i32 0, i32 1
|
||||
%a_i8 = load i8** %a_p
|
||||
%a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
|
||||
musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
|
||||
ret void
|
||||
|
||||
else:
|
||||
%b_p = getelementptr %struct.Foo* %this, i32 0, i32 2
|
||||
%b_i8 = load i8** %b_p
|
||||
%b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
|
||||
store i32 42, i32* @g
|
||||
musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
|
||||
ret void
|
||||
}
|
||||
|
||||
; LINUX-LABEL: h_thunk:
|
||||
; LINUX: jne
|
||||
; LINUX: jmpq *{{.*}} # TAILCALL
|
||||
; LINUX: jmpq *{{.*}} # TAILCALL
|
||||
; WINDOWS-LABEL: h_thunk:
|
||||
; WINDOWS: jne
|
||||
; WINDOWS: jmpq *{{.*}} # TAILCALL
|
||||
; WINDOWS: jmpq *{{.*}} # TAILCALL
|
Loading…
Reference in New Issue