forked from OSchip/llvm-project
Add support, test cases, and docs for the new GHC calling
convention. Patch by David Terei! llvm-svn: 98212
This commit is contained in:
parent
aa87b4eab1
commit
a179e4d0a8
|
@ -1679,7 +1679,8 @@ $ llc -regalloc=linearscan file.bc -o ln.s;
|
|||
supported on x86/x86-64 and PowerPC. It is performed if:</p>
|
||||
|
||||
<ul>
|
||||
<li>Caller and callee have the calling convention <tt>fastcc</tt>.</li>
|
||||
<li>Caller and callee have the calling convention <tt>fastcc</tt> or
|
||||
<tt>cc 10</tt> (GHC call convention).</li>
|
||||
|
||||
<li>The call is a tail call - in tail position (ret immediately follows call
|
||||
and ret uses value of call or is void).</li>
|
||||
|
|
|
@ -691,9 +691,9 @@ define i32 @main() { <i>; i32()* </i>
|
|||
target, without having to conform to an externally specified ABI
|
||||
(Application Binary Interface).
|
||||
<a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
|
||||
when this convention is used.</a> This calling convention does not
|
||||
support varargs and requires the prototype of all callees to exactly match
|
||||
the prototype of the function definition.</dd>
|
||||
when this or the GHC convention is used.</a> This calling convention
|
||||
does not support varargs and requires the prototype of all callees to
|
||||
exactly match the prototype of the function definition.</dd>
|
||||
|
||||
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
|
||||
<dd>This calling convention attempts to make code in the caller as efficient
|
||||
|
@ -703,6 +703,26 @@ define i32 @main() { <i>; i32()* </i>
|
|||
does not support varargs and requires the prototype of all callees to
|
||||
exactly match the prototype of the function definition.</dd>
|
||||
|
||||
<dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
|
||||
<dd>This calling convention has been implemented specifically for use by the
|
||||
<a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
|
||||
It passes everything in registers, going to extremes to achieve this by
|
||||
disabling callee save registers. This calling convention should not be
|
||||
used lightly but only for specific situations such as an alternative to
|
||||
the <em>register pinning</em> performance technique often used when
|
||||
implementing functional programming languages. At the moment only X86
|
||||
supports this convention and it has the following limitations:
|
||||
<ul>
|
||||
<li>On <em>X86-32</em>, at most 4 integer type parameters are supported. No
|
||||
floating point types are supported.</li>
|
||||
<li>On <em>X86-64</em>, at most 10 integer type parameters and
|
||||
6 floating point parameters are supported.</li>
|
||||
</ul>
|
||||
This calling convention supports
|
||||
<a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
|
||||
requires that both the caller and the callee use it.
|
||||
</dd>
|
||||
|
||||
<dt><b>"<tt>cc <<em>n</em>></tt>" - Numbered convention</b>:</dt>
|
||||
<dd>Any calling convention may be specified by number, allowing
|
||||
target-specific calling conventions to be used. Target specific calling
|
||||
|
|
|
@ -44,6 +44,9 @@ namespace CallingConv {
|
|||
// call does not break any live ranges in the caller side.
|
||||
Cold = 9,
|
||||
|
||||
// GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
|
||||
GHC = 10,
|
||||
|
||||
// Target - This is the start of the target-specific calling conventions,
|
||||
// e.g. fastcall and thiscall on X86.
|
||||
FirstTargetCC = 64,
|
||||
|
|
|
@ -221,6 +221,20 @@ def CC_X86_Win64_C : CallingConv<[
|
|||
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
|
||||
]>;
|
||||
|
||||
// GHC calling convention for x86-64. Arguments are assigned to registers only:
// the rules below cover up to ten integer arguments and up to six
// floating-point/vector arguments. This definition contains no stack-assignment
// or delegation rule for arguments beyond those registers.
def CC_X86_64_GHC : CallingConv<[
|
||||
// Promote i8/i16/i32 arguments to i64.
|
||||
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
|
||||
|
||||
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
|
||||
CCIfType<[i64],
|
||||
CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
|
||||
|
||||
// Pass in STG registers: F1, F2, F3, F4, D1, D2
// (only applied when the subtarget reports hasSSE1(); the listed vector types
// share the same XMM register pool as f32/f64).
|
||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasSSE1()",
|
||||
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 C Calling Convention
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -320,3 +334,11 @@ def CC_X86_32_FastCC : CallingConv<[
|
|||
// Otherwise, same as everything else.
|
||||
CCDelegateTo<CC_X86_32_Common>
|
||||
]>;
|
||||
|
||||
// GHC calling convention for x86-32. Only integer arguments are handled, and
// only via the four registers listed below; this definition contains no rule
// for floating-point types and no stack-assignment or delegation fallback.
def CC_X86_32_GHC : CallingConv<[
|
||||
// Promote i8/i16 arguments to i32.
|
||||
CCIfType<[i8, i16], CCPromoteToType<i32>>,
|
||||
|
||||
// Pass in STG registers: Base, Sp, Hp, R1
|
||||
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
|
||||
]>;
|
||||
|
|
|
@ -172,7 +172,9 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
|
|||
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
|
||||
bool isTaillCall) {
|
||||
if (Subtarget->is64Bit()) {
|
||||
if (Subtarget->isTargetWin64())
|
||||
if (CC == CallingConv::GHC)
|
||||
return CC_X86_64_GHC;
|
||||
else if (Subtarget->isTargetWin64())
|
||||
return CC_X86_Win64_C;
|
||||
else
|
||||
return CC_X86_64_C;
|
||||
|
@ -182,6 +184,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
|
|||
return CC_X86_32_FastCall;
|
||||
else if (CC == CallingConv::Fast)
|
||||
return CC_X86_32_FastCC;
|
||||
else if (CC == CallingConv::GHC)
|
||||
return CC_X86_32_GHC;
|
||||
else
|
||||
return CC_X86_32_C;
|
||||
}
|
||||
|
|
|
@ -1378,6 +1378,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
|
|||
return !Subtarget->is64Bit();
|
||||
case CallingConv::Fast:
|
||||
return GuaranteedTailCallOpt;
|
||||
case CallingConv::GHC:
|
||||
return GuaranteedTailCallOpt;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1385,7 +1387,9 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
|
|||
/// given CallingConvention value.
|
||||
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
|
||||
if (Subtarget->is64Bit()) {
|
||||
if (Subtarget->isTargetWin64())
|
||||
if (CC == CallingConv::GHC)
|
||||
return CC_X86_64_GHC;
|
||||
else if (Subtarget->isTargetWin64())
|
||||
return CC_X86_Win64_C;
|
||||
else
|
||||
return CC_X86_64_C;
|
||||
|
@ -1395,6 +1399,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
|
|||
return CC_X86_32_FastCall;
|
||||
else if (CC == CallingConv::Fast)
|
||||
return CC_X86_32_FastCC;
|
||||
else if (CC == CallingConv::GHC)
|
||||
return CC_X86_32_GHC;
|
||||
else
|
||||
return CC_X86_32_C;
|
||||
}
|
||||
|
@ -1412,10 +1418,16 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
|
|||
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
|
||||
}
|
||||
|
||||
/// IsTailCallConvention - Return true if the calling convention is one that
|
||||
/// supports tail call optimization, i.e. CC is either CallingConv::Fast
/// (fastcc) or CallingConv::GHC (cc 10). Every other convention yields false.
|
||||
static bool IsTailCallConvention(CallingConv::ID CC) {
|
||||
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
|
||||
}
|
||||
|
||||
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
|
||||
/// a tailcall target by changing its ABI.
|
||||
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
|
||||
return GuaranteedTailCallOpt && CC == CallingConv::Fast;
|
||||
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
@ -1479,8 +1491,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
bool Is64Bit = Subtarget->is64Bit();
|
||||
bool IsWin64 = Subtarget->isTargetWin64();
|
||||
|
||||
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
|
||||
"Var args not supported with calling convention fastcc");
|
||||
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
|
||||
"Var args not supported with calling convention fastcc or ghc");
|
||||
|
||||
// Assign locations to all of the incoming arguments.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
|
@ -1683,7 +1695,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||
} else {
|
||||
BytesToPopOnReturn = 0; // Callee pops nothing.
|
||||
// If this is an sret function, the return should pop the hidden pointer.
|
||||
if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
|
||||
if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
|
||||
BytesToPopOnReturn = 4;
|
||||
}
|
||||
|
||||
|
@ -1779,8 +1791,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
|||
++NumTailCalls;
|
||||
}
|
||||
|
||||
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
|
||||
"Var args not supported with calling convention fastcc");
|
||||
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
|
||||
"Var args not supported with calling convention fastcc or ghc");
|
||||
|
||||
// Analyze operands of the call, assigning locations to each operand.
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
|
@ -1794,7 +1806,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
|||
// This is a sibcall. The memory operands are available in caller's
|
||||
// own caller's stack.
|
||||
NumBytes = 0;
|
||||
else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast)
|
||||
else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
|
||||
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
|
||||
|
||||
int FPDiff = 0;
|
||||
|
@ -2150,7 +2162,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
|||
unsigned NumBytesForCalleeToPush;
|
||||
if (IsCalleePop(isVarArg, CallConv))
|
||||
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
|
||||
else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
|
||||
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
|
||||
// If this is a call to a struct-return function, the callee
|
||||
// pops the hidden struct pointer, so we have to push it back.
|
||||
// This is common for Darwin/X86, Linux & Mingw32 targets.
|
||||
|
@ -2288,14 +2300,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
|
|||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SelectionDAG& DAG) const {
|
||||
if (CalleeCC != CallingConv::Fast &&
|
||||
if (!IsTailCallConvention(CalleeCC) &&
|
||||
CalleeCC != CallingConv::C)
|
||||
return false;
|
||||
|
||||
// If -tailcallopt is specified, make fastcc and GHC functions tail-callable.
|
||||
const Function *CallerF = DAG.getMachineFunction().getFunction();
|
||||
if (GuaranteedTailCallOpt) {
|
||||
if (CalleeCC == CallingConv::Fast &&
|
||||
if (IsTailCallConvention(CalleeCC) &&
|
||||
CallerF->getCallingConv() == CalleeCC)
|
||||
return true;
|
||||
return false;
|
||||
|
|
|
@ -294,13 +294,20 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
|
|||
const unsigned *
|
||||
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
bool callsEHReturn = false;
|
||||
bool ghcCall = false;
|
||||
|
||||
if (MF) {
|
||||
const MachineFrameInfo *MFI = MF->getFrameInfo();
|
||||
const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
|
||||
callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
|
||||
const Function *F = MF->getFunction();
|
||||
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
|
||||
}
|
||||
|
||||
static const unsigned GhcCalleeSavedRegs[] = {
|
||||
0
|
||||
};
|
||||
|
||||
static const unsigned CalleeSavedRegs32Bit[] = {
|
||||
X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
|
||||
};
|
||||
|
@ -326,7 +333,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
X86::XMM14, X86::XMM15, 0
|
||||
};
|
||||
|
||||
if (Is64Bit) {
|
||||
if (ghcCall) {
|
||||
return GhcCalleeSavedRegs;
|
||||
} else if (Is64Bit) {
|
||||
if (IsWin64)
|
||||
return CalleeSavedRegsWin64;
|
||||
else
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: llc < %s -tailcallopt -mtriple=i686-linux-gnu | FileCheck %s
|
||||
|
||||
; Test the GHC call convention works (x86-32)
|
||||
|
||||
@base = external global i32 ; assigned to register: EBX
|
||||
@sp = external global i32 ; assigned to register: EBP
|
||||
@hp = external global i32 ; assigned to register: EDI
|
||||
@r1 = external global i32 ; assigned to register: ESI
|
||||
|
||||
define void @zap(i32 %a, i32 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: movl {{[0-9]*}}(%esp), %ebx
|
||||
; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
|
||||
; CHECK-NEXT: call addtwo
|
||||
%0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
|
||||
; CHECK: call foo
|
||||
call void @foo() nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind {
|
||||
entry:
|
||||
; CHECK: leal (%ebx,%ebp), %eax
|
||||
%0 = add i32 %x, %y
|
||||
; CHECK-NEXT: ret
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define cc 10 void @foo() nounwind {
|
||||
entry:
|
||||
; CHECK: movl base, %ebx
|
||||
; CHECK-NEXT: movl sp, %ebp
|
||||
; CHECK-NEXT: movl hp, %edi
|
||||
; CHECK-NEXT: movl r1, %esi
|
||||
%0 = load i32* @r1
|
||||
%1 = load i32* @hp
|
||||
%2 = load i32* @sp
|
||||
%3 = load i32* @base
|
||||
; CHECK: jmp bar
|
||||
tail call cc 10 void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare cc 10 void @bar(i32, i32, i32, i32)
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu | FileCheck %s
|
||||
|
||||
; Check the GHC call convention works (x86-64)
|
||||
|
||||
@base = external global i64 ; assigned to register: R13
|
||||
@sp = external global i64 ; assigned to register: RBP
|
||||
@hp = external global i64 ; assigned to register: R12
|
||||
@r1 = external global i64 ; assigned to register: RBX
|
||||
@r2 = external global i64 ; assigned to register: R14
|
||||
@r3 = external global i64 ; assigned to register: RSI
|
||||
@r4 = external global i64 ; assigned to register: RDI
|
||||
@r5 = external global i64 ; assigned to register: R8
|
||||
@r6 = external global i64 ; assigned to register: R9
|
||||
@splim = external global i64 ; assigned to register: R15
|
||||
|
||||
@f1 = external global float ; assigned to register: XMM1
|
||||
@f2 = external global float ; assigned to register: XMM2
|
||||
@f3 = external global float ; assigned to register: XMM3
|
||||
@f4 = external global float ; assigned to register: XMM4
|
||||
@d1 = external global double ; assigned to register: XMM5
|
||||
@d2 = external global double ; assigned to register: XMM6
|
||||
|
||||
define void @zap(i64 %a, i64 %b) nounwind {
|
||||
entry:
|
||||
; CHECK: movq %rdi, %r13
|
||||
; CHECK-NEXT: movq %rsi, %rbp
|
||||
; CHECK-NEXT: callq addtwo
|
||||
%0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
|
||||
; CHECK: callq foo
|
||||
call void @foo() nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
|
||||
entry:
|
||||
; CHECK: leaq (%r13,%rbp), %rax
|
||||
%0 = add i64 %x, %y
|
||||
; CHECK-NEXT: ret
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
define cc 10 void @foo() nounwind {
|
||||
entry:
|
||||
; CHECK: movq base(%rip), %r13
|
||||
; CHECK-NEXT: movq sp(%rip), %rbp
|
||||
; CHECK-NEXT: movq hp(%rip), %r12
|
||||
; CHECK-NEXT: movq r1(%rip), %rbx
|
||||
; CHECK-NEXT: movq r2(%rip), %r14
|
||||
; CHECK-NEXT: movq r3(%rip), %rsi
|
||||
; CHECK-NEXT: movq r4(%rip), %rdi
|
||||
; CHECK-NEXT: movq r5(%rip), %r8
|
||||
; CHECK-NEXT: movq r6(%rip), %r9
|
||||
; CHECK-NEXT: movq splim(%rip), %r15
|
||||
; CHECK-NEXT: movss f1(%rip), %xmm1
|
||||
; CHECK-NEXT: movss f2(%rip), %xmm2
|
||||
; CHECK-NEXT: movss f3(%rip), %xmm3
|
||||
; CHECK-NEXT: movss f4(%rip), %xmm4
|
||||
; CHECK-NEXT: movsd d1(%rip), %xmm5
|
||||
; CHECK-NEXT: movsd d2(%rip), %xmm6
|
||||
%0 = load double* @d2
|
||||
%1 = load double* @d1
|
||||
%2 = load float* @f4
|
||||
%3 = load float* @f3
|
||||
%4 = load float* @f2
|
||||
%5 = load float* @f1
|
||||
%6 = load i64* @splim
|
||||
%7 = load i64* @r6
|
||||
%8 = load i64* @r5
|
||||
%9 = load i64* @r4
|
||||
%10 = load i64* @r3
|
||||
%11 = load i64* @r2
|
||||
%12 = load i64* @r1
|
||||
%13 = load i64* @hp
|
||||
%14 = load i64* @sp
|
||||
%15 = load i64* @base
|
||||
; CHECK: jmp bar
|
||||
tail call cc 10 void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
|
||||
i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
|
||||
float %5, float %4, float %3, float %2, double %1,
|
||||
double %0 ) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
|
||||
float, float, float, float, double, double)
|
||||
|
Loading…
Reference in New Issue