add support, testcases, and dox for the new GHC calling convention.
Patch by David Terei!

llvm-svn: 98212
Chris Lattner 2010-03-11 00:22:57 +00:00
parent aa87b4eab1
commit a179e4d0a8
9 changed files with 219 additions and 17 deletions

docs/CodeGenerator.html

@@ -1679,7 +1679,8 @@ $ llc -regalloc=linearscan file.bc -o ln.s;
    supported on x86/x86-64 and PowerPC. It is performed if:</p>
 <ul>
-  <li>Caller and callee have the calling convention <tt>fastcc</tt>.</li>
+  <li>Caller and callee have the calling convention <tt>fastcc</tt> or
+      <tt>cc 10</tt> (GHC call convention).</li>
   <li>The call is a tail call - in tail position (ret immediately follows call
       and ret uses value of call or is void).</li>
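
To make the tail-position requirement concrete, here is a minimal IR sketch (illustrative only, not part of this commit; @caller and @callee are made-up names). Both bullets are satisfied: caller and callee share fastcc, and the ret immediately follows the call and returns its value, so -tailcallopt may lower the call to a jump.

; illustrative sketch, assuming -tailcallopt; not from this commit
declare fastcc i32 @callee(i32)

define fastcc i32 @caller(i32 %n) {
entry:
  %r = tail call fastcc i32 @callee(i32 %n)   ; call in tail position
  ret i32 %r                                  ; ret uses the call's value
}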

docs/LangRef.html

@@ -691,9 +691,9 @@ define i32 @main() {   <i>; i32()* </i>
       target, without having to conform to an externally specified ABI
       (Application Binary Interface).
       <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
-      when this convention is used.</a> This calling convention does not
-      support varargs and requires the prototype of all callees to exactly match
-      the prototype of the function definition.</dd>
+      when this or the GHC convention is used.</a> This calling convention
+      does not support varargs and requires the prototype of all callees to
+      exactly match the prototype of the function definition.</dd>
 
   <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
   <dd>This calling convention attempts to make code in the caller as efficient
@@ -703,6 +703,26 @@ define i32 @main() {   <i>; i32()* </i>
       does not support varargs and requires the prototype of all callees to
       exactly match the prototype of the function definition.</dd>
 
+  <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
+  <dd>This calling convention has been implemented specifically for use by the
+      <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
+      It passes everything in registers, going to extremes to achieve this by
+      disabling callee-saved registers. This calling convention should not be
+      used lightly but only for specific situations such as an alternative to
+      the <em>register pinning</em> performance technique often used when
+      implementing functional programming languages. At the moment only X86
+      supports this convention and it has the following limitations:
+      <ul>
+        <li>On <em>X86-32</em> only supports up to 4 integer type parameters.
+            No floating point types are supported.</li>
+        <li>On <em>X86-64</em> only supports up to 10 integer type parameters
+            and 6 floating point parameters.</li>
+      </ul>
+      This calling convention supports
+      <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+      requires that both the caller and callee use it.
+  </dd>
+
   <dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
   <dd>Any calling convention may be specified by number, allowing
       target-specific calling conventions to be used. Target specific calling
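
As a hedged illustration of the X86-32 limits documented above (hypothetical example, not part of the commit; all names invented): a cc 10 function may take at most four integer parameters, which land in EBX, EBP, EDI and ESI, and no floating point parameters.

; illustrative sketch of the x86-32 limits; not from this commit
declare cc 10 void @stg_fun(i32, i32, i32, i32)

define cc 10 void @enter(i32 %base, i32 %sp, i32 %hp, i32 %r1) {
entry:
  ; four integer arguments is the x86-32 maximum for this convention
  tail call cc 10 void @stg_fun(i32 %base, i32 %sp, i32 %hp, i32 %r1)
  ret void
}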

include/llvm/CallingConv.h

@@ -44,6 +44,9 @@ namespace CallingConv {
     // call does not break any live ranges in the caller side.
     Cold = 9,
 
+    // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
+    GHC = 10,
+
     // Target - This is the start of the target-specific calling conventions,
     // e.g. fastcall and thiscall on X86.
     FirstTargetCC = 64,
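
The enum value is what the textual IR names after cc; a small hypothetical pairing (declarations invented for illustration):

; CallingConv::GHC == 10 is written "cc 10" in LLVM assembly
declare cc 10 void @ghc_entry()
; CallingConv::Cold == 9 has the dedicated keyword "coldcc"
declare coldcc void @cold_path()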

lib/Target/X86/X86CallingConv.td

@@ -221,6 +221,20 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
 
+def CC_X86_64_GHC : CallingConv<[
+  // Promote i8/i16/i32 arguments to i64.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+  CCIfType<[i64],
+            CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+  // Pass in STG registers: F1, F2, F3, F4, D1, D2
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+            CCIfSubtarget<"hasSSE1()",
+            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // X86 C Calling Convention
 //===----------------------------------------------------------------------===//
@@ -320,3 +334,11 @@ def CC_X86_32_FastCC : CallingConv<[
   // Otherwise, same as everything else.
   CCDelegateTo<CC_X86_32_Common>
 ]>;
+
+def CC_X86_32_GHC : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1
+  CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
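
As a sketch of what the promotion rule implies (hypothetical example, not from the commit): a sub-32-bit integer argument is widened to i32 by CCPromoteToType and then assigned to the next free register in the EBX, EBP, EDI, ESI sequence.

; illustrative only: %flag is promoted from i8 to i32 and, being the
; first integer argument, is passed in EBX under CC_X86_32_GHC
define cc 10 void @take_flag(i8 %flag) nounwind {
entry:
  ret void
}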

lib/Target/X86/X86FastISel.cpp

@@ -172,7 +172,9 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
 CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                            bool isTaillCall) {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -182,6 +184,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
       return CC_X86_32_FastCall;
     else if (CC == CallingConv::Fast)
       return CC_X86_32_FastCC;
+    else if (CC == CallingConv::GHC)
+      return CC_X86_32_GHC;
     else
       return CC_X86_32_C;
   }

lib/Target/X86/X86ISelLowering.cpp

@@ -1378,6 +1378,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
     return !Subtarget->is64Bit();
   case CallingConv::Fast:
     return GuaranteedTailCallOpt;
+  case CallingConv::GHC:
+    return GuaranteedTailCallOpt;
   }
 }
 
@@ -1385,7 +1387,9 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
 /// given CallingConvention value.
 CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -1395,6 +1399,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
       return CC_X86_32_FastCall;
     else if (CC == CallingConv::Fast)
       return CC_X86_32_FastCC;
+    else if (CC == CallingConv::GHC)
+      return CC_X86_32_GHC;
     else
       return CC_X86_32_C;
   }
@@ -1412,10 +1418,16 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        /*AlwaysInline=*/true, NULL, 0, NULL, 0);
 }
 
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+  return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
 /// a tailcall target by changing its ABI.
 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
-  return GuaranteedTailCallOpt && CC == CallingConv::Fast;
+  return GuaranteedTailCallOpt && IsTailCallConvention(CC);
 }
 
 SDValue
@@ -1479,8 +1491,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   bool Is64Bit = Subtarget->is64Bit();
   bool IsWin64 = Subtarget->isTargetWin64();
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1683,7 +1695,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   } else {
     BytesToPopOnReturn = 0; // Callee pops nothing.
     // If this is an sret function, the return should pop the hidden pointer.
-    if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
+    if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
       BytesToPopOnReturn = 4;
   }
 
@@ -1779,8 +1791,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     ++NumTailCalls;
   }
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1794,7 +1806,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // This is a sibcall. The memory operands are available in caller's
     // own caller's stack.
     NumBytes = 0;
-  else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast)
+  else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
@@ -2150,7 +2162,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   unsigned NumBytesForCalleeToPush;
   if (IsCalleePop(isVarArg, CallConv))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
+  else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2288,14 +2300,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     SelectionDAG& DAG) const {
-  if (CalleeCC != CallingConv::Fast &&
+  if (!IsTailCallConvention(CalleeCC) &&
       CalleeCC != CallingConv::C)
     return false;
 
   // If -tailcallopt is specified, make fastcc functions tail-callable.
   const Function *CallerF = DAG.getMachineFunction().getFunction();
   if (GuaranteedTailCallOpt) {
-    if (CalleeCC == CallingConv::Fast &&
+    if (IsTailCallConvention(CalleeCC) &&
         CallerF->getCallingConv() == CalleeCC)
       return true;
     return false;

lib/Target/X86/X86RegisterInfo.cpp

@@ -294,13 +294,20 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
 const unsigned *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool callsEHReturn = false;
+  bool ghcCall = false;
 
   if (MF) {
     const MachineFrameInfo *MFI = MF->getFrameInfo();
     const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
     callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+    const Function *F = MF->getFunction();
+    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
   }
 
+  static const unsigned GhcCalleeSavedRegs[] = {
+    0
+  };
+
   static const unsigned CalleeSavedRegs32Bit[] = {
     X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
   };
@@ -326,7 +333,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     X86::XMM14, X86::XMM15, 0
   };
 
-  if (Is64Bit) {
+  if (ghcCall) {
+    return GhcCalleeSavedRegs;
+  } else if (Is64Bit) {
     if (IsWin64)
       return CalleeSavedRegsWin64;
     else

test/CodeGen/X86/ghc-cc.ll

@@ -0,0 +1,45 @@
+; RUN: llc < %s -tailcallopt -mtriple=i686-linux-gnu | FileCheck %s
+
+; Test the GHC call convention works (x86-32)
+
+@base = external global i32 ; assigned to register: EBX
+@sp   = external global i32 ; assigned to register: EBP
+@hp   = external global i32 ; assigned to register: EDI
+@r1   = external global i32 ; assigned to register: ESI
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+  ; CHECK:      movl {{[0-9]*}}(%esp), %ebx
+  ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
+  ; CHECK-NEXT: call addtwo
+  %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
+  ; CHECK: call foo
+  call void @foo() nounwind
+  ret void
+}
+
+define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind {
+entry:
+  ; CHECK: leal (%ebx,%ebp), %eax
+  %0 = add i32 %x, %y
+  ; CHECK-NEXT: ret
+  ret i32 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+  ; CHECK:      movl base, %ebx
+  ; CHECK-NEXT: movl sp, %ebp
+  ; CHECK-NEXT: movl hp, %edi
+  ; CHECK-NEXT: movl r1, %esi
+  %0 = load i32* @r1
+  %1 = load i32* @hp
+  %2 = load i32* @sp
+  %3 = load i32* @base
+  ; CHECK: jmp bar
+  tail call cc 10 void @bar(i32 %3, i32 %2, i32 %1, i32 %0) nounwind
+  ret void
+}
+
+declare cc 10 void @bar(i32, i32, i32, i32)

test/CodeGen/X86/ghc-cc64.ll

@@ -0,0 +1,86 @@
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Check the GHC call convention works (x86-64)
+
+@base  = external global i64 ; assigned to register: R13
+@sp    = external global i64 ; assigned to register: RBP
+@hp    = external global i64 ; assigned to register: R12
+@r1    = external global i64 ; assigned to register: RBX
+@r2    = external global i64 ; assigned to register: R14
+@r3    = external global i64 ; assigned to register: RSI
+@r4    = external global i64 ; assigned to register: RDI
+@r5    = external global i64 ; assigned to register: R8
+@r6    = external global i64 ; assigned to register: R9
+@splim = external global i64 ; assigned to register: R15
+
+@f1 = external global float  ; assigned to register: XMM1
+@f2 = external global float  ; assigned to register: XMM2
+@f3 = external global float  ; assigned to register: XMM3
+@f4 = external global float  ; assigned to register: XMM4
+
+@d1 = external global double ; assigned to register: XMM5
+@d2 = external global double ; assigned to register: XMM6
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+  ; CHECK:      movq %rdi, %r13
+  ; CHECK-NEXT: movq %rsi, %rbp
+  ; CHECK-NEXT: callq addtwo
+  %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
+  ; CHECK: callq foo
+  call void @foo() nounwind
+  ret void
+}
+
+define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
+entry:
+  ; CHECK: leaq (%r13,%rbp), %rax
+  %0 = add i64 %x, %y
+  ; CHECK-NEXT: ret
+  ret i64 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+  ; CHECK:      movq base(%rip), %r13
+  ; CHECK-NEXT: movq sp(%rip), %rbp
+  ; CHECK-NEXT: movq hp(%rip), %r12
+  ; CHECK-NEXT: movq r1(%rip), %rbx
+  ; CHECK-NEXT: movq r2(%rip), %r14
+  ; CHECK-NEXT: movq r3(%rip), %rsi
+  ; CHECK-NEXT: movq r4(%rip), %rdi
+  ; CHECK-NEXT: movq r5(%rip), %r8
+  ; CHECK-NEXT: movq r6(%rip), %r9
+  ; CHECK-NEXT: movq splim(%rip), %r15
+  ; CHECK-NEXT: movss f1(%rip), %xmm1
+  ; CHECK-NEXT: movss f2(%rip), %xmm2
+  ; CHECK-NEXT: movss f3(%rip), %xmm3
+  ; CHECK-NEXT: movss f4(%rip), %xmm4
+  ; CHECK-NEXT: movsd d1(%rip), %xmm5
+  ; CHECK-NEXT: movsd d2(%rip), %xmm6
+  %0  = load double* @d2
+  %1  = load double* @d1
+  %2  = load float* @f4
+  %3  = load float* @f3
+  %4  = load float* @f2
+  %5  = load float* @f1
+  %6  = load i64* @splim
+  %7  = load i64* @r6
+  %8  = load i64* @r5
+  %9  = load i64* @r4
+  %10 = load i64* @r3
+  %11 = load i64* @r2
+  %12 = load i64* @r1
+  %13 = load i64* @hp
+  %14 = load i64* @sp
+  %15 = load i64* @base
+  ; CHECK: jmp bar
+  tail call cc 10 void @bar(i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
+                            i64 %10, i64 %9,  i64 %8,  i64 %7,  i64 %6,
+                            float %5, float %4, float %3, float %2,
+                            double %1, double %0) nounwind
+  ret void
+}
+
+declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
+                        float, float, float, float, double, double)