forked from OSchip/llvm-project
[PowerPC] fix a bug in TCO eligibility check
If the callee and caller use different calling conventions, we cannot apply TCO if the callee requires arguments on the stack; e.g. the C calling convention and Fast CC use the same registers for parameter passing, but the stack offsets are not necessarily the same. This patch also recommits r319218 "[PowerPC] Allow tail calls of fastcc functions from C CallingConv functions." by @sfertile, since the problem reported in r320106 should now be fixed. Differential Revision: https://reviews.llvm.org/D40893 llvm-svn: 321579
This commit is contained in:
parent
0c958fba14
commit
ca3cdd7f27
|
@ -4397,13 +4397,18 @@ hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
|
|||
static bool
|
||||
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
|
||||
CallingConv::ID CalleeCC) {
|
||||
// Tail or Sibling call optimization (TCO/SCO) needs callee and caller to
|
||||
// have the same calling convention.
|
||||
if (CallerCC != CalleeCC)
|
||||
// Tail calls are possible with fastcc and ccc.
|
||||
auto isTailCallableCC = [] (CallingConv::ID CC){
|
||||
return CC == CallingConv::C || CC == CallingConv::Fast;
|
||||
};
|
||||
if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
|
||||
return false;
|
||||
|
||||
// Tail or Sibling calls can be done with fastcc/ccc.
|
||||
return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C);
|
||||
// We can safely tail call both fastcc and ccc callees from a c calling
|
||||
// convention caller. If the caller is fastcc, we may have less stack space
|
||||
// than a non-fastcc caller with the same signature so disable tail-calls in
|
||||
// that case.
|
||||
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -4434,10 +4439,28 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
|
|||
// Callee contains any byval parameter is not supported, too.
|
||||
// Note: This is a quick work around, because in some cases, e.g.
|
||||
// caller's stack size > callee's stack size, we are still able to apply
|
||||
// sibling call optimization. See: https://reviews.llvm.org/D23441#513574
|
||||
// sibling call optimization. For example, gcc is able to do SCO for caller1
|
||||
// in the following example, but not for caller2.
|
||||
// struct test {
|
||||
// long int a;
|
||||
// char ary[56];
|
||||
// } gTest;
|
||||
// __attribute__((noinline)) int callee(struct test v, struct test *b) {
|
||||
// b->a = v.a;
|
||||
// return 0;
|
||||
// }
|
||||
// void caller1(struct test a, struct test c, struct test *b) {
|
||||
// callee(gTest, b); }
|
||||
// void caller2(struct test *b) { callee(gTest, b); }
|
||||
if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
|
||||
return false;
|
||||
|
||||
// If callee and caller use different calling conventions, we cannot pass
|
||||
// parameters on stack since offsets for the parameter area may be different.
|
||||
if (Caller.getCallingConv() != CalleeCC &&
|
||||
needStackSlotPassParameters(Subtarget, Outs))
|
||||
return false;
|
||||
|
||||
// No TCO/SCO on indirect calls because the caller has to restore its TOC
|
||||
if (!isFunctionGlobalAddress(Callee) &&
|
||||
!isa<ExternalSymbolSDNode>(Callee))
|
||||
|
|
|
@ -42,10 +42,10 @@ if.end4: ; preds = %if.end
|
|||
if.then6: ; preds = %if.end4
|
||||
%call7 = tail call fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c)
|
||||
br label %return
|
||||
; No duplication here because the calling convention mismatch means we won't tail-call
|
||||
; tail calling a fastcc function from a ccc function is supported.
|
||||
; CHECK_LABEL: if.then13:
|
||||
; CHECK: tail call fastcc signext i32 @call3
|
||||
; CHECK-NEXT: br
|
||||
; CHECK: %[[T2:[a-zA-Z0-9]+]] = tail call fastcc signext i32 @call3
|
||||
; CHECK-NEXT: ret i32 %[[T2]]
|
||||
|
||||
return: ; preds = %if.end4, %if.then6, %if.then2, %if.then
|
||||
%retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %c, %if.end4 ]
|
||||
|
|
|
@ -41,6 +41,15 @@ define void @caller_64_64_copy([8 x i64] %a, [8 x i64] %b) #1 {
|
|||
; CHECK-SCO: b callee_64_64_copy
|
||||
}
|
||||
|
||||
define internal fastcc void @callee_64_64_copy_fastcc([8 x i64] %a, [8 x i64] %b) #0 { ret void }
|
||||
define void @caller_64_64_copy_ccc([8 x i64] %a, [8 x i64] %b) #1 {
|
||||
tail call fastcc void @callee_64_64_copy_fastcc([8 x i64] %a, [8 x i64] %b)
|
||||
ret void
|
||||
; If caller and callee use different calling conventions, we cannot apply TCO.
|
||||
; CHECK-SCO-LABEL: caller_64_64_copy_ccc:
|
||||
; CHECK-SCO: bl callee_64_64_copy_fastcc
|
||||
}
|
||||
|
||||
define void @caller_64_64_reorder_copy([8 x i64] %a, [8 x i64] %b) #1 {
|
||||
tail call void @callee_64_64_copy([8 x i64] %b, [8 x i64] %a)
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue