forked from OSchip/llvm-project
[ppc64] Enable sibling call optimization on ppc64 ELFv1/ELFv2 abi
This patch enables sibling call optimization on the ppc64 ELFv1/ELFv2 ABIs and adds a couple of test cases. It also passed the llvm/clang bootstrap test and SPEC2006 build/run/result validation. Original issue: https://llvm.org/bugs/show_bug.cgi?id=25617 Many thanks to Tom (tjablin) for his help; he contributed a lot to this patch. Thanks also to Hal and Kit for their invaluable opinions! Reviewers: hfinkel kbarton http://reviews.llvm.org/D16315 llvm-svn: 265506
This commit is contained in:
parent
024a623c55
commit
2e5973ef74
|
@ -19,6 +19,7 @@
|
|||
#include "PPCTargetMachine.h"
|
||||
#include "PPCTargetObjectFile.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
|
@ -36,12 +37,15 @@
|
|||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "ppc-lowering"
|
||||
|
||||
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
|
||||
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
|
||||
|
||||
|
@ -51,6 +55,12 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
|
|||
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
|
||||
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> DisableSCO("disable-ppc-sco",
|
||||
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
|
||||
|
||||
STATISTIC(NumTailCalls, "Number of tail calls");
|
||||
STATISTIC(NumSiblingCalls, "Number of sibling calls");
|
||||
|
||||
// FIXME: Remove this once the bug has been fixed!
|
||||
extern cl::opt<bool> ANDIGlueBug;
|
||||
|
||||
|
@ -3842,6 +3852,176 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
|
|||
return SPDiff;
|
||||
}
|
||||
|
||||
static bool isFunctionGlobalAddress(SDValue Callee);
|
||||
|
||||
static bool
|
||||
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
|
||||
// If !G, Callee can be an external symbol.
|
||||
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
|
||||
if (!G) return false;
|
||||
|
||||
const GlobalValue *GV = G->getGlobal();
|
||||
|
||||
if (GV->isDeclaration()) return false;
|
||||
|
||||
switch(GV->getLinkage()) {
|
||||
default: llvm_unreachable("unknow linkage type");
|
||||
case GlobalValue::AvailableExternallyLinkage:
|
||||
case GlobalValue::ExternalWeakLinkage:
|
||||
return false;
|
||||
|
||||
// Callee with weak linkage is allowed if it has hidden or protected
|
||||
// visibility
|
||||
case GlobalValue::LinkOnceAnyLinkage:
|
||||
case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
|
||||
case GlobalValue::WeakAnyLinkage:
|
||||
case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
|
||||
if (GV->hasDefaultVisibility())
|
||||
return false;
|
||||
|
||||
case GlobalValue::ExternalLinkage:
|
||||
case GlobalValue::InternalLinkage:
|
||||
case GlobalValue::PrivateLinkage:
|
||||
break;
|
||||
}
|
||||
|
||||
// With '-fPIC', calling default visiblity function need insert 'nop' after
|
||||
// function call, no matter that function resides in same module or not, so
|
||||
// we treat it as in different module.
|
||||
if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs) {
|
||||
assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
|
||||
|
||||
const unsigned PtrByteSize = 8;
|
||||
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
|
||||
|
||||
static const MCPhysReg GPR[] = {
|
||||
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
|
||||
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
|
||||
};
|
||||
static const MCPhysReg VR[] = {
|
||||
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
|
||||
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
|
||||
};
|
||||
|
||||
const unsigned NumGPRs = array_lengthof(GPR);
|
||||
const unsigned NumFPRs = 13;
|
||||
const unsigned NumVRs = array_lengthof(VR);
|
||||
const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
|
||||
|
||||
unsigned NumBytes = LinkageSize;
|
||||
unsigned AvailableFPRs = NumFPRs;
|
||||
unsigned AvailableVRs = NumVRs;
|
||||
|
||||
for (const ISD::OutputArg& Param : Outs) {
|
||||
if (Param.Flags.isNest()) continue;
|
||||
|
||||
if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
|
||||
PtrByteSize, LinkageSize, ParamAreaSize,
|
||||
NumBytes, AvailableFPRs, AvailableVRs,
|
||||
Subtarget.hasQPX()))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
|
||||
if (CS->arg_size() != CallerFn->getArgumentList().size())
|
||||
return false;
|
||||
|
||||
ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
|
||||
ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
|
||||
Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
|
||||
|
||||
for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
|
||||
const Value* CalleeArg = *CalleeArgIter;
|
||||
const Value* CallerArg = &(*CallerArgIter);
|
||||
if (CalleeArg == CallerArg)
|
||||
continue;
|
||||
|
||||
// e.g. @caller([4 x i64] %a, [4 x i64] %b) {
|
||||
// tail call @callee([4 x i64] undef, [4 x i64] %b)
|
||||
// }
|
||||
// 1st argument of callee is undef and has the same type as caller.
|
||||
if (CalleeArg->getType() == CallerArg->getType() &&
|
||||
isa<UndefValue>(CalleeArg))
|
||||
continue;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
|
||||
SDValue Callee,
|
||||
CallingConv::ID CalleeCC,
|
||||
ImmutableCallSite *CS,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SelectionDAG& DAG) const {
|
||||
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
|
||||
|
||||
if (DisableSCO && !TailCallOpt) return false;
|
||||
|
||||
// Variadic argument functions are not supported.
|
||||
if (isVarArg) return false;
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
|
||||
|
||||
// Tail or Sibling call optimization (TCO/SCO) needs callee and caller has
|
||||
// the same calling convention
|
||||
if (CallerCC != CalleeCC) return false;
|
||||
|
||||
// SCO support C calling convention
|
||||
if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
|
||||
return false;
|
||||
|
||||
// Functions containing by val parameters are not supported.
|
||||
if (std::any_of(Ins.begin(), Ins.end(),
|
||||
[](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
|
||||
return false;
|
||||
|
||||
// No TCO/SCO on indirect call because Caller have to restore its TOC
|
||||
if (!isFunctionGlobalAddress(Callee) &&
|
||||
!isa<ExternalSymbolSDNode>(Callee))
|
||||
return false;
|
||||
|
||||
// Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
|
||||
// (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
|
||||
// module.
|
||||
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
|
||||
if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
|
||||
return false;
|
||||
|
||||
// TCO allows altering callee ABI, so we don't have to check further.
|
||||
if (CalleeCC == CallingConv::Fast && TailCallOpt)
|
||||
return true;
|
||||
|
||||
if (DisableSCO) return false;
|
||||
|
||||
// If callee use the same argument list that caller is using, then we can
|
||||
// apply SCO on this case. If it is not, then we need to check if callee needs
|
||||
// stack for passing arguments.
|
||||
if (!hasSameArgumentList(MF.getFunction(), CS) &&
|
||||
needStackSlotPassParameters(Subtarget, Outs)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
|
||||
/// for tail call optimization. Targets which want to do tail call
|
||||
/// optimization should implement this function.
|
||||
|
@ -4479,9 +4659,32 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
bool IsPatchPoint = CLI.IsPatchPoint;
|
||||
ImmutableCallSite *CS = CLI.CS;
|
||||
|
||||
if (isTailCall)
|
||||
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
|
||||
Ins, DAG);
|
||||
if (isTailCall) {
|
||||
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
|
||||
isTailCall =
|
||||
IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
|
||||
isVarArg, Outs, Ins, DAG);
|
||||
else
|
||||
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
|
||||
Ins, DAG);
|
||||
if (isTailCall) {
|
||||
++NumTailCalls;
|
||||
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
|
||||
++NumSiblingCalls;
|
||||
|
||||
assert(isa<GlobalAddressSDNode>(Callee) &&
|
||||
"Callee should be an llvm::Function object.");
|
||||
DEBUG(
|
||||
const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
|
||||
const unsigned Width = 80 - strlen("TCO caller: ")
|
||||
- strlen(", callee linkage: 0, 0");
|
||||
dbgs() << "TCO caller: "
|
||||
<< left_justify(DAG.getMachineFunction().getName(), Width)
|
||||
<< ", callee linkage: "
|
||||
<< GV->getVisibility() << ", " << GV->getLinkage() << "\n"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isTailCall && CS && CS->isMustTailCall())
|
||||
report_fatal_error("failed to perform tail call elimination on a call "
|
||||
|
@ -4760,12 +4963,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||
bool isLittleEndian = Subtarget.isLittleEndian();
|
||||
unsigned NumOps = Outs.size();
|
||||
bool hasNest = false;
|
||||
bool IsSibCall = false;
|
||||
|
||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
|
||||
unsigned PtrByteSize = 8;
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
|
||||
IsSibCall = true;
|
||||
|
||||
// Mark this function as potentially containing a function that contains a
|
||||
// tail call. As a consequence the frame pointer will be used for dynamicalloc
|
||||
// and restoring the callers stack pointer in this functions epilog. This is
|
||||
|
@ -4885,9 +5092,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||
CallConv == CallingConv::Fast)
|
||||
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
|
||||
|
||||
int SPDiff = 0;
|
||||
|
||||
// Calculate by how many bytes the stack has to be adjusted in case of tail
|
||||
// call optimization.
|
||||
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
|
||||
if (!IsSibCall)
|
||||
SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
|
||||
|
||||
// To protect arguments on the stack from being clobbered in a tail call,
|
||||
// force all the loads to happen before doing any other lowering.
|
||||
|
@ -4896,8 +5106,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||
|
||||
// Adjust the stack pointer for the new arguments...
|
||||
// These operations are automatically eliminated by the prolog/epilog pass
|
||||
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
|
||||
dl);
|
||||
if (!IsSibCall)
|
||||
Chain = DAG.getCALLSEQ_START(Chain,
|
||||
DAG.getIntPtrConstant(NumBytes, dl, true), dl);
|
||||
SDValue CallSeqStart = Chain;
|
||||
|
||||
// Load the return address and frame pointer so it can be move somewhere else
|
||||
|
@ -5366,7 +5577,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
if (isTailCall)
|
||||
if (isTailCall && !IsSibCall)
|
||||
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
|
||||
FPOp, true, TailCallArguments);
|
||||
|
||||
|
|
|
@ -713,6 +713,16 @@ namespace llvm {
|
|||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SelectionDAG& DAG) const;
|
||||
|
||||
/// Check whether a call is eligible for tail call or sibling call
/// optimization on the 64-bit SVR4 ABI.
/// Takes the callee operand, the call's calling convention, the IR call
/// site, the variadic flag, the outgoing arguments (Outs), the incoming
/// values (Ins) and the selection DAG. Returns true if the call is eligible.
bool
|
||||
IsEligibleForTailCallOptimization_64SVR4(
|
||||
SDValue Callee,
|
||||
CallingConv::ID CalleeCC,
|
||||
ImmutableCallSite *CS,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SelectionDAG& DAG) const;
|
||||
|
||||
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
|
||||
int SPDiff,
|
||||
SDValue Chain,
|
||||
|
|
|
@ -14,7 +14,8 @@ define weak void @foo_weak() nounwind {
|
|||
define void @test_direct() nounwind readnone {
|
||||
; CHECK-LABEL: test_direct:
|
||||
tail call void @foo() nounwind
|
||||
; CHECK: bl foo
|
||||
; With tail call optimization the call may be emitted as a plain 'b' instead of 'bl'.
|
||||
; CHECK: [[BR:b[l]?]] foo
|
||||
; CHECK-NOT: nop
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
|
||||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
|
||||
|
||||
%"class.clang::NamedDecl" = type { i32 }
|
||||
declare void @__assert_fail();
|
||||
|
||||
define i8 @_ZNK5clang9NamedDecl23getLinkageAndVisibilityEv(
|
||||
%"class.clang::NamedDecl"* %this) {
|
||||
entry:
|
||||
%tobool = icmp eq %"class.clang::NamedDecl"* %this, null
|
||||
br i1 %tobool, label %cond.false, label %exit
|
||||
|
||||
cond.false:
|
||||
tail call void @__assert_fail()
|
||||
unreachable
|
||||
|
||||
exit:
|
||||
%DeclKind = getelementptr inbounds
|
||||
%"class.clang::NamedDecl",
|
||||
%"class.clang::NamedDecl"* %this, i64 0, i32 0
|
||||
%bf.load = load i32, i32* %DeclKind, align 4
|
||||
%call.i = tail call i8 @LVComputationKind(
|
||||
%"class.clang::NamedDecl"* %this,
|
||||
i32 %bf.load)
|
||||
ret i8 %call.i
|
||||
|
||||
; CHECK-SCO-SHRK-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
|
||||
; CHECK-SCO-SHRK: b LVComputationKind
|
||||
; CHECK-SCO-SHRK: #TC_RETURNd8
|
||||
; CHECK-SCO-SHRK: stdu 1, -{{[0-9]+}}(1)
|
||||
; CHECK-SCO-SHRK: bl __assert_fail
|
||||
;
|
||||
; CHECK-SCO-ONLY-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
|
||||
; CHECK-SCO-ONLY: stdu 1, -{{[0-9]+}}(1)
|
||||
; CHECK-SCO-ONLY: b LVComputationKind
|
||||
; CHECK-SCO-ONLY: #TC_RETURNd8
|
||||
; CHECK-SCO-ONLY: bl __assert_fail
|
||||
}
|
||||
|
||||
define fastcc i8 @LVComputationKind(
|
||||
%"class.clang::NamedDecl"* %D,
|
||||
i32 %computation) {
|
||||
ret i8 0
|
||||
}
|
|
@ -0,0 +1,191 @@
|
|||
; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
|
||||
; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
|
||||
; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
|
||||
|
||||
; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
|
||||
; only Power8 (and later) fully support LE.
|
||||
|
||||
%S_56 = type { [13 x i32], i32 }
|
||||
%S_64 = type { [15 x i32], i32 }
|
||||
%S_32 = type { [7 x i32], i32 }
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define void @callee_56_copy([7 x i64] %a, %S_56* %b) #0 { ret void }
|
||||
define void @callee_64_copy([8 x i64] %a, %S_64* %b) #0 { ret void }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @caller_56_reorder_copy(%S_56* %b, [7 x i64] %a) #1 {
|
||||
tail call void @callee_56_copy([7 x i64] %a, %S_56* %b)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_56_reorder_copy:
|
||||
; CHECK-SCO-NOT: stdu 1
|
||||
; CHECK-SCO: TC_RETURNd8 callee_56_copy
|
||||
}
|
||||
|
||||
define void @caller_64_reorder_copy(%S_64* %b, [8 x i64] %a) #1 {
|
||||
tail call void @callee_64_copy([8 x i64] %a, %S_64* %b)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_64_reorder_copy:
|
||||
; CHECK-SCO: bl callee_64_copy
|
||||
}
|
||||
|
||||
define void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b) #0 { ret void }
|
||||
define void @caller_64_64_copy([8 x i64] %a, [8 x i64] %b) #1 {
|
||||
tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_64_64_copy:
|
||||
; CHECK-SCO: b callee_64_64_copy
|
||||
}
|
||||
|
||||
define void @caller_64_64_reorder_copy([8 x i64] %a, [8 x i64] %b) #1 {
|
||||
tail call void @callee_64_64_copy([8 x i64] %b, [8 x i64] %a)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_64_64_reorder_copy:
|
||||
; CHECK-SCO: bl callee_64_64_copy
|
||||
}
|
||||
|
||||
define void @caller_64_64_undef_copy([8 x i64] %a, [8 x i64] %b) #1 {
|
||||
tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] undef)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_64_64_undef_copy:
|
||||
; CHECK-SCO: b callee_64_64_copy
|
||||
}
|
||||
|
||||
define void @arg8_callee(
|
||||
float %a, i32 signext %b, float %c, i32* %d,
|
||||
i8 zeroext %e, float %f, i32* %g, i32 signext %h)
|
||||
{
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @arg8_caller(float %a, i32 signext %b, i8 zeroext %c, i32* %d) {
|
||||
entry:
|
||||
tail call void @arg8_callee(float undef, i32 signext undef, float undef,
|
||||
i32* %d, i8 zeroext undef, float undef,
|
||||
i32* undef, i32 signext undef)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: arg8_caller:
|
||||
; CHECK-SCO: b arg8_callee
|
||||
}
|
||||
|
||||
; Struct return test
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define void @callee_sret_56(%S_56* noalias sret %agg.result) #0 { ret void }
|
||||
define void @callee_sret_32(%S_32* noalias sret %agg.result) #0 { ret void }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @caller_do_something_sret_32(%S_32* noalias sret %agg.result) #1 {
|
||||
%1 = alloca %S_56, align 4
|
||||
%2 = bitcast %S_56* %1 to i8*
|
||||
call void @callee_sret_56(%S_56* nonnull sret %1)
|
||||
tail call void @callee_sret_32(%S_32* sret %agg.result)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_do_something_sret_32:
|
||||
; CHECK-SCO: stdu 1
|
||||
; CHECK-SCO: bl callee_sret_56
|
||||
; CHECK-SCO: addi 1
|
||||
; CHECK-SCO: TC_RETURNd8 callee_sret_32
|
||||
}
|
||||
|
||||
define void @caller_local_sret_32(%S_32* %a) #1 {
|
||||
%tmp = alloca %S_32, align 4
|
||||
tail call void @callee_sret_32(%S_32* nonnull sret %tmp)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: caller_local_sret_32:
|
||||
; CHECK-SCO: bl callee_sret_32
|
||||
}
|
||||
|
||||
attributes #0 = { noinline nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
; vector <4 x i1> test
|
||||
|
||||
define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
|
||||
define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
|
||||
tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
|
||||
ret void
|
||||
|
||||
; <4 x i1> is 32-byte aligned. If the subtarget doesn't support QPX, we can't
|
||||
; place b and c in QPX registers, so we can't do SCO on caller_v4i1_reorder.
|
||||
|
||||
; CHECK-SCO-LABEL: caller_v4i1_reorder:
|
||||
; CHECK-SCO: bl callee_v4i1
|
||||
|
||||
; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
|
||||
; CHECK-SCO-HASQPX: b callee_v4i1
|
||||
}
|
||||
|
||||
define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
|
||||
define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
|
||||
tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: f128_caller:
|
||||
; CHECK-SCO: b f128_callee
|
||||
}
|
||||
|
||||
; weak linkage test
|
||||
%class.T = type { [2 x i8] }
|
||||
|
||||
define weak_odr hidden void @wo_hcallee(%class.T* %this, i8* %c) { ret void }
|
||||
define void @wo_hcaller(%class.T* %this, i8* %c) {
|
||||
tail call void @wo_hcallee(%class.T* %this, i8* %c)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: wo_hcaller:
|
||||
; CHECK-SCO: b wo_hcallee
|
||||
}
|
||||
|
||||
define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void }
|
||||
define void @wo_pcaller(%class.T* %this, i8* %c) {
|
||||
tail call void @wo_pcallee(%class.T* %this, i8* %c)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: wo_pcaller:
|
||||
; CHECK-SCO: b wo_pcallee
|
||||
}
|
||||
|
||||
define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void }
|
||||
define void @wo_caller(%class.T* %this, i8* %c) {
|
||||
tail call void @wo_callee(%class.T* %this, i8* %c)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: wo_caller:
|
||||
; CHECK-SCO: bl wo_callee
|
||||
}
|
||||
|
||||
define weak protected void @w_pcallee(i8* %ptr) { ret void }
|
||||
define void @w_pcaller(i8* %ptr) {
|
||||
tail call void @w_pcallee(i8* %ptr)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: w_pcaller:
|
||||
; CHECK-SCO: b w_pcallee
|
||||
}
|
||||
|
||||
define weak hidden void @w_hcallee(i8* %ptr) { ret void }
|
||||
define void @w_hcaller(i8* %ptr) {
|
||||
tail call void @w_hcallee(i8* %ptr)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: w_hcaller:
|
||||
; CHECK-SCO: b w_hcallee
|
||||
}
|
||||
|
||||
define weak void @w_callee(i8* %ptr) { ret void }
|
||||
define void @w_caller(i8* %ptr) {
|
||||
tail call void @w_callee(i8* %ptr)
|
||||
ret void
|
||||
|
||||
; CHECK-SCO-LABEL: w_caller:
|
||||
; CHECK-SCO: bl w_callee
|
||||
}
|
Loading…
Reference in New Issue