forked from OSchip/llvm-project
AMDGPU: Add option to stress calls
This inverts the behavior of the AlwaysInline pass to mark every function not already marked alwaysinline as noinline.
llvm-svn: 313865
This commit is contained in:
parent
6810367610
commit
1390af2dd2
|
@@ -21,6 +21,12 @@ using namespace llvm;
|
|||
|
||||
namespace {
|
||||
|
||||
+ static cl::opt<bool> StressCalls(
|
||||
+ "amdgpu-stress-function-calls",
|
||||
+ cl::Hidden,
|
||||
+ cl::desc("Force all functions to be noinline"),
|
||||
+ cl::init(false));
|
||||
|
||||
class AMDGPUAlwaysInline : public ModulePass {
|
||||
bool GlobalOpt;
|
||||
|
||||
|
@@ -57,9 +63,13 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
|||
}
|
||||
}
|
||||
|
||||
+ auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
|
||||
+ auto IncompatAttr
|
||||
+ = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
|
||||
|
||||
for (Function &F : M) {
|
||||
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
|
||||
- !F.hasFnAttribute(Attribute::NoInline))
|
||||
+ !F.hasFnAttribute(IncompatAttr))
|
||||
FuncsToClone.push_back(&F);
|
||||
}
|
||||
|
||||
|
@@ -71,8 +81,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
|||
}
|
||||
|
||||
for (Function &F : M) {
|
||||
- if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
|
||||
- F.addFnAttr(Attribute::AlwaysInline);
|
||||
+ if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
|
||||
+ F.addFnAttr(NewAttr);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
|
|
@@ -0,0 +1,36 @@
|
|||
; RUN: opt -S -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck %s
|
||||
|
||||
; CHECK: define internal fastcc i32 @alwaysinline_func(i32 %a) #0 {
|
||||
define internal fastcc i32 @alwaysinline_func(i32 %a) alwaysinline {
|
||||
entry:
|
||||
%tmp0 = add i32 %a, 1
|
||||
ret i32 %tmp0
|
||||
}
|
||||
|
||||
; CHECK: define internal fastcc i32 @noinline_func(i32 %a) #1 {
|
||||
define internal fastcc i32 @noinline_func(i32 %a) noinline {
|
||||
entry:
|
||||
%tmp0 = add i32 %a, 2
|
||||
ret i32 %tmp0
|
||||
}
|
||||
|
||||
; CHECK: define internal fastcc i32 @unmarked_func(i32 %a) #1 {
|
||||
define internal fastcc i32 @unmarked_func(i32 %a) {
|
||||
entry:
|
||||
%tmp0 = add i32 %a, 3
|
||||
ret i32 %tmp0
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @alwaysinline_func(i32 1)
|
||||
store volatile i32 %tmp0, i32 addrspace(1)* %out
|
||||
%tmp1 = call i32 @noinline_func(i32 1)
|
||||
store volatile i32 %tmp1, i32 addrspace(1)* %out
|
||||
%tmp2 = call i32 @unmarked_func(i32 1)
|
||||
store volatile i32 %tmp2, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { alwaysinline }
|
||||
; CHECK: attributes #1 = { noinline }
|
Loading…
Reference in New Issue