forked from OSchip/llvm-project
AMDGPU: Run internalize symbols at -O0
The relocations used for externally visible functions aren't supported, so the direct call emitted ends up hitting a linker error. llvm-svn: 313616
This commit is contained in:
parent
515cffb8f6
commit
e745d9963e
|
@ -319,16 +319,34 @@ static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
|
|||
});
|
||||
}
|
||||
|
||||
/// Predicate for Internalize pass.
///
/// Returns true when \p GV must be preserved. A function is preserved only if
/// it is a declaration or uses an entry-function calling convention (as
/// reported by AMDGPU::isEntryFunctionCC); its use count is not consulted.
/// Any other global value is preserved only while it still has uses.
|
||||
bool mustPreserveGV(const GlobalValue &GV) {
|
||||
if (const Function *F = dyn_cast<Function>(&GV))
|
||||
return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
|
||||

||||
// Not a function: keep it only if something still references it.
return !GV.use_empty();
|
||||
}
|
||||
|
||||
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
||||
Builder.DivergentTarget = true;
|
||||
|
||||
bool EnableOpt = getOptLevel() > CodeGenOpt::None;
|
||||
bool Internalize = InternalizeSymbols && EnableOpt &&
|
||||
(getTargetTriple().getArch() == Triple::amdgcn);
|
||||
bool Internalize = InternalizeSymbols;
|
||||
bool EarlyInline = EarlyInlineAll && EnableOpt;
|
||||
bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
|
||||
bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
|
||||
|
||||
if (Internalize) {
|
||||
// If we're generating code, we always have the whole program available. The
|
||||
// relocations expected for externally visible functions aren't supported,
|
||||
// so make sure every non-entry function is hidden.
|
||||
Builder.addExtension(
|
||||
PassManagerBuilder::EP_EnabledOnOptLevel0,
|
||||
[](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
|
||||
PM.add(createInternalizePass(mustPreserveGV));
|
||||
});
|
||||
}
|
||||
|
||||
Builder.addExtension(
|
||||
PassManagerBuilder::EP_ModuleOptimizerEarly,
|
||||
[Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
|
||||
|
@ -339,25 +357,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
|
|||
}
|
||||
PM.add(createAMDGPUUnifyMetadataPass());
|
||||
if (Internalize) {
|
||||
PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
|
||||
if (const Function *F = dyn_cast<Function>(&GV)) {
|
||||
if (F->isDeclaration())
|
||||
return true;
|
||||
switch (F->getCallingConv()) {
|
||||
default:
|
||||
return false;
|
||||
case CallingConv::AMDGPU_VS:
|
||||
case CallingConv::AMDGPU_HS:
|
||||
case CallingConv::AMDGPU_GS:
|
||||
case CallingConv::AMDGPU_PS:
|
||||
case CallingConv::AMDGPU_CS:
|
||||
case CallingConv::AMDGPU_KERNEL:
|
||||
case CallingConv::SPIR_KERNEL:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return !GV.use_empty();
|
||||
}));
|
||||
PM.add(createInternalizePass(mustPreserveGV));
|
||||
PM.add(createGlobalDCEPass());
|
||||
}
|
||||
if (EarlyInline)
|
||||
|
|
|
@ -1,35 +1,68 @@
|
|||
; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
|
||||
; CHECK-NOT: unused
|
||||
; CHECK-NOT: foo_used
|
||||
; CHECK: gvar_used
|
||||
; CHECK: main_kernel
|
||||
; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s
|
||||
; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s
|
||||
|
||||
; OPT-NOT: gvar_unused
|
||||
; OPTNONE: gvar_unused
|
||||
@gvar_unused = addrspace(1) global i32 undef, align 4
|
||||
|
||||
; ALL: gvar_used
|
||||
@gvar_used = addrspace(1) global i32 undef, align 4
|
||||
|
||||
; Function Attrs: alwaysinline nounwind
|
||||
define amdgpu_kernel void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
|
||||
; ALL: define internal fastcc void @func_used(
|
||||
define fastcc void @func_used(i32 addrspace(1)* %out, i32 %tid) #1 {
|
||||
entry:
|
||||
store i32 1, i32 addrspace(1)* %out
|
||||
store volatile i32 %tid, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: alwaysinline nounwind
|
||||
define amdgpu_kernel void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
|
||||
; ALL: define internal fastcc void @func_used_noinline(
|
||||
define fastcc void @func_used_noinline(i32 addrspace(1)* %out, i32 %tid) #2 {
|
||||
entry:
|
||||
store i32 %tid, i32 addrspace(1)* %out
|
||||
store volatile i32 %tid, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPTNONE: define internal fastcc void @func_used_alwaysinline(
|
||||
; OPT-NOT: @func_used_alwaysinline
|
||||
define fastcc void @func_used_alwaysinline(i32 addrspace(1)* %out, i32 %tid) #3 {
|
||||
entry:
|
||||
store volatile i32 %tid, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; OPTNONE: define internal void @func_unused(
|
||||
; OPT-NOT: @func_unused
|
||||
define void @func_unused(i32 addrspace(1)* %out, i32 %tid) #2 {
|
||||
entry:
|
||||
store volatile i32 %tid, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; ALL: define amdgpu_kernel void @kernel_unused(
|
||||
define amdgpu_kernel void @kernel_unused(i32 addrspace(1)* %out) #1 {
|
||||
entry:
|
||||
store volatile i32 1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; ALL: define amdgpu_kernel void @main_kernel()
|
||||
; ALL: tail call i32 @llvm.amdgcn.workitem.id.x
|
||||
; ALL: tail call fastcc void @func_used
|
||||
; ALL: tail call fastcc void @func_used_noinline
|
||||
; ALL: store volatile
|
||||
; ALL: ret void
|
||||
define amdgpu_kernel void @main_kernel() {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
tail call fastcc void @func_used(i32 addrspace(1)* @gvar_used, i32 %tid)
|
||||
tail call fastcc void @func_used_noinline(i32 addrspace(1)* @gvar_used, i32 %tid)
|
||||
tail call fastcc void @func_used_alwaysinline(i32 addrspace(1)* @gvar_used, i32 %tid)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { noinline nounwind }
|
||||
attributes #3 = { alwaysinline nounwind }
|
||||
|
|
Loading…
Reference in New Issue