AMDGPU: Run internalize symbols at -O0

The relocations needed for calls to externally visible functions aren't
supported, so the direct call that gets emitted ends up hitting a linker
error.

llvm-svn: 313616
Author: Matt Arsenault
Date:   2017-09-19 07:40:11 +00:00
Parent: 515cffb8f6
Commit: e745d9963e

2 changed files with 69 additions and 36 deletions
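For readers unfamiliar with the legacy pass-manager plumbing this patch relies on: EP_EnabledOnOptLevel0 is the PassManagerBuilder extension point whose callbacks still fire when OptLevel is 0, which is what lets the internalize pass run even at -O0. A minimal sketch (not part of this patch; the function name buildO0Pipeline is invented) of how a frontend driving the legacy pass manager picks this up:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Sketch: build and run a -O0 module pipeline for a given target machine.
void buildO0Pipeline(llvm::TargetMachine &TM, llvm::Module &M) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 0;           // -O0
  TM.adjustPassManager(PMB);  // the target registers its extensions here

  llvm::legacy::PassManager MPM;
  // At OptLevel 0, populateModulePassManager adds only the mandatory passes
  // plus any EP_EnabledOnOptLevel0 extensions -- after this patch, that
  // includes AMDGPU's internalize pass.
  PMB.populateModulePassManager(MPM);
  MPM.run(M);
}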

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

@@ -319,16 +319,34 @@ static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
   });
 }
 
+/// Predicate for Internalize pass.
+bool mustPreserveGV(const GlobalValue &GV) {
+  if (const Function *F = dyn_cast<Function>(&GV))
+    return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
+
+  return !GV.use_empty();
+}
+
 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
   Builder.DivergentTarget = true;
 
   bool EnableOpt = getOptLevel() > CodeGenOpt::None;
-  bool Internalize = InternalizeSymbols && EnableOpt &&
-                     (getTargetTriple().getArch() == Triple::amdgcn);
+  bool Internalize = InternalizeSymbols;
   bool EarlyInline = EarlyInlineAll && EnableOpt;
   bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
   bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
 
+  if (Internalize) {
+    // If we're generating code, we always have the whole program available. The
+    // relocations expected for externally visible functions aren't supported,
+    // so make sure every non-entry function is hidden.
+    Builder.addExtension(
+        PassManagerBuilder::EP_EnabledOnOptLevel0,
+        [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+          PM.add(createInternalizePass(mustPreserveGV));
+        });
+  }
+
   Builder.addExtension(
       PassManagerBuilder::EP_ModuleOptimizerEarly,
       [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
@@ -339,25 +357,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
         }
         PM.add(createAMDGPUUnifyMetadataPass());
         if (Internalize) {
-          PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
-            if (const Function *F = dyn_cast<Function>(&GV)) {
-              if (F->isDeclaration())
-                return true;
-              switch (F->getCallingConv()) {
-              default:
-                return false;
-              case CallingConv::AMDGPU_VS:
-              case CallingConv::AMDGPU_HS:
-              case CallingConv::AMDGPU_GS:
-              case CallingConv::AMDGPU_PS:
-              case CallingConv::AMDGPU_CS:
-              case CallingConv::AMDGPU_KERNEL:
-              case CallingConv::SPIR_KERNEL:
-                return true;
-              }
-            }
-            return !GV.use_empty();
-          }));
+          PM.add(createInternalizePass(mustPreserveGV));
           PM.add(createGlobalDCEPass());
         }
         if (EarlyInline)
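The extracted mustPreserveGV predicate plus GlobalDCE can be exercised outside adjustPassManager as well. A hedged standalone sketch of the internalize-then-DCE sequence used at -O1 and above (at -O0 only the internalize pass itself is registered); the predicate here is simplified to check only AMDGPU_KERNEL rather than the full AMDGPU::isEntryFunctionCC set, and internalizeAndPrune is an invented helper name:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"

using namespace llvm;

// Simplified stand-in for mustPreserveGV: keep declarations and compute
// kernels externally visible; anything else may be internalized. (The real
// predicate uses AMDGPU::isEntryFunctionCC, which also covers the graphics
// shader calling conventions.)
static bool preserveGV(const GlobalValue &GV) {
  if (const Function *F = dyn_cast<Function>(&GV))
    return F->isDeclaration() ||
           F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
  return !GV.use_empty();
}

void internalizeAndPrune(Module &M) {
  legacy::PassManager PM;
  PM.add(createInternalizePass(preserveGV)); // demote non-entry symbols
  PM.add(createGlobalDCEPass());             // then delete what became dead
  PM.run(M);
}

Internalizing before GlobalDCE is what allows the now-internal, unreferenced definitions to be deleted outright, which is exactly what the updated test below checks at -O1 (OPT) but not at -O0 (OPTNONE).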

test/CodeGen/AMDGPU/internalize.ll

@@ -1,35 +1,68 @@
-; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
-; CHECK-NOT: unused
-; CHECK-NOT: foo_used
-; CHECK: gvar_used
-; CHECK: main_kernel
+; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s
+; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s
 
+; OPT-NOT: gvar_unused
+; OPTNONE: gvar_unused
 @gvar_unused = addrspace(1) global i32 undef, align 4
 
+; ALL: gvar_used
 @gvar_used = addrspace(1) global i32 undef, align 4
 
-; Function Attrs: alwaysinline nounwind
-define amdgpu_kernel void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
+; ALL: define internal fastcc void @func_used(
+define fastcc void @func_used(i32 addrspace(1)* %out, i32 %tid) #1 {
 entry:
-  store i32 1, i32 addrspace(1)* %out
+  store volatile i32 %tid, i32 addrspace(1)* %out
   ret void
 }
 
-; Function Attrs: alwaysinline nounwind
-define amdgpu_kernel void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
+; ALL: define internal fastcc void @func_used_noinline(
+define fastcc void @func_used_noinline(i32 addrspace(1)* %out, i32 %tid) #2 {
 entry:
-  store i32 %tid, i32 addrspace(1)* %out
+  store volatile i32 %tid, i32 addrspace(1)* %out
   ret void
 }
 
+; OPTNONE: define internal fastcc void @func_used_alwaysinline(
+; OPT-NOT: @func_used_alwaysinline
+define fastcc void @func_used_alwaysinline(i32 addrspace(1)* %out, i32 %tid) #3 {
+entry:
+  store volatile i32 %tid, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPTNONE: define internal void @func_unused(
+; OPT-NOT: @func_unused
+define void @func_unused(i32 addrspace(1)* %out, i32 %tid) #2 {
+entry:
+  store volatile i32 %tid, i32 addrspace(1)* %out
+  ret void
+}
+
+; ALL: define amdgpu_kernel void @kernel_unused(
+define amdgpu_kernel void @kernel_unused(i32 addrspace(1)* %out) #1 {
+entry:
+  store volatile i32 1, i32 addrspace(1)* %out
+  ret void
+}
+
+; ALL: define amdgpu_kernel void @main_kernel()
+; ALL: tail call i32 @llvm.amdgcn.workitem.id.x
+; ALL: tail call fastcc void @func_used
+; ALL: tail call fastcc void @func_used_noinline
+; ALL: store volatile
+; ALL: ret void
 define amdgpu_kernel void @main_kernel() {
 entry:
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+  tail call fastcc void @func_used(i32 addrspace(1)* @gvar_used, i32 %tid)
+  tail call fastcc void @func_used_noinline(i32 addrspace(1)* @gvar_used, i32 %tid)
+  tail call fastcc void @func_used_alwaysinline(i32 addrspace(1)* @gvar_used, i32 %tid)
   ret void
 }
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { alwaysinline nounwind }
+attributes #1 = { nounwind }
+attributes #2 = { noinline nounwind }
+attributes #3 = { alwaysinline nounwind }