forked from OSchip/llvm-project
[HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined symbols
By default clang emits complete contructors as alias of base constructors if they are the same. The backend is supposed to emit symbols for the alias, otherwise it causes undefined symbols. @yaxunl observed that this issue is related to the llvm options `-amdgpu-early-inline-all=true` and `-amdgpu-function-calls=false`. This issue is resolved by only inlining global values with internal linkage. The `getCalleeFunction()` in AMDGPUResourceUsageAnalysis also had to be extended to support aliases to functions. inline-calls.ll was corrected appropriately. Reviewed By: yaxunl, #amdgpu Differential Revision: https://reviews.llvm.org/D109707
This commit is contained in:
parent
4e572db0c2
commit
03375a3fb3
|
@ -5089,9 +5089,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
}
|
||||
|
||||
// Enable -mconstructor-aliases except on darwin, where we have to work around
|
||||
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
|
||||
// where aliases aren't supported.
|
||||
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
|
||||
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
|
||||
// aliases aren't supported.
|
||||
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
|
||||
CmdArgs.push_back("-mconstructor-aliases");
|
||||
|
||||
// Darwin's kernel doesn't support guard variables; just die if we
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
// REQUIRES: amdgpu-registered-target, clang-driver
|
||||
|
||||
// RUN: %clang --offload-arch=gfx906 --cuda-device-only -nogpulib -nogpuinc -x hip -emit-llvm -S -o - %s \
|
||||
// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK: %struct.B = type { i8 }
|
||||
struct B {
|
||||
|
||||
// CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
|
||||
__device__ B(int x);
|
||||
};
|
||||
|
||||
__device__ B::B(int x) {
|
||||
}
|
|
@ -15,6 +15,7 @@
|
|||
#include "AMDGPU.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/CodeGen/CommandFlags.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
|
|||
|
||||
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
|
||||
SmallPtrSet<Function *, 8> FuncsToNoInline;
|
||||
Triple TT(M.getTargetTriple());
|
||||
|
||||
for (GlobalAlias &A : M.aliases()) {
|
||||
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
|
||||
if (TT.getArch() == Triple::amdgcn &&
|
||||
A.getLinkage() != GlobalValue::InternalLinkage)
|
||||
continue;
|
||||
A.replaceAllUsesWith(F);
|
||||
AliasesToRemove.push_back(&A);
|
||||
}
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/Analysis/CallGraph.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/GlobalAlias.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
|
|||
assert(Op.getImm() == 0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
|
||||
return cast<Function>(GA->getOperand(0));
|
||||
return cast<Function>(Op.getGlobal());
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple r600-unknown-linux-gnu -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefix=R600
|
||||
|
||||
; ALL-NOT: {{^}}func:
|
||||
define internal i32 @func(i32 %a) {
|
||||
|
@ -9,7 +9,7 @@ entry:
|
|||
ret i32 %tmp0
|
||||
}
|
||||
|
||||
; ALL: {{^}}kernel:
|
||||
; CHECK: {{^}}kernel:
|
||||
; GCN-NOT: s_swappc_b64
|
||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
|
@ -18,12 +18,13 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: func_alias
|
||||
; ALL-NOT: func_alias
|
||||
; CHECK: func_alias
|
||||
; R600-NOT: func_alias
|
||||
@func_alias = alias i32 (i32), i32 (i32)* @func
|
||||
|
||||
; ALL: {{^}}kernel3:
|
||||
; CHECK-NOT: {{^}}kernel3:
|
||||
; GCN-NOT: s_swappc_b64
|
||||
; R600: {{^}}kernel3:
|
||||
define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tmp0 = call i32 @func_alias(i32 1)
|
||||
|
|
Loading…
Reference in New Issue