[CUDA][HIP] Externalize kernels in anonymous name space

kernels in anonymous name space needs to have unique name
to avoid duplicate symbols.

Fixes: https://github.com/llvm/llvm-project/issues/54560

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D123353
This commit is contained in:
Yaxun (Sam) Liu 2022-04-07 22:57:56 -04:00
parent a91c77ee28
commit 4ea1d43509
6 changed files with 52 additions and 25 deletions

View File

@ -3289,11 +3289,11 @@ public:
/// Return a new OMPTraitInfo object owned by this context.
OMPTraitInfo &getNewOMPTraitInfo();
/// Whether a C++ static variable may be externalized.
bool mayExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel may be externalized.
bool mayExternalize(const Decl *D) const;
/// Whether a C++ static variable should be externalized.
bool shouldExternalizeStaticVar(const Decl *D) const;
/// Whether a C++ static variable or CUDA/HIP kernel should be externalized.
bool shouldExternalize(const Decl *D) const;
StringRef getCUIDHash() const;

View File

@ -11328,7 +11328,7 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
// name between the host and device compilation which is the same for the
// same compilation unit whereas different among different compilation
// units.
if (Context.shouldExternalizeStaticVar(D))
if (Context.shouldExternalize(D))
return GVA_StrongExternal;
}
return L;
@ -12277,7 +12277,7 @@ operator<<(const StreamingDiagnostic &DB,
return DB << "a prior #pragma section";
}
bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool ASTContext::mayExternalize(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
@ -12285,14 +12285,16 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since it is
// a declaration in IR, therefore cannot have internal linkage.
return IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
// a declaration in IR, therefore cannot have internal linkage. Kernels in
// anonymous name space needs to be externalized to avoid duplicate symbols.
return (IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
(D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
}
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
bool ASTContext::shouldExternalize(const Decl *D) const {
return mayExternalize(D) &&
(D->hasAttr<HIPManagedAttr>() || D->hasAttr<CUDAGlobalAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}

View File

@ -281,13 +281,13 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
DeviceSideName = std::string(ND->getIdentifier()->getName());
// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
!CGM.getLangOpts().CUID.empty()) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << DeviceSideName;
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
DeviceSideName = std::string(Out.str());
}
return DeviceSideName;

View File

@ -1376,10 +1376,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
}
// Make unique name for device side static file-scope variable for HIP.
if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
if (CGM.getContext().shouldExternalize(ND) &&
CGM.getLangOpts().GPURelocatableDeviceCode &&
CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
CGM.printPostfixForExternalizedStaticVar(Out);
CGM.printPostfixForExternalizedDecl(Out, ND);
return std::string(Out.str());
}
@ -1446,8 +1446,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// static device variable depends on whether the variable is referenced by
// a host or device host function. Therefore the mangled name cannot be
// cached.
if (!LangOpts.CUDAIsDevice ||
!getContext().mayExternalizeStaticVar(GD.getDecl())) {
if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) {
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
@ -1467,7 +1466,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
// directly between host- and device-compilations, the host- and
// device-mangling in host compilation could help catching certain ones.
assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
getLangOpts().CUDAIsDevice ||
getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice ||
(getContext().getAuxTargetInfo() &&
(getContext().getAuxTargetInfo()->getCXXABI() !=
getContext().getTargetInfo().getCXXABI())) ||
@ -6772,7 +6771,8 @@ bool CodeGenModule::stopAutoInit() {
return false;
}
void CodeGenModule::printPostfixForExternalizedStaticVar(
llvm::raw_ostream &OS) const {
OS << "__static__" << getContext().getCUIDHash();
void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const {
OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
<< getContext().getCUIDHash();
}

View File

@ -1457,9 +1457,10 @@ public:
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();
/// Print the postfix for externalized static variable for single source
/// offloading languages CUDA and HIP.
void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
/// Print the postfix for externalized static variable or kernels for single
/// source offloading languages CUDA and HIP.
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
const Decl *D) const;
private:
llvm::Constant *GetOrCreateLLVMFunction(

View File

@ -0,0 +1,24 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.dev
// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
// RUN: -emit-llvm -o - -x hip %s > %t.host
// RUN: cat %t.dev %t.host | FileCheck %s
#include "Inputs/cuda.h"
// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]
namespace {
__global__ void kernel() {
}
}
void test() {
kernel<<<1, 1>>>();
}