diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 03d6a0fbe6ee..e04766f93d69 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -677,6 +677,9 @@ public: ~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; } }; + /// Current CUDA name mangling is for device name in host compilation. + bool CUDAMangleDeviceNameInHostCompilation = false; + /// Returns the dynamic AST node parent map context. ParentMapContext &getParentMapContext(); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 85d2bcf268f3..c91839c50e95 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11762,7 +11762,14 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) { unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const { auto I = MangleNumbers.find(ND); - return I != MangleNumbers.end() ? I->second : 1; + unsigned Res = I != MangleNumbers.end() ? I->second : 1; + if (!LangOpts.CUDA || LangOpts.CUDAIsDevice) + return Res; + + // CUDA/HIP host compilation encodes host and device mangling numbers + // as lower and upper half of 32 bit integer. + Res = CUDAMangleDeviceNameInHostCompilation ? Res >> 16 : Res & 0xFFFF; + return Res > 1 ? Res : 1; } void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) { diff --git a/clang/lib/AST/MicrosoftCXXABI.cpp b/clang/lib/AST/MicrosoftCXXABI.cpp index b7dc0e62e66a..b3e26bc889c1 100644 --- a/clang/lib/AST/MicrosoftCXXABI.cpp +++ b/clang/lib/AST/MicrosoftCXXABI.cpp @@ -76,6 +76,20 @@ public: unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override { return DeviceCtx->getManglingNumber(CallOperator); } + + unsigned getManglingNumber(const TagDecl *TD, + unsigned MSLocalManglingNumber) override { + unsigned DeviceN = DeviceCtx->getManglingNumber(TD, MSLocalManglingNumber); + unsigned HostN = + MicrosoftNumberingContext::getManglingNumber(TD, MSLocalManglingNumber); + if (DeviceN > 0xFFFF || HostN > 0xFFFF) { + DiagnosticsEngine &Diags = TD->getASTContext().getDiagnostics(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "Mangling number exceeds limit (65535)"); + Diags.Report(TD->getLocation(), DiagID); + } + return (DeviceN << 16) | HostN; + } }; class MSSYCLNumberingContext : public MicrosoftNumberingContext { diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 4390228297d0..961f7e39c8f6 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/ReplaceConstant.h" #include "llvm/Support/Format.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace CodeGen; @@ -260,6 +261,8 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { } std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { + llvm::SaveAndRestore MangleAsDevice( + CGM.getContext().CUDAMangleDeviceNameInHostCompilation, true); GlobalDecl GD; // D could be either a kernel or a variable. if (auto *FD = dyn_cast(ND)) diff --git a/clang/test/CodeGenCUDA/struct-mangling-number.cu b/clang/test/CodeGenCUDA/struct-mangling-number.cu new file mode 100644 index 000000000000..e9e80671c28c --- /dev/null +++ b/clang/test/CodeGenCUDA/struct-mangling-number.cu @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -emit-llvm -o - -aux-triple x86_64-pc-windows-msvc \ +// RUN: -fms-extensions -triple amdgcn-amd-amdhsa \ +// RUN: -target-cpu gfx1030 -fcuda-is-device -x hip %s \ +// RUN: | FileCheck -check-prefix=DEV %s + +// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \ +// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \ +// RUN: -aux-target-cpu gfx1030 -x hip %s \ +// RUN: | FileCheck -check-prefix=HOST %s + +// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \ +// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \ +// RUN: -aux-target-cpu gfx1030 -x hip %s \ +// RUN: | FileCheck -check-prefix=HOST-NEG %s + +// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \ +// RUN: -fms-extensions -x c++ %s \ +// RUN: | FileCheck -check-prefix=CPP %s + +#if __HIP__ +#include "Inputs/cuda.h" +#endif + +// Check local struct 'Op' uses Itanium mangling number instead of MSVC mangling +// number in device side name mangling. It is the same in device and host +// compilation. + +// DEV: define amdgpu_kernel void @_Z6kernelIZN4TestIiE3runEvE2OpEvv( + +// HOST-DAG: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2OpEvv\00" + +// HOST-NEG-NOT: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2Op_1Evv\00" +#if __HIP__ +template +__attribute__((global)) void kernel() +{ +} +#endif + +// Check local struct 'Op' uses MSVC mangling number in host function name mangling. +// It is the same when compiled as HIP or C++ program. + +// HOST-DAG: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"() +// CPP: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"() +template +void fun() +{ +} + +template +class Test { +public: + void run() + { + struct Op + { + }; +#if __HIP__ + kernel<<<1, 1>>>(); +#endif + fun(); + } +}; + +int main() { + Test A; + A.run(); +}