[CUDA][HIP] Fix mangling number for local struct

MSVC and Itanium mangling use different mangling numbers
for function-scope structs, which causes inconsistent
mangled kernel names in device and host compilations.

This patch uses Itanium mangling number for structs
in for mangling device side names in CUDA/HIP host
compilation on Windows to fix this issue.

A state is added to ASTContext to indicate whether the
current name mangling is for device side names in host
compilation. Device and host mangling number
are encoded/decoded as upper and lower half of 32 bit
unsigned integer to fit into the original mangling number
field for AST. Diagnostic will be emitted if a manglining
number exceeds limit.

Reviewed by: Artem Belevich, Reid Kleckner

Differential Revision: https://reviews.llvm.org/D122734

Fixes: SWDEV-328515
This commit is contained in:
Yaxun (Sam) Liu 2022-03-30 10:33:03 -04:00
parent 981ed72a17
commit 11d3e31c60
5 changed files with 96 additions and 1 deletions

View File

@ -677,6 +677,9 @@ public:
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
};
/// Current CUDA name mangling is for device name in host compilation.
bool CUDAMangleDeviceNameInHostCompilation = false;
/// Returns the dynamic AST node parent map context.
ParentMapContext &getParentMapContext();

View File

@ -11762,7 +11762,14 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
auto I = MangleNumbers.find(ND);
return I != MangleNumbers.end() ? I->second : 1;
unsigned Res = I != MangleNumbers.end() ? I->second : 1;
if (!LangOpts.CUDA || LangOpts.CUDAIsDevice)
return Res;
// CUDA/HIP host compilation encodes host and device mangling numbers
// as lower and upper half of 32 bit integer.
Res = CUDAMangleDeviceNameInHostCompilation ? Res >> 16 : Res & 0xFFFF;
return Res > 1 ? Res : 1;
}
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {

View File

@ -76,6 +76,20 @@ public:
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
return DeviceCtx->getManglingNumber(CallOperator);
}
unsigned getManglingNumber(const TagDecl *TD,
unsigned MSLocalManglingNumber) override {
unsigned DeviceN = DeviceCtx->getManglingNumber(TD, MSLocalManglingNumber);
unsigned HostN =
MicrosoftNumberingContext::getManglingNumber(TD, MSLocalManglingNumber);
if (DeviceN > 0xFFFF || HostN > 0xFFFF) {
DiagnosticsEngine &Diags = TD->getASTContext().getDiagnostics();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error, "Mangling number exceeds limit (65535)");
Diags.Report(TD->getLocation(), DiagID);
}
return (DeviceN << 16) | HostN;
}
};
class MSSYCLNumberingContext : public MicrosoftNumberingContext {

View File

@ -24,6 +24,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace CodeGen;
@ -260,6 +261,8 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
}
std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
llvm::SaveAndRestore<bool> MangleAsDevice(
CGM.getContext().CUDAMangleDeviceNameInHostCompilation, true);
GlobalDecl GD;
// D could be either a kernel or a variable.
if (auto *FD = dyn_cast<FunctionDecl>(ND))

View File

@ -0,0 +1,68 @@
// RUN: %clang_cc1 -emit-llvm -o - -aux-triple x86_64-pc-windows-msvc \
// RUN: -fms-extensions -triple amdgcn-amd-amdhsa \
// RUN: -target-cpu gfx1030 -fcuda-is-device -x hip %s \
// RUN: | FileCheck -check-prefix=DEV %s
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
// RUN: -aux-target-cpu gfx1030 -x hip %s \
// RUN: | FileCheck -check-prefix=HOST %s
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
// RUN: -aux-target-cpu gfx1030 -x hip %s \
// RUN: | FileCheck -check-prefix=HOST-NEG %s
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
// RUN: -fms-extensions -x c++ %s \
// RUN: | FileCheck -check-prefix=CPP %s
#if __HIP__
#include "Inputs/cuda.h"
#endif
// Check local struct 'Op' uses Itanium mangling number instead of MSVC mangling
// number in device side name mangling. It is the same in device and host
// compilation.
// DEV: define amdgpu_kernel void @_Z6kernelIZN4TestIiE3runEvE2OpEvv(
// HOST-DAG: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2OpEvv\00"
// HOST-NEG-NOT: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2Op_1Evv\00"
#if __HIP__
template<typename T>
__attribute__((global)) void kernel()
{
}
#endif
// Check local struct 'Op' uses MSVC mangling number in host function name mangling.
// It is the same when compiled as HIP or C++ program.
// HOST-DAG: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
// CPP: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
template<typename T>
void fun()
{
}
template <typename T>
class Test {
public:
void run()
{
struct Op
{
};
#if __HIP__
kernel<Op><<<1, 1>>>();
#endif
fun<Op>();
}
};
int main() {
Test<int> A;
A.run();
}