forked from OSchip/llvm-project
[CUDA][HIP] Fix mangling number for local struct
MSVC and Itanium mangling use different mangling numbers for function-scope structs, which causes inconsistent mangled kernel names in device and host compilations. This patch uses Itanium mangling number for structs in for mangling device side names in CUDA/HIP host compilation on Windows to fix this issue. A state is added to ASTContext to indicate whether the current name mangling is for device side names in host compilation. Device and host mangling number are encoded/decoded as upper and lower half of 32 bit unsigned integer to fit into the original mangling number field for AST. Diagnostic will be emitted if a manglining number exceeds limit. Reviewed by: Artem Belevich, Reid Kleckner Differential Revision: https://reviews.llvm.org/D122734 Fixes: SWDEV-328515
This commit is contained in:
parent
981ed72a17
commit
11d3e31c60
|
@ -677,6 +677,9 @@ public:
|
|||
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
|
||||
};
|
||||
|
||||
/// Current CUDA name mangling is for device name in host compilation.
|
||||
bool CUDAMangleDeviceNameInHostCompilation = false;
|
||||
|
||||
/// Returns the dynamic AST node parent map context.
|
||||
ParentMapContext &getParentMapContext();
|
||||
|
||||
|
|
|
@ -11762,7 +11762,14 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
|
|||
|
||||
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
|
||||
auto I = MangleNumbers.find(ND);
|
||||
return I != MangleNumbers.end() ? I->second : 1;
|
||||
unsigned Res = I != MangleNumbers.end() ? I->second : 1;
|
||||
if (!LangOpts.CUDA || LangOpts.CUDAIsDevice)
|
||||
return Res;
|
||||
|
||||
// CUDA/HIP host compilation encodes host and device mangling numbers
|
||||
// as lower and upper half of 32 bit integer.
|
||||
Res = CUDAMangleDeviceNameInHostCompilation ? Res >> 16 : Res & 0xFFFF;
|
||||
return Res > 1 ? Res : 1;
|
||||
}
|
||||
|
||||
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
|
||||
|
|
|
@ -76,6 +76,20 @@ public:
|
|||
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
|
||||
return DeviceCtx->getManglingNumber(CallOperator);
|
||||
}
|
||||
|
||||
unsigned getManglingNumber(const TagDecl *TD,
|
||||
unsigned MSLocalManglingNumber) override {
|
||||
unsigned DeviceN = DeviceCtx->getManglingNumber(TD, MSLocalManglingNumber);
|
||||
unsigned HostN =
|
||||
MicrosoftNumberingContext::getManglingNumber(TD, MSLocalManglingNumber);
|
||||
if (DeviceN > 0xFFFF || HostN > 0xFFFF) {
|
||||
DiagnosticsEngine &Diags = TD->getASTContext().getDiagnostics();
|
||||
unsigned DiagID = Diags.getCustomDiagID(
|
||||
DiagnosticsEngine::Error, "Mangling number exceeds limit (65535)");
|
||||
Diags.Report(TD->getLocation(), DiagID);
|
||||
}
|
||||
return (DeviceN << 16) | HostN;
|
||||
}
|
||||
};
|
||||
|
||||
class MSSYCLNumberingContext : public MicrosoftNumberingContext {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/ReplaceConstant.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/SaveAndRestore.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace CodeGen;
|
||||
|
@ -260,6 +261,8 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
|
|||
}
|
||||
|
||||
std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
|
||||
llvm::SaveAndRestore<bool> MangleAsDevice(
|
||||
CGM.getContext().CUDAMangleDeviceNameInHostCompilation, true);
|
||||
GlobalDecl GD;
|
||||
// D could be either a kernel or a variable.
|
||||
if (auto *FD = dyn_cast<FunctionDecl>(ND))
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
// RUN: %clang_cc1 -emit-llvm -o - -aux-triple x86_64-pc-windows-msvc \
|
||||
// RUN: -fms-extensions -triple amdgcn-amd-amdhsa \
|
||||
// RUN: -target-cpu gfx1030 -fcuda-is-device -x hip %s \
|
||||
// RUN: | FileCheck -check-prefix=DEV %s
|
||||
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
|
||||
// RUN: -aux-target-cpu gfx1030 -x hip %s \
|
||||
// RUN: | FileCheck -check-prefix=HOST %s
|
||||
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
|
||||
// RUN: -aux-target-cpu gfx1030 -x hip %s \
|
||||
// RUN: | FileCheck -check-prefix=HOST-NEG %s
|
||||
|
||||
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||
// RUN: -fms-extensions -x c++ %s \
|
||||
// RUN: | FileCheck -check-prefix=CPP %s
|
||||
|
||||
#if __HIP__
|
||||
#include "Inputs/cuda.h"
|
||||
#endif
|
||||
|
||||
// Check local struct 'Op' uses Itanium mangling number instead of MSVC mangling
|
||||
// number in device side name mangling. It is the same in device and host
|
||||
// compilation.
|
||||
|
||||
// DEV: define amdgpu_kernel void @_Z6kernelIZN4TestIiE3runEvE2OpEvv(
|
||||
|
||||
// HOST-DAG: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2OpEvv\00"
|
||||
|
||||
// HOST-NEG-NOT: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2Op_1Evv\00"
|
||||
#if __HIP__
|
||||
template<typename T>
|
||||
__attribute__((global)) void kernel()
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check local struct 'Op' uses MSVC mangling number in host function name mangling.
|
||||
// It is the same when compiled as HIP or C++ program.
|
||||
|
||||
// HOST-DAG: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
|
||||
// CPP: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
|
||||
template<typename T>
|
||||
void fun()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class Test {
|
||||
public:
|
||||
void run()
|
||||
{
|
||||
struct Op
|
||||
{
|
||||
};
|
||||
#if __HIP__
|
||||
kernel<Op><<<1, 1>>>();
|
||||
#endif
|
||||
fun<Op>();
|
||||
}
|
||||
};
|
||||
|
||||
int main() {
|
||||
Test<int> A;
|
||||
A.run();
|
||||
}
|
Loading…
Reference in New Issue