forked from OSchip/llvm-project
[CUDA][HIP] Fix mangling number for local struct
MSVC and Itanium mangling use different mangling numbers for function-scope structs, which causes inconsistent mangled kernel names in device and host compilations. This patch uses Itanium mangling number for structs in for mangling device side names in CUDA/HIP host compilation on Windows to fix this issue. A state is added to ASTContext to indicate whether the current name mangling is for device side names in host compilation. Device and host mangling number are encoded/decoded as upper and lower half of 32 bit unsigned integer to fit into the original mangling number field for AST. Diagnostic will be emitted if a manglining number exceeds limit. Reviewed by: Artem Belevich, Reid Kleckner Differential Revision: https://reviews.llvm.org/D122734 Fixes: SWDEV-328515
This commit is contained in:
parent
981ed72a17
commit
11d3e31c60
|
@ -677,6 +677,9 @@ public:
|
||||||
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
|
~CUDAConstantEvalContextRAII() { Ctx.CUDAConstantEvalCtx = SavedCtx; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Current CUDA name mangling is for device name in host compilation.
|
||||||
|
bool CUDAMangleDeviceNameInHostCompilation = false;
|
||||||
|
|
||||||
/// Returns the dynamic AST node parent map context.
|
/// Returns the dynamic AST node parent map context.
|
||||||
ParentMapContext &getParentMapContext();
|
ParentMapContext &getParentMapContext();
|
||||||
|
|
||||||
|
|
|
@ -11762,7 +11762,14 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
|
||||||
|
|
||||||
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
|
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
|
||||||
auto I = MangleNumbers.find(ND);
|
auto I = MangleNumbers.find(ND);
|
||||||
return I != MangleNumbers.end() ? I->second : 1;
|
unsigned Res = I != MangleNumbers.end() ? I->second : 1;
|
||||||
|
if (!LangOpts.CUDA || LangOpts.CUDAIsDevice)
|
||||||
|
return Res;
|
||||||
|
|
||||||
|
// CUDA/HIP host compilation encodes host and device mangling numbers
|
||||||
|
// as lower and upper half of 32 bit integer.
|
||||||
|
Res = CUDAMangleDeviceNameInHostCompilation ? Res >> 16 : Res & 0xFFFF;
|
||||||
|
return Res > 1 ? Res : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
|
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
|
||||||
|
|
|
@ -76,6 +76,20 @@ public:
|
||||||
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
|
unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
|
||||||
return DeviceCtx->getManglingNumber(CallOperator);
|
return DeviceCtx->getManglingNumber(CallOperator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned getManglingNumber(const TagDecl *TD,
|
||||||
|
unsigned MSLocalManglingNumber) override {
|
||||||
|
unsigned DeviceN = DeviceCtx->getManglingNumber(TD, MSLocalManglingNumber);
|
||||||
|
unsigned HostN =
|
||||||
|
MicrosoftNumberingContext::getManglingNumber(TD, MSLocalManglingNumber);
|
||||||
|
if (DeviceN > 0xFFFF || HostN > 0xFFFF) {
|
||||||
|
DiagnosticsEngine &Diags = TD->getASTContext().getDiagnostics();
|
||||||
|
unsigned DiagID = Diags.getCustomDiagID(
|
||||||
|
DiagnosticsEngine::Error, "Mangling number exceeds limit (65535)");
|
||||||
|
Diags.Report(TD->getLocation(), DiagID);
|
||||||
|
}
|
||||||
|
return (DeviceN << 16) | HostN;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class MSSYCLNumberingContext : public MicrosoftNumberingContext {
|
class MSSYCLNumberingContext : public MicrosoftNumberingContext {
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "llvm/IR/DerivedTypes.h"
|
#include "llvm/IR/DerivedTypes.h"
|
||||||
#include "llvm/IR/ReplaceConstant.h"
|
#include "llvm/IR/ReplaceConstant.h"
|
||||||
#include "llvm/Support/Format.h"
|
#include "llvm/Support/Format.h"
|
||||||
|
#include "llvm/Support/SaveAndRestore.h"
|
||||||
|
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
using namespace CodeGen;
|
using namespace CodeGen;
|
||||||
|
@ -260,6 +261,8 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
|
std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
|
||||||
|
llvm::SaveAndRestore<bool> MangleAsDevice(
|
||||||
|
CGM.getContext().CUDAMangleDeviceNameInHostCompilation, true);
|
||||||
GlobalDecl GD;
|
GlobalDecl GD;
|
||||||
// D could be either a kernel or a variable.
|
// D could be either a kernel or a variable.
|
||||||
if (auto *FD = dyn_cast<FunctionDecl>(ND))
|
if (auto *FD = dyn_cast<FunctionDecl>(ND))
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
// RUN: %clang_cc1 -emit-llvm -o - -aux-triple x86_64-pc-windows-msvc \
|
||||||
|
// RUN: -fms-extensions -triple amdgcn-amd-amdhsa \
|
||||||
|
// RUN: -target-cpu gfx1030 -fcuda-is-device -x hip %s \
|
||||||
|
// RUN: | FileCheck -check-prefix=DEV %s
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||||
|
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
|
||||||
|
// RUN: -aux-target-cpu gfx1030 -x hip %s \
|
||||||
|
// RUN: | FileCheck -check-prefix=HOST %s
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||||
|
// RUN: -fms-extensions -aux-triple amdgcn-amd-amdhsa \
|
||||||
|
// RUN: -aux-target-cpu gfx1030 -x hip %s \
|
||||||
|
// RUN: | FileCheck -check-prefix=HOST-NEG %s
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-pc-windows-msvc \
|
||||||
|
// RUN: -fms-extensions -x c++ %s \
|
||||||
|
// RUN: | FileCheck -check-prefix=CPP %s
|
||||||
|
|
||||||
|
#if __HIP__
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Check local struct 'Op' uses Itanium mangling number instead of MSVC mangling
|
||||||
|
// number in device side name mangling. It is the same in device and host
|
||||||
|
// compilation.
|
||||||
|
|
||||||
|
// DEV: define amdgpu_kernel void @_Z6kernelIZN4TestIiE3runEvE2OpEvv(
|
||||||
|
|
||||||
|
// HOST-DAG: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2OpEvv\00"
|
||||||
|
|
||||||
|
// HOST-NEG-NOT: @{{.*}} = {{.*}}c"_Z6kernelIZN4TestIiE3runEvE2Op_1Evv\00"
|
||||||
|
#if __HIP__
|
||||||
|
template<typename T>
|
||||||
|
__attribute__((global)) void kernel()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Check local struct 'Op' uses MSVC mangling number in host function name mangling.
|
||||||
|
// It is the same when compiled as HIP or C++ program.
|
||||||
|
|
||||||
|
// HOST-DAG: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
|
||||||
|
// CPP: call void @"??$fun@UOp@?2??run@?$Test@H@@QEAAXXZ@@@YAXXZ"()
|
||||||
|
template<typename T>
|
||||||
|
void fun()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class Test {
|
||||||
|
public:
|
||||||
|
void run()
|
||||||
|
{
|
||||||
|
struct Op
|
||||||
|
{
|
||||||
|
};
|
||||||
|
#if __HIP__
|
||||||
|
kernel<Op><<<1, 1>>>();
|
||||||
|
#endif
|
||||||
|
fun<Op>();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
Test<int> A;
|
||||||
|
A.run();
|
||||||
|
}
|
Loading…
Reference in New Issue