forked from OSchip/llvm-project
[NFC][CUDA] Refactor registering device variable
Extract registering device variable to CUDA runtime codegen function since it will be called in multiple places. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D95558
This commit is contained in:
parent
4a35941dbd
commit
0b2af1a288
|
@ -120,12 +120,8 @@ private:
|
|||
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
|
||||
std::string getDeviceSideName(const NamedDecl *ND) override;
|
||||
|
||||
public:
|
||||
CGNVCUDARuntime(CodeGenModule &CGM);
|
||||
|
||||
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
|
||||
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, bool Constant) override {
|
||||
bool Extern, bool Constant) {
|
||||
DeviceVars.push_back({&Var,
|
||||
VD,
|
||||
{DeviceVarFlags::Variable, Extern, Constant,
|
||||
|
@ -133,7 +129,7 @@ public:
|
|||
/*Normalized*/ false, 0}});
|
||||
}
|
||||
void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, int Type) override {
|
||||
bool Extern, int Type) {
|
||||
DeviceVars.push_back({&Var,
|
||||
VD,
|
||||
{DeviceVarFlags::Surface, Extern, /*Constant*/ false,
|
||||
|
@ -141,17 +137,27 @@ public:
|
|||
/*Normalized*/ false, Type}});
|
||||
}
|
||||
void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, int Type, bool Normalized) override {
|
||||
bool Extern, int Type, bool Normalized) {
|
||||
DeviceVars.push_back({&Var,
|
||||
VD,
|
||||
{DeviceVarFlags::Texture, Extern, /*Constant*/ false,
|
||||
/*Managed*/ false, Normalized, Type}});
|
||||
}
|
||||
|
||||
public:
|
||||
CGNVCUDARuntime(CodeGenModule &CGM);
|
||||
|
||||
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
|
||||
void handleVarRegistration(const VarDecl *VD,
|
||||
llvm::GlobalVariable &Var) override;
|
||||
|
||||
/// Creates module constructor function
|
||||
llvm::Function *makeModuleCtorFunction() override;
|
||||
/// Creates module destructor function
|
||||
llvm::Function *makeModuleDtorFunction() override;
|
||||
void
|
||||
internalizeDeviceSideVar(const VarDecl *D,
|
||||
llvm::GlobalValue::LinkageTypes &Linkage) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -915,3 +921,65 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
|
|||
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
|
||||
return new CGNVCUDARuntime(CGM);
|
||||
}
|
||||
|
||||
void CGNVCUDARuntime::internalizeDeviceSideVar(
|
||||
const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
|
||||
// Host-side shadows of external declarations of device-side
|
||||
// global variables become internal definitions. These have to
|
||||
// be internal in order to prevent name conflicts with global
|
||||
// host variables with the same name in a different TUs.
|
||||
//
|
||||
// __shared__ variables are odd. Shadows do get created, but
|
||||
// they are not registered with the CUDA runtime, so they
|
||||
// can't really be used to access their device-side
|
||||
// counterparts. It's not clear yet whether it's nvcc's bug or
|
||||
// a feature, but we've got to do the same for compatibility.
|
||||
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
|
||||
D->hasAttr<CUDASharedAttr>() ||
|
||||
D->getType()->isCUDADeviceBuiltinSurfaceType() ||
|
||||
D->getType()->isCUDADeviceBuiltinTextureType()) {
|
||||
Linkage = llvm::GlobalValue::InternalLinkage;
|
||||
}
|
||||
}
|
||||
|
||||
void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
|
||||
llvm::GlobalVariable &GV) {
|
||||
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
|
||||
// Shadow variables and their properties must be registered with CUDA
|
||||
// runtime. Skip Extern global variables, which will be registered in
|
||||
// the TU where they are defined.
|
||||
//
|
||||
// Don't register a C++17 inline variable. The local symbol can be
|
||||
// discarded and referencing a discarded local symbol from outside the
|
||||
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
|
||||
// TODO: Reject __device__ constexpr and __device__ inline in Sema.
|
||||
if (!D->hasExternalStorage() && !D->isInline())
|
||||
registerDeviceVar(D, GV, !D->hasDefinition(),
|
||||
D->hasAttr<CUDAConstantAttr>());
|
||||
} else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
|
||||
D->getType()->isCUDADeviceBuiltinTextureType()) {
|
||||
// Builtin surfaces and textures and their template arguments are
|
||||
// also registered with CUDA runtime.
|
||||
const ClassTemplateSpecializationDecl *TD =
|
||||
cast<ClassTemplateSpecializationDecl>(
|
||||
D->getType()->getAs<RecordType>()->getDecl());
|
||||
const TemplateArgumentList &Args = TD->getTemplateArgs();
|
||||
if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
|
||||
assert(Args.size() == 2 &&
|
||||
"Unexpected number of template arguments of CUDA device "
|
||||
"builtin surface type.");
|
||||
auto SurfType = Args[1].getAsIntegral();
|
||||
if (!D->hasExternalStorage())
|
||||
registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
|
||||
} else {
|
||||
assert(Args.size() == 3 &&
|
||||
"Unexpected number of template arguments of CUDA device "
|
||||
"builtin texture type.");
|
||||
auto TexType = Args[1].getAsIntegral();
|
||||
auto Normalized = Args[2].getAsIntegral();
|
||||
if (!D->hasExternalStorage())
|
||||
registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
|
||||
Normalized.getZExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
|
||||
namespace llvm {
|
||||
class Function;
|
||||
|
@ -80,12 +81,10 @@ public:
|
|||
|
||||
/// Emits a kernel launch stub.
|
||||
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
|
||||
virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, bool Constant) = 0;
|
||||
virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, int Type) = 0;
|
||||
virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
|
||||
bool Extern, int Type, bool Normalized) = 0;
|
||||
|
||||
/// Check whether a variable is a device variable and register it if true.
|
||||
virtual void handleVarRegistration(const VarDecl *VD,
|
||||
llvm::GlobalVariable &Var) = 0;
|
||||
|
||||
/// Constructs and returns a module initialization function or nullptr if it's
|
||||
/// not needed. Must be called after all kernels have been emitted.
|
||||
|
@ -98,6 +97,11 @@ public:
|
|||
/// Returns function or variable name on device side even if the current
|
||||
/// compilation is for host.
|
||||
virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
|
||||
|
||||
/// Adjust linkage of shadow variables in host compilation.
|
||||
virtual void
|
||||
internalizeDeviceSideVar(const VarDecl *D,
|
||||
llvm::GlobalValue::LinkageTypes &Linkage) = 0;
|
||||
};
|
||||
|
||||
/// Creates an instance of a CUDA runtime class.
|
||||
|
|
|
@ -4297,59 +4297,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
|
|||
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
|
||||
GV->setExternallyInitialized(true);
|
||||
} else {
|
||||
// Host-side shadows of external declarations of device-side
|
||||
// global variables become internal definitions. These have to
|
||||
// be internal in order to prevent name conflicts with global
|
||||
// host variables with the same name in a different TUs.
|
||||
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
|
||||
Linkage = llvm::GlobalValue::InternalLinkage;
|
||||
// Shadow variables and their properties must be registered with CUDA
|
||||
// runtime. Skip Extern global variables, which will be registered in
|
||||
// the TU where they are defined.
|
||||
//
|
||||
// Don't register a C++17 inline variable. The local symbol can be
|
||||
// discarded and referencing a discarded local symbol from outside the
|
||||
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
|
||||
// TODO: Reject __device__ constexpr and __device__ inline in Sema.
|
||||
if (!D->hasExternalStorage() && !D->isInline())
|
||||
getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
|
||||
D->hasAttr<CUDAConstantAttr>());
|
||||
} else if (D->hasAttr<CUDASharedAttr>()) {
|
||||
// __shared__ variables are odd. Shadows do get created, but
|
||||
// they are not registered with the CUDA runtime, so they
|
||||
// can't really be used to access their device-side
|
||||
// counterparts. It's not clear yet whether it's nvcc's bug or
|
||||
// a feature, but we've got to do the same for compatibility.
|
||||
Linkage = llvm::GlobalValue::InternalLinkage;
|
||||
} else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
|
||||
D->getType()->isCUDADeviceBuiltinTextureType()) {
|
||||
// Builtin surfaces and textures and their template arguments are
|
||||
// also registered with CUDA runtime.
|
||||
Linkage = llvm::GlobalValue::InternalLinkage;
|
||||
const ClassTemplateSpecializationDecl *TD =
|
||||
cast<ClassTemplateSpecializationDecl>(
|
||||
D->getType()->getAs<RecordType>()->getDecl());
|
||||
const TemplateArgumentList &Args = TD->getTemplateArgs();
|
||||
if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
|
||||
assert(Args.size() == 2 &&
|
||||
"Unexpected number of template arguments of CUDA device "
|
||||
"builtin surface type.");
|
||||
auto SurfType = Args[1].getAsIntegral();
|
||||
if (!D->hasExternalStorage())
|
||||
getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
|
||||
SurfType.getSExtValue());
|
||||
} else {
|
||||
assert(Args.size() == 3 &&
|
||||
"Unexpected number of template arguments of CUDA device "
|
||||
"builtin texture type.");
|
||||
auto TexType = Args[1].getAsIntegral();
|
||||
auto Normalized = Args[2].getAsIntegral();
|
||||
if (!D->hasExternalStorage())
|
||||
getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
|
||||
TexType.getSExtValue(),
|
||||
Normalized.getZExtValue());
|
||||
}
|
||||
}
|
||||
getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
|
||||
getCUDARuntime().handleVarRegistration(D, *GV);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue