[NFC][CUDA] Refactor registering device variable

Extract registering device variable to CUDA runtime codegen function since it
will be called in multiple places.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D95558
This commit is contained in:
Yaxun (Sam) Liu 2021-01-26 17:11:01 -05:00
parent 4a35941dbd
commit 0b2af1a288
3 changed files with 87 additions and 66 deletions

View File

@ -120,12 +120,8 @@ private:
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
std::string getDeviceSideName(const NamedDecl *ND) override;
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, bool Constant) override {
bool Extern, bool Constant) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Variable, Extern, Constant,
@ -133,7 +129,7 @@ public:
/*Normalized*/ false, 0}});
}
void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, int Type) override {
bool Extern, int Type) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Surface, Extern, /*Constant*/ false,
@ -141,17 +137,27 @@ public:
/*Normalized*/ false, Type}});
}
void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, int Type, bool Normalized) override {
bool Extern, int Type, bool Normalized) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Texture, Extern, /*Constant*/ false,
/*Managed*/ false, Normalized, Type}});
}
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void handleVarRegistration(const VarDecl *VD,
llvm::GlobalVariable &Var) override;
/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function
llvm::Function *makeModuleDtorFunction() override;
void
internalizeDeviceSideVar(const VarDecl *D,
llvm::GlobalValue::LinkageTypes &Linkage) override;
};
}
@ -915,3 +921,65 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}
void CGNVCUDARuntime::internalizeDeviceSideVar(
const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
// Host-side shadows of external declarations of device-side
// global variables become internal definitions. These have to
// be internal in order to prevent name conflicts with global
// host variables with the same name in a different TUs.
//
// __shared__ variables are odd. Shadows do get created, but
// they are not registered with the CUDA runtime, so they
// can't really be used to access their device-side
// counterparts. It's not clear yet whether it's nvcc's bug or
// a feature, but we've got to do the same for compatibility.
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
D->hasAttr<CUDASharedAttr>() ||
D->getType()->isCUDADeviceBuiltinSurfaceType() ||
D->getType()->isCUDADeviceBuiltinTextureType()) {
Linkage = llvm::GlobalValue::InternalLinkage;
}
}
void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
llvm::GlobalVariable &GV) {
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
// Shadow variables and their properties must be registered with CUDA
// runtime. Skip Extern global variables, which will be registered in
// the TU where they are defined.
//
// Don't register a C++17 inline variable. The local symbol can be
// discarded and referencing a discarded local symbol from outside the
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
// TODO: Reject __device__ constexpr and __device__ inline in Sema.
if (!D->hasExternalStorage() && !D->isInline())
registerDeviceVar(D, GV, !D->hasDefinition(),
D->hasAttr<CUDAConstantAttr>());
} else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
D->getType()->isCUDADeviceBuiltinTextureType()) {
// Builtin surfaces and textures and their template arguments are
// also registered with CUDA runtime.
const ClassTemplateSpecializationDecl *TD =
cast<ClassTemplateSpecializationDecl>(
D->getType()->getAs<RecordType>()->getDecl());
const TemplateArgumentList &Args = TD->getTemplateArgs();
if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
assert(Args.size() == 2 &&
"Unexpected number of template arguments of CUDA device "
"builtin surface type.");
auto SurfType = Args[1].getAsIntegral();
if (!D->hasExternalStorage())
registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
} else {
assert(Args.size() == 3 &&
"Unexpected number of template arguments of CUDA device "
"builtin texture type.");
auto TexType = Args[1].getAsIntegral();
auto Normalized = Args[2].getAsIntegral();
if (!D->hasExternalStorage())
registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
Normalized.getZExtValue());
}
}
}

View File

@ -16,6 +16,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
namespace llvm {
class Function;
@ -80,12 +81,10 @@ public:
/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, bool Constant) = 0;
virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, int Type) = 0;
virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
bool Extern, int Type, bool Normalized) = 0;
/// Check whether a variable is a device variable and register it if true.
virtual void handleVarRegistration(const VarDecl *VD,
llvm::GlobalVariable &Var) = 0;
/// Constructs and returns a module initialization function or nullptr if it's
/// not needed. Must be called after all kernels have been emitted.
@ -98,6 +97,11 @@ public:
/// Returns function or variable name on device side even if the current
/// compilation is for host.
virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
/// Adjust linkage of shadow variables in host compilation.
virtual void
internalizeDeviceSideVar(const VarDecl *D,
llvm::GlobalValue::LinkageTypes &Linkage) = 0;
};
/// Creates an instance of a CUDA runtime class.

View File

@ -4297,59 +4297,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
GV->setExternallyInitialized(true);
} else {
// Host-side shadows of external declarations of device-side
// global variables become internal definitions. These have to
// be internal in order to prevent name conflicts with global
// host variables with the same name in a different TUs.
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
Linkage = llvm::GlobalValue::InternalLinkage;
// Shadow variables and their properties must be registered with CUDA
// runtime. Skip Extern global variables, which will be registered in
// the TU where they are defined.
//
// Don't register a C++17 inline variable. The local symbol can be
// discarded and referencing a discarded local symbol from outside the
// comdat (__cuda_register_globals) is disallowed by the ELF spec.
// TODO: Reject __device__ constexpr and __device__ inline in Sema.
if (!D->hasExternalStorage() && !D->isInline())
getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
D->hasAttr<CUDAConstantAttr>());
} else if (D->hasAttr<CUDASharedAttr>()) {
// __shared__ variables are odd. Shadows do get created, but
// they are not registered with the CUDA runtime, so they
// can't really be used to access their device-side
// counterparts. It's not clear yet whether it's nvcc's bug or
// a feature, but we've got to do the same for compatibility.
Linkage = llvm::GlobalValue::InternalLinkage;
} else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
D->getType()->isCUDADeviceBuiltinTextureType()) {
// Builtin surfaces and textures and their template arguments are
// also registered with CUDA runtime.
Linkage = llvm::GlobalValue::InternalLinkage;
const ClassTemplateSpecializationDecl *TD =
cast<ClassTemplateSpecializationDecl>(
D->getType()->getAs<RecordType>()->getDecl());
const TemplateArgumentList &Args = TD->getTemplateArgs();
if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
assert(Args.size() == 2 &&
"Unexpected number of template arguments of CUDA device "
"builtin surface type.");
auto SurfType = Args[1].getAsIntegral();
if (!D->hasExternalStorage())
getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
SurfType.getSExtValue());
} else {
assert(Args.size() == 3 &&
"Unexpected number of template arguments of CUDA device "
"builtin texture type.");
auto TexType = Args[1].getAsIntegral();
auto Normalized = Args[2].getAsIntegral();
if (!D->hasExternalStorage())
getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
TexType.getSExtValue(),
Normalized.getZExtValue());
}
}
getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
getCUDARuntime().handleVarRegistration(D, *GV);
}
}