forked from OSchip/llvm-project
[CUDA] Do not generate unnecessary runtime init code.
Differential Revision: http://reviews.llvm.org/D17780 llvm-svn: 262499
This commit is contained in:
parent
42e1949b46
commit
8c1ec1ef38
|
@ -178,6 +178,10 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
|
|||
/// }
|
||||
/// \endcode
|
||||
llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
||||
// No need to register anything
|
||||
if (EmittedKernels.empty() && DeviceVars.empty())
|
||||
return nullptr;
|
||||
|
||||
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
|
||||
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
|
||||
llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
|
||||
|
@ -251,6 +255,10 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
|
|||
/// }
|
||||
/// \endcode
|
||||
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
||||
// No need to generate ctors/dtors if there are no GPU binaries.
|
||||
if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
|
||||
return nullptr;
|
||||
|
||||
// void __cuda_register_globals(void* handle);
|
||||
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
|
||||
// void ** __cudaRegisterFatBinary(void *);
|
||||
|
@ -309,7 +317,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
|||
CGM.getPointerAlign());
|
||||
|
||||
// Call __cuda_register_globals(GpuBinaryHandle);
|
||||
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
|
||||
if (RegisterGlobalsFunc)
|
||||
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
|
||||
|
||||
// Save GpuBinaryHandle so we can unregister it in destructor.
|
||||
GpuBinaryHandles.push_back(GpuBinaryHandle);
|
||||
|
@ -329,6 +338,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
|
|||
/// }
|
||||
/// \endcode
|
||||
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
|
||||
// No need for destructor if we don't have handles to unregister.
|
||||
if (GpuBinaryHandles.empty())
|
||||
return nullptr;
|
||||
|
||||
// void __cudaUnregisterFatBinary(void ** handle);
|
||||
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
|
||||
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
|
||||
|
|
|
@ -1,7 +1,11 @@
|
|||
// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - -DNOGLOBALS \
|
||||
// RUN: | FileCheck %s -check-prefix=NOGLOBALS
|
||||
// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
#ifndef NOGLOBALS
|
||||
// CHECK-DAG: @device_var = internal global i32
|
||||
__device__ int device_var;
|
||||
|
||||
|
@ -65,6 +69,7 @@ __global__ void kernelfunc(int i, int j, int k) {}
|
|||
// CHECK: call{{.*}}cudaConfigureCall
|
||||
// CHECK: call{{.*}}kernelfunc
|
||||
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
|
||||
#endif
|
||||
|
||||
// Test that we've built a function to register kernels and global vars.
|
||||
// CHECK: define internal void @__cuda_register_globals
|
||||
|
@ -89,3 +94,18 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
|
|||
// CHECK: load{{.*}}__cuda_gpubin_handle
|
||||
// CHECK-NEXT: call void @__cudaUnregisterFatBinary
|
||||
|
||||
// There should be no __cuda_register_globals if we have no
|
||||
// device-side globals, but we still need to register GPU binary.
|
||||
// Skip GPU binary string first.
|
||||
// NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
|
||||
// NOGLOBALS-NOT: define internal void @__cuda_register_globals
|
||||
// NOGLOBALS: define internal void @__cuda_module_ctor
|
||||
// NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
|
||||
// NOGLOBALS-NOT: call void @__cuda_register_globals
|
||||
// NOGLOBALS: define internal void @__cuda_module_dtor
|
||||
// NOGLOBALS: call void @__cudaUnregisterFatBinary
|
||||
|
||||
// There should be no constructors/destructors if we have no GPU binary.
|
||||
// NOGPUBIN-NOT: define internal void @__cuda_register_globals
|
||||
// NOGPUBIN-NOT: define internal void @__cuda_module_ctor
|
||||
// NOGPUBIN-NOT: define internal void @__cuda_module_dtor
|
||||
|
|
Loading…
Reference in New Issue