forked from OSchip/llvm-project
[HIP] Add option -fgpu-allow-device-init
Add this option to allow device side class type global variables with non-trivial ctor/dtor. device side init/fini functions will be emitted, which will be executed by HIP runtime when the fat binary is loaded/unloaded. This feature is to facilitate implementation of device side sanitizer which requires global vars with non-trival ctors. By default this option is disabled. Differential Revision: https://reviews.llvm.org/D69268
This commit is contained in:
parent
4334892e7b
commit
68f5ca4e19
|
@ -304,6 +304,11 @@ def err_arcmt_nsinvocation_ownership : Error<"NSInvocation's %0 is not safe to b
|
|||
def err_openclcxx_not_supported : Error<
|
||||
"'%0' is not supported in C++ for OpenCL">;
|
||||
|
||||
// HIP
|
||||
def warn_ignore_hip_only_option : Warning<
|
||||
"'%0' is ignored since it is only supported for HIP">,
|
||||
InGroup<HIPOnly>;
|
||||
|
||||
// OpenMP
|
||||
def err_omp_more_one_clause : Error<
|
||||
"directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">;
|
||||
|
|
|
@ -1077,6 +1077,10 @@ def SerializedDiagnostics : DiagGroup<"serialized-diagnostics">;
|
|||
// compiling CUDA C/C++ but which is not compatible with the CUDA spec.
|
||||
def CudaCompat : DiagGroup<"cuda-compat">;
|
||||
|
||||
// A warning group for warnings about features supported by HIP but
|
||||
// ignored by CUDA.
|
||||
def HIPOnly : DiagGroup<"hip-only">;
|
||||
|
||||
// Warnings which cause linking of the runtime libraries like
|
||||
// libc and the CRT to be skipped.
|
||||
def AVRRtlibLinkingQuirks : DiagGroup<"avr-rtlib-linking-quirks">;
|
||||
|
|
|
@ -224,6 +224,7 @@ LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA d
|
|||
LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
|
||||
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
|
||||
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
|
||||
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
|
||||
|
||||
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
|
||||
|
||||
|
|
|
@ -602,6 +602,9 @@ def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-scri
|
|||
def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
|
||||
Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
|
||||
def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
|
||||
def fgpu_allow_device_init : Flag<["-"], "fgpu-allow-device-init">,
|
||||
Flags<[CC1Option]>, HelpText<"Allow device side init function in HIP">;
|
||||
def fno_gpu_allow_device_init : Flag<["-"], "fno-gpu-allow-device-init">;
|
||||
def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
|
||||
HelpText<"Path to libomptarget-nvptx libraries">;
|
||||
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
|
||||
|
|
|
@ -437,7 +437,7 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
|
|||
// that are of class type, cannot have a non-empty constructor. All
|
||||
// the checks have been done in Sema by now. Whatever initializers
|
||||
// are allowed are empty and we just need to ignore them here.
|
||||
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
|
||||
if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit &&
|
||||
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
|
||||
D->hasAttr<CUDASharedAttr>()))
|
||||
return;
|
||||
|
@ -608,6 +608,11 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
|
|||
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
|
||||
}
|
||||
|
||||
if (getLangOpts().HIP) {
|
||||
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
|
||||
Fn->addFnAttr("device-init");
|
||||
}
|
||||
|
||||
CXXGlobalInits.clear();
|
||||
}
|
||||
|
||||
|
|
|
@ -292,6 +292,10 @@ void HIPToolChain::addClangTargetOptions(
|
|||
false))
|
||||
CC1Args.push_back("-fgpu-rdc");
|
||||
|
||||
if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init,
|
||||
options::OPT_fno_gpu_allow_device_init, false))
|
||||
CC1Args.push_back("-fgpu-allow-device-init");
|
||||
|
||||
// Default to "hidden" visibility, as object level linking will not be
|
||||
// supported for the foreseeable future.
|
||||
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
|
||||
|
|
|
@ -2528,6 +2528,13 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
|||
Opts.CUDADeviceApproxTranscendentals = 1;
|
||||
|
||||
Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);
|
||||
if (Args.hasArg(OPT_fgpu_allow_device_init)) {
|
||||
if (Opts.HIP)
|
||||
Opts.GPUAllowDeviceInit = 1;
|
||||
else
|
||||
Diags.Report(diag::warn_ignore_hip_only_option)
|
||||
<< Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args);
|
||||
}
|
||||
Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
|
||||
|
||||
if (Opts.ObjC) {
|
||||
|
|
|
@ -492,6 +492,8 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
|
|||
const Expr *Init = VD->getInit();
|
||||
if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
|
||||
VD->hasAttr<CUDASharedAttr>()) {
|
||||
if (LangOpts.GPUAllowDeviceInit)
|
||||
return;
|
||||
assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
|
||||
bool AllowedInit = false;
|
||||
if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
|
||||
// RUN: -fgpu-allow-device-init -x hip \
|
||||
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
|
||||
// RUN: | FileCheck %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]]
|
||||
// CHECK: attributes #[[ATTR]] = {{.*}}"device-init"
|
||||
|
||||
__device__ void f();
|
||||
|
||||
struct A {
|
||||
__device__ A() { f(); }
|
||||
};
|
||||
|
||||
__device__ A a;
|
|
@ -0,0 +1,8 @@
|
|||
// REQUIRES: nvptx-registered-target
|
||||
|
||||
// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
|
||||
// RUN: -fgpu-allow-device-init \
|
||||
// RUN: %s 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: warning: '-fgpu-allow-device-init' is ignored since it is only supported for HIP
|
||||
|
Loading…
Reference in New Issue