forked from OSchip/llvm-project
[HIP] Add option -fgpu-inline-threshold
Add option -fgpu-inline-threshold to set the inline threshold for device compilation only. Reviewed by: Artem Belevich. Differential Revision: https://reviews.llvm.org/D99233
This commit is contained in:
parent
ac8b71227c
commit
5a2d78b163
|
@ -956,6 +956,9 @@ def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">,
|
||||||
HelpText<"Default max threads per block for kernel launch bounds for HIP">,
|
HelpText<"Default max threads per block for kernel launch bounds for HIP">,
|
||||||
MarshallingInfoInt<LangOpts<"GPUMaxThreadsPerBlock">, "1024">,
|
MarshallingInfoInt<LangOpts<"GPUMaxThreadsPerBlock">, "1024">,
|
||||||
ShouldParseIf<hip.KeyPath>;
|
ShouldParseIf<hip.KeyPath>;
|
||||||
|
def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
|
||||||
|
Flags<[HelpHidden]>,
|
||||||
|
HelpText<"Inline threshold for device compilation for CUDA/HIP">;
|
||||||
def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">,
|
def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">,
|
||||||
HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing "
|
HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing "
|
||||||
"__cyg_profile_func_enter and __cyg_profile_func_exit">;
|
"__cyg_profile_func_enter and __cyg_profile_func_exit">;
|
||||||
|
|
|
@ -6473,6 +6473,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||||
if (IsHIP)
|
if (IsHIP)
|
||||||
CmdArgs.push_back("-fcuda-allow-variadic-functions");
|
CmdArgs.push_back("-fcuda-allow-variadic-functions");
|
||||||
|
|
||||||
|
if (IsCudaDevice || IsHIPDevice) {
|
||||||
|
StringRef InlineThresh =
|
||||||
|
Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
|
||||||
|
if (!InlineThresh.empty()) {
|
||||||
|
std::string ArgStr =
|
||||||
|
std::string("-inline-threshold=") + InlineThresh.str();
|
||||||
|
CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
|
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
|
||||||
// to specify the result of the compile phase on the host, so the meaningful
|
// to specify the result of the compile phase on the host, so the meaningful
|
||||||
// device declarations can be identified. Also, -fopenmp-is-device is passed
|
// device declarations can be identified. Also, -fopenmp-is-device is passed
|
||||||
|
|
|
@ -51,3 +51,8 @@
|
||||||
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s
|
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s
|
||||||
// CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
|
// CTA: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
|
||||||
// CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
|
// CTA-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
|
||||||
|
|
||||||
|
// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
|
||||||
|
// RUN: --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
|
||||||
|
// THRESH: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
|
||||||
|
// THRESH-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
|
||||||
|
|
Loading…
Reference in New Issue