forked from OSchip/llvm-project
[hip] Fix device-only relocatable code compilation.
Summary: - In HIP, just as the regular device-only compilation, the device-only relocatable code compilation should not involve offload bundle. - In addition, that device-only relocatable code compilation should have the similar 3 steps, namely preprocessor, compile, and backend, to the regular code generation with `-emit-llvm`. Reviewers: yaxunl, tra Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D81427
This commit is contained in:
parent
4022bc2a6c
commit
8b6821a584
|
@ -2705,9 +2705,7 @@ class OffloadingActionBuilder final {
|
|||
// backend and assemble phases to output LLVM IR. Except for generating
|
||||
// non-relocatable device code, where we generate fat binary for device
|
||||
// code and pass to host in Backend phase.
|
||||
if (CudaDeviceActions.empty() ||
|
||||
(CurPhase == phases::Backend && Relocatable) ||
|
||||
CurPhase == phases::Assemble)
|
||||
if (CudaDeviceActions.empty())
|
||||
return ABRT_Success;
|
||||
|
||||
assert(((CurPhase == phases::Link && Relocatable) ||
|
||||
|
@ -2781,9 +2779,11 @@ class OffloadingActionBuilder final {
|
|||
}
|
||||
|
||||
// By default, we produce an action for each device arch.
|
||||
for (Action *&A : CudaDeviceActions)
|
||||
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
|
||||
AssociatedOffloadKind);
|
||||
if (!Relocatable || CurPhase <= phases::Backend) {
|
||||
for (Action *&A : CudaDeviceActions)
|
||||
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
|
||||
AssociatedOffloadKind);
|
||||
}
|
||||
|
||||
return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
|
||||
: ABRT_Success;
|
||||
|
@ -3668,7 +3668,10 @@ Action *Driver::ConstructPhaseAction(
|
|||
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
|
||||
return C.MakeAction<BackendJobAction>(Input, Output);
|
||||
}
|
||||
if (Args.hasArg(options::OPT_emit_llvm)) {
|
||||
if (Args.hasArg(options::OPT_emit_llvm) ||
|
||||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
|
||||
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
|
||||
false))) {
|
||||
types::ID Output =
|
||||
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
|
||||
return C.MakeAction<BackendJobAction>(Input, Output);
|
||||
|
@ -4588,8 +4591,19 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
|
|||
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
|
||||
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
|
||||
// optimized bitcode output.
|
||||
if (!AtTopLevel && C.getArgs().hasArg(options::OPT_emit_llvm) &&
|
||||
JA.getType() == types::TY_LLVM_BC)
|
||||
auto IsHIPRDCInCompilePhase = [](const JobAction &JA,
|
||||
const llvm::opt::DerivedArgList &Args) {
|
||||
// The relocatable compilation in HIP implies -emit-llvm. Similarly, use a
|
||||
// ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile
|
||||
// phase.)
|
||||
return isa<CompileJobAction>(JA) &&
|
||||
JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
|
||||
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
|
||||
false);
|
||||
};
|
||||
if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC &&
|
||||
(C.getArgs().hasArg(options::OPT_emit_llvm) ||
|
||||
IsHIPRDCInCompilePhase(JA, C.getArgs())))
|
||||
Suffixed += ".tmp";
|
||||
Suffixed += '.';
|
||||
Suffixed += Suffix;
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
// REQUIRES: clang-driver
|
||||
// REQUIRES: x86-registered-target
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
|
||||
|
||||
// With `-emit-llvm`, the output should be the same as the aforementioned line
|
||||
// as `-fgpu-rdc` in HIP implies `-emit-llvm`.
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -c -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITBC %s
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
|
||||
|
||||
// With `-emit-llvm`, the output should be the same as the aforementioned line
|
||||
// as `-fgpu-rdc` in HIP implies `-emit-llvm`.
|
||||
|
||||
// RUN: %clang -### -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -S -emit-llvm -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,EMITLL %s
|
||||
|
||||
// With `-save-temps`, command lines for each step are dumped. For assembly
|
||||
// output, there should be 3 steps (preprocessor, compile, and backend) per source
|
||||
// and per target, 12 steps in total.
|
||||
|
||||
// RUN: %clang -### -save-temps -target x86_64-linux-gnu \
|
||||
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
|
||||
// RUN: -S -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck -check-prefix=SAVETEMP %s
|
||||
|
||||
// COMMON: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// EMITBC-SAME: "-emit-llvm-bc"
|
||||
// EMITLL-SAME: "-emit-llvm"
|
||||
// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
|
||||
// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
|
||||
// COMMON-SAME: "-fapply-global-visibility-to-externs"
|
||||
// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip"
|
||||
// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip"
|
||||
// COMMON-SAME: {{.*}} {{".*a.cu"}}
|
||||
|
||||
// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// EMITBC-SAME: "-emit-llvm-bc"
|
||||
// EMITLL-SAME: "-emit-llvm"
|
||||
// COMMON-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
|
||||
// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
|
||||
// COMMON-SAME: "-fapply-global-visibility-to-externs"
|
||||
// EMITBC-SAME: {{.*}} "-o" {{"a.*bc"}} "-x" "hip"
|
||||
// EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip"
|
||||
// COMMON-SAME: {{.*}} {{".*a.cu"}}
|
||||
|
||||
// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// EMITBC-SAME: "-emit-llvm-bc"
|
||||
// EMITLL-SAME: "-emit-llvm"
|
||||
// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
|
||||
// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
|
||||
// COMMON-SAME: "-fapply-global-visibility-to-externs"
|
||||
// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip"
|
||||
// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip"
|
||||
// COMMON-SAME: {{.*}} {{".*b.hip"}}
|
||||
|
||||
// COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
|
||||
// COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// EMITBC-SAME: "-emit-llvm-bc"
|
||||
// EMITLL-SAME: "-emit-llvm"
|
||||
// COMMON-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
|
||||
// COMMON-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
|
||||
// COMMON-SAME: "-fapply-global-visibility-to-externs"
|
||||
// EMITBC-SAME: {{.*}} "-o" {{"b.*bc"}} "-x" "hip"
|
||||
// EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip"
|
||||
// COMMON-SAME: {{.*}} {{".*b.hip"}}
|
||||
|
||||
// SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-E"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}}
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm-bc"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX803_CUI]]
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX803_TMP_BC]]
|
||||
|
||||
// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-E"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}}
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm-bc"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX900_CUI]]
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]]
|
||||
|
||||
// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-E"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}}
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm-bc"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX803_CUI]]
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX803_TMP_BC]]
|
||||
|
||||
// SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-E"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}}
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm-bc"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX900_CUI]]
|
||||
// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"
|
||||
// SAVETEMP-SAME: "-emit-llvm"
|
||||
// SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
|
||||
// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]]
|
Loading…
Reference in New Issue