[OpenMP] Add options to only compile the host or device when offloading

OpenMP recently moved to the new offloading driver, which had the side
effect of making it more difficult to inspect intermediate code for the
device. This patch adds `--offload-host-only` and `--offload-device-only`
to control which sides get compiled. This will allow users to more easily
inspect output without needing the temp files.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D124220
This commit is contained in:
Joseph Huber 2022-04-21 20:13:33 -04:00
parent b3826192fb
commit 47d6625570
4 changed files with 80 additions and 31 deletions

View File

@ -906,14 +906,6 @@ def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>
def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
"'-aux-target-cpu' and '-aux-target-feature'.">;
def cuda_device_only : Flag<["--"], "cuda-device-only">,
HelpText<"Compile CUDA code for device only">;
def cuda_host_only : Flag<["--"], "cuda-host-only">,
HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA "
"compilations.">;
def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">,
HelpText<"Compile CUDA code for both host and device (default). Has no "
"effect on non-CUDA compilations.">;
def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOption]>,
HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
@ -2538,6 +2530,19 @@ def offload_new_driver : Flag<["--"], "offload-new-driver">, Flags<[CC1Option]>,
HelpText<"Use the new driver for offloading compilation.">;
def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Don't Use the new driver for offloading compilation.">;
// Language-agnostic controls for which side(s) of an offloading compilation
// are built. The driver honours the last of these three flags on the command
// line.
def offload_device_only : Flag<["--"], "offload-device-only">,
  HelpText<"Only compile for the offloading device.">;
def offload_host_only : Flag<["--"], "offload-host-only">,
  HelpText<"Only compile for the offloading host.">;
def offload_host_device : Flag<["--"], "offload-host-device">,
  HelpText<"Compile for both the offloading host and device (default).">;
// The CUDA-specific spellings are kept as aliases of the generic options
// above so existing command lines keep working.
def cuda_device_only : Flag<["--"], "cuda-device-only">, Alias<offload_device_only>,
  HelpText<"Compile CUDA code for device only">;
def cuda_host_only : Flag<["--"], "cuda-host-only">, Alias<offload_host_only>,
  HelpText<"Compile CUDA code for host only. Has no effect on non-CUDA compilations.">;
def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, Alias<offload_host_device>,
  HelpText<"Compile CUDA code for both host and device (default). Has no "
           "effect on non-CUDA compilations.">;
def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Use the new driver for OpenMP offloading.">;
def fno_openmp_new_driver : Flag<["-"], "fno-openmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,

View File

@ -2868,14 +2868,14 @@ class OffloadingActionBuilder final {
: C.getSingleOffloadToolChain<Action::OFK_HIP>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
options::OPT_cuda_compile_host_device);
CompileHostOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_host_only);
CompileDeviceOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_device_only);
options::OPT_offload_host_only, options::OPT_offload_device_only,
options::OPT_offload_host_device);
CompileHostOnly =
PartialCompilationArg && PartialCompilationArg->getOption().matches(
options::OPT_offload_host_only);
CompileDeviceOnly =
PartialCompilationArg && PartialCompilationArg->getOption().matches(
options::OPT_offload_device_only);
EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
EmitAsm = Args.getLastArg(options::OPT_S);
FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
@ -4055,11 +4055,6 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
break;
}
// Try to build the offloading actions and add the result as a dependency
// to the host.
if (UseNewOffloadingDriver)
Current = BuildOffloadingActions(C, Args, I, Current);
// FIXME: Should we include any prior module file outputs as inputs of
// later actions in the same command line?
@ -4083,6 +4078,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
// Try to build the offloading actions and add the result as a dependency
// to the host.
if (UseNewOffloadingDriver)
Current = BuildOffloadingActions(C, Args, I, Current);
if (Current->getType() == types::TY_Nothing)
break;
}
@ -4204,10 +4204,10 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
// to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
// Claim --offload-host-only and --offload-host-device, which may be
// passed to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_offload_host_only);
Args.ClaimAllArgs(options::OPT_offload_host_device);
}
/// Returns the canonical name for the offloading architecture when using HIP or
@ -4309,14 +4309,22 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
llvm::opt::DerivedArgList &Args,
const InputTy &Input,
Action *HostAction) const {
if (!isa<CompileJobAction>(HostAction))
const Arg *Mode = Args.getLastArg(options::OPT_offload_host_only,
options::OPT_offload_device_only,
options::OPT_offload_host_device);
const bool HostOnly =
Mode && Mode->getOption().matches(options::OPT_offload_host_only);
const bool DeviceOnly =
Mode && Mode->getOption().matches(options::OPT_offload_device_only);
// Don't build offloading actions if explicitly disabled or we do not have a
// compile action to embed it in. If preprocessing only, ignore embedding.
if (HostOnly || !(isa<CompileJobAction>(HostAction) ||
getFinalPhase(Args) == phases::Preprocess))
return HostAction;
OffloadAction::DeviceDependences DDeps;
types::ID InputType = Input.first;
const Arg *InputArg = Input.second;
const Action::OffloadKind OffloadKinds[] = {
Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
@ -4331,6 +4339,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
if (ToolChains.empty())
continue;
types::ID InputType = Input.first;
const Arg *InputArg = Input.second;
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains)
@ -4355,7 +4366,8 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
for (Action *&A : DeviceActions) {
A = ConstructPhaseAction(C, Args, Phase, A, Kind);
if (isa<CompileJobAction>(A) && Kind == Action::OFK_OpenMP) {
if (isa<CompileJobAction>(A) && isa<CompileJobAction>(HostAction) &&
Kind == Action::OFK_OpenMP) {
// OpenMP offloading has a dependency on the host compile action to
// identify which declarations need to be emitted. This shouldn't be
// collapsed with any other actions so we can use it in the device.
@ -4389,6 +4401,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
}
}
if (DeviceOnly)
return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing);
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);

View File

@ -16,3 +16,18 @@
// RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
// RDC: error: Using '--offload-new-driver' requires '-fgpu-rdc'
// Host-only compilation: no device jobs may appear, and the host object must
// feed the offload linker directly.
// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 --offload-host-only %s 2>&1 \
// RUN: | FileCheck -check-prefix BINDINGS-HOST %s
// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[OUTPUT:.+]]"
// BINDINGS-HOST: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
// Device-only compilation: only the device toolchain jobs are expected.
// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 --offload-device-only %s 2>&1 \
// RUN: | FileCheck -check-prefix BINDINGS-DEVICE %s
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]"
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]"
// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin"

View File

@ -3,7 +3,6 @@
///
// REQUIRES: x86-registered-target
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target
@ -50,3 +49,18 @@
// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s
// DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp,nvptx64-nvidia-cuda,sm_70
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --offload-device-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY
// CHECK-DEVICE-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.*]]"
// CHECK-DEVICE-ONLY: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "{{.*}}-openmp-nvptx64-nvidia-cuda.o"
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --offload-device-only -E -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEVICE-ONLY-PP
// CHECK-DEVICE-ONLY-PP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.*]]"], output: "-"