[HIPSPV][3/4] Enable SPIR-V emission for HIP

This patch enables SPIR-V binary emission for HIP device code via the HIPSPV tool chain. The ‘--offload’ option, which is envisioned in [1], is added for specifying offload targets. This option is used to override the default device target (amdgcn-amd-amdhsa) for HIP compilation so that device code can be emitted as a SPIR-V binary. The option is handled in getHIPOffloadTargetTriple(). The getOffloadingDeviceToolChain() function (based on the design in the SYCL repository) is added to select HIPSPVToolChain when the HIP offload target is ‘spirv64’. The HIPActionBuilder is modified to produce LLVM IR at the backend phase. The HIPSPV tool chain expects to receive HIP device code as LLVM IR so it can run external LLVM passes over it. The HIPSPV tool chain is also responsible for emitting the SPIR-V binary. A CUDA GPU architecture ‘generic’ is added. The name is taken from the LLVM SPIR-V backend. In the HIPSPV code path the architecture name is inserted into the bundle entry ID as the target ID. The target ID is expected to always be present so that a component of the target triple is not mistaken for the target ID. Tests are added for checking the HIPSPV tool chain. [1]: https://lists.llvm.org/pipermail/cfe-dev/2020-December/067362.html Patch by: Henry Linjamäki Reviewed by: Yaxun Liu, Artem Belevich, Alexey Bader Differential Revision: https://reviews.llvm.org/D110622
2021-12-17 11:01:51 -05:00 · 2021-12-17 11:01:51 -05:00 · a6786cdd57
parent 9cd55c7c34
commit a6786cdd57
21 changed files with 328 additions and 21 deletions
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@ -95,6 +95,8 @@ enum class CudaArch {
  GFX1033,
  GFX1034,
  GFX1035,
+  Generic, // A processor model named 'generic' if the target backend defines a
+           // public one.
  LAST,
 };

@ -103,7 +105,8 @@ static inline bool IsNVIDIAGpuArch(CudaArch A) {
 }

 static inline bool IsAMDGpuArch(CudaArch A) {
-  return A >= CudaArch::GFX600 && A < CudaArch::LAST;
+  // Generic processor model is for testing only.
+  return A >= CudaArch::GFX600 && A < CudaArch::Generic;
 }

 const char *CudaArchToString(CudaArch A);
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@ -621,4 +621,9 @@ def err_cc1_unbounded_vscale_min : Error<

 def err_drv_ssp_missing_offset_argument : Error<
  "'%0' is used without '-mstack-protector-guard-offset', and there is no default">;
+
+def err_drv_only_one_offload_target_supported_in : Error<
+  "Only one offload target is supported in %0.">;
+def err_drv_invalid_or_unsupported_offload_target : Error<
+  "Invalid or unsupported offload target: '%0'.">;
 }
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@ -595,6 +595,21 @@ private:

  /// @}

+  /// Retrieves a ToolChain for a particular device \p Target triple
+  ///
+  /// \param[in] HostTC is the host ToolChain paired with the device
+  ///
+  /// \param[in] TargetDeviceOffloadKind (e.g. OFK_Cuda/OFK_OpenMP/OFK_SYCL)
+  /// is an offloading kind that is optionally passed to a ToolChain (used by
+  /// CUDA, to specify if it's used in conjunction with OpenMP).
+  ///
+  /// Will cache ToolChains for the life of the driver object, and create them
+  /// on-demand.
+  const ToolChain &getOffloadingDeviceToolChain(
+      const llvm::opt::ArgList &Args, const llvm::Triple &Target,
+      const ToolChain &HostTC,
+      const Action::OffloadKind &TargetDeviceOffloadKind) const;
+
  /// Get bitmasks for which option flags to include and exclude based on
  /// the driver mode.
  std::pair<unsigned, unsigned> getIncludeExcludeOptionFlagMasks(bool IsClCompatMode) const;
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@ -1135,6 +1135,13 @@ defm autolink : BoolFOption<"autolink",
  NegFlag<SetFalse, [CC1Option], "Disable generation of linker directives for automatic library linking">,
  PosFlag<SetTrue>>;

+// In the future this option will be supported by other offloading
+// languages and accept other values such as CPU/GPU architectures,
+// offload kinds and target aliases.
+def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>,
+  HelpText<"Specify comma-separated list of offloading target triples"
+           " (HIP only)">;
+
 // C++ Coroutines TS
 defm coroutines_ts : BoolFOption<"coroutines-ts",
  LangOpts<"Coroutines">, Default<cpp20.KeyPath>,
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@ -123,6 +123,7 @@ static const CudaArchToStringMap arch_names[] = {
    GFX(1033), // gfx1033
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
+    {CudaArch::Generic, "generic", ""},
    // clang-format on
 };
 #undef SM
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@ -215,6 +215,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
      case CudaArch::GFX1033:
      case CudaArch::GFX1034:
      case CudaArch::GFX1035:
+      case CudaArch::Generic:
      case CudaArch::LAST:
        break;
      case CudaArch::UNUSED:
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@ -121,7 +121,7 @@ public:

  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
    for (int i = static_cast<int>(CudaArch::SM_20);
-         i < static_cast<int>(CudaArch::LAST); ++i)
+         i < static_cast<int>(CudaArch::Generic); ++i)
      Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));
  }

--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@ -3903,6 +3903,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
      case CudaArch::GFX1033:
      case CudaArch::GFX1034:
      case CudaArch::GFX1035:
+      case CudaArch::Generic:
      case CudaArch::UNUSED:
      case CudaArch::UNKNOWN:
        break;
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@ -24,6 +24,7 @@
 #include "ToolChains/Fuchsia.h"
 #include "ToolChains/Gnu.h"
 #include "ToolChains/HIPAMD.h"
+#include "ToolChains/HIPSPV.h"
 #include "ToolChains/Haiku.h"
 #include "ToolChains/Hexagon.h"
 #include "ToolChains/Hurd.h"
@ -99,8 +100,39 @@ using namespace clang::driver;
 using namespace clang;
 using namespace llvm::opt;

-static llvm::Triple getHIPOffloadTargetTriple() {
-  static const llvm::Triple T("amdgcn-amd-amdhsa");
+static llvm::Optional<llvm::Triple>
+getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
+  if (Args.hasArg(options::OPT_offload_EQ)) {
+    auto HIPOffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ);
+
+    // HIP compilation flow does not support multiple targets for now. We need
+    // the HIPActionBuilder (and possibly the CudaActionBuilder{,Base} too) to
+    // support multiple tool chains first.
+    switch (HIPOffloadTargets.size()) {
+    default:
+      D.Diag(diag::err_drv_only_one_offload_target_supported_in) << "HIP";
+      return llvm::None;
+    case 0:
+      D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << "";
+      return llvm::None;
+    case 1:
+      break;
+    }
+    llvm::Triple TT(HIPOffloadTargets[0]);
+    if (TT.getArch() == llvm::Triple::amdgcn &&
+        TT.getVendor() == llvm::Triple::AMD &&
+        TT.getOS() == llvm::Triple::AMDHSA)
+      return TT;
+    if (TT.getArch() == llvm::Triple::spirv64 &&
+        TT.getVendor() == llvm::Triple::UnknownVendor &&
+        TT.getOS() == llvm::Triple::UnknownOS)
+      return TT;
+    D.Diag(diag::err_drv_invalid_or_unsupported_offload_target)
+        << HIPOffloadTargets[0];
+    return llvm::None;
+  }
+
+  static const llvm::Triple T("amdgcn-amd-amdhsa"); // Default HIP triple.
  return T;
 }

@ -694,17 +726,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
      return;
    }
    const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
-    const llvm::Triple &HostTriple = HostTC->getTriple();
    auto OFK = Action::OFK_HIP;
-    llvm::Triple HIPTriple = getHIPOffloadTargetTriple();
-    // Use the HIP and host triples as the key into the ToolChains map,
-    // because the device toolchain we create depends on both.
-    auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()];
-    if (!HIPTC) {
-      HIPTC = std::make_unique<toolchains::HIPAMDToolChain>(
-          *this, HIPTriple, *HostTC, C.getInputArgs());
-    }
-    C.addOffloadDeviceToolChain(HIPTC.get(), OFK);
+    auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
+    if (!HIPTriple)
+      return;
+    auto *HIPTC = &getOffloadingDeviceToolChain(C.getInputArgs(), *HIPTriple,
+                                                *HostTC, OFK);
+    assert(HIPTC && "Could not create offloading device tool chain.");
+    C.addOffloadDeviceToolChain(HIPTC, OFK);
  }

  //
@ -2729,6 +2758,14 @@ class OffloadingActionBuilder final {
        }
      }

+      // --offload and --offload-arch options are mutually exclusive.
+      if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
+          Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
+                             options::OPT_no_offload_arch_EQ)) {
+        C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch"
+                                                             << "--offload";
+      }
+
      // Collect all cuda_gpu_arch parameters, removing duplicates.
      std::set<StringRef> GpuArchs;
      bool Error = false;
@ -2771,8 +2808,12 @@ class OffloadingActionBuilder final {
      // Default to sm_20 which is the lowest common denominator for
      // supported GPUs.  sm_20 code should work correctly, if
      // suboptimally, on all newer GPUs.
-      if (GpuArchList.empty())
-        GpuArchList.push_back(DefaultCudaArch);
+      if (GpuArchList.empty()) {
+        if (ToolChains.front()->getTriple().isSPIRV())
+          GpuArchList.push_back(CudaArch::Generic);
+        else
+          GpuArchList.push_back(DefaultCudaArch);
+      }

      return Error;
    }
@ -2933,8 +2974,11 @@ class OffloadingActionBuilder final {

    StringRef getCanonicalOffloadArch(StringRef IdStr) override {
      llvm::StringMap<bool> Features;
-      auto ArchStr =
-          parseTargetID(getHIPOffloadTargetTriple(), IdStr, &Features);
+      // getHIPOffloadTargetTriple() is known to return valid value as it has
+      // been called successfully in the CreateOffloadingDeviceToolChains().
+      auto ArchStr = parseTargetID(
+          *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), IdStr,
+          &Features);
      if (!ArchStr) {
        C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
        C.setContainsError();
@ -2988,9 +3032,19 @@ class OffloadingActionBuilder final {
            // When LTO is not enabled, we follow the conventional
            // compiler phases, including backend and assemble phases.
            ActionList AL;
-            auto BackendAction = C.getDriver().ConstructPhaseAction(
-                C, Args, phases::Backend, CudaDeviceActions[I],
-                AssociatedOffloadKind);
+            Action *BackendAction = nullptr;
+            if (ToolChains.front()->getTriple().isSPIRV()) {
+              // Emit LLVM bitcode for SPIR-V targets. SPIR-V device tool chain
+              // (HIPSPVToolChain) runs post-link LLVM IR passes.
+              types::ID Output = Args.hasArg(options::OPT_S)
+                                     ? types::TY_LLVM_IR
+                                     : types::TY_LLVM_BC;
+              BackendAction =
+                  C.MakeAction<BackendJobAction>(CudaDeviceActions[I], Output);
+            } else
+              BackendAction = C.getDriver().ConstructPhaseAction(
+                  C, Args, phases::Backend, CudaDeviceActions[I],
+                  AssociatedOffloadKind);
            auto AssembleAction = C.getDriver().ConstructPhaseAction(
                C, Args, phases::Assemble, BackendAction,
                AssociatedOffloadKind);
@ -5449,6 +5503,38 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
  return *TC;
 }

+const ToolChain &Driver::getOffloadingDeviceToolChain(
+    const ArgList &Args, const llvm::Triple &Target, const ToolChain &HostTC,
+    const Action::OffloadKind &TargetDeviceOffloadKind) const {
+  // Use device / host triples as the key into the ToolChains map because the
+  // device ToolChain we create depends on both.
+  auto &TC = ToolChains[Target.str() + "/" + HostTC.getTriple().str()];
+  if (!TC) {
+    // Categorized by offload kind > arch rather than OS > arch like
+    // the normal getToolChain call, as it seems a reasonable way to categorize
+    // things.
+    switch (TargetDeviceOffloadKind) {
+    case Action::OFK_HIP: {
+      if (Target.getArch() == llvm::Triple::amdgcn &&
+          Target.getVendor() == llvm::Triple::AMD &&
+          Target.getOS() == llvm::Triple::AMDHSA)
+        TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target,
+                                                           HostTC, Args);
+      else if (Target.getArch() == llvm::Triple::spirv64 &&
+               Target.getVendor() == llvm::Triple::UnknownVendor &&
+               Target.getOS() == llvm::Triple::UnknownOS)
+        TC = std::make_unique<toolchains::HIPSPVToolChain>(*this, Target,
+                                                           HostTC, Args);
+      break;
+    }
+    default:
+      break;
+    }
+  }
+
+  return *TC;
+}
+
 bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
  // Say "no" if there is not exactly one input of a type clang understands.
  if (JA.size() != 1 ||
--- a/clang/test/Driver/Inputs/hipspv-dev-lib/a/a.bc
+++ b/clang/test/Driver/Inputs/hipspv-dev-lib/a/a.bc
--- a/clang/test/Driver/Inputs/hipspv-dev-lib/b/b.bc
+++ b/clang/test/Driver/Inputs/hipspv-dev-lib/b/b.bc
--- a/clang/test/Driver/Inputs/hipspv-dev-lib/hipspv-spirv64.bc
+++ b/clang/test/Driver/Inputs/hipspv-dev-lib/hipspv-spirv64.bc
--- a/clang/test/Driver/Inputs/hipspv/bin/.hipVersion
+++ b/clang/test/Driver/Inputs/hipspv/bin/.hipVersion
@ -0,0 +1,2 @@
+HIP_VERSION_MAJOR=3
+HIP_VERSION_MINOR=6
--- a/clang/test/Driver/Inputs/hipspv/lib/hip-device-lib/hipspv-spirv64.bc
+++ b/clang/test/Driver/Inputs/hipspv/lib/hip-device-lib/hipspv-spirv64.bc
--- a/clang/test/Driver/Inputs/hipspv/lib/libLLVMHipSpvPasses.so
+++ b/clang/test/Driver/Inputs/hipspv/lib/libLLVMHipSpvPasses.so
--- a/clang/test/Driver/Inputs/pass-plugin.so
+++ b/clang/test/Driver/Inputs/pass-plugin.so
--- a/clang/test/Driver/hipspv-device-libs.hip
+++ b/clang/test/Driver/hipspv-device-libs.hip
@ -0,0 +1,28 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: system-windows
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:  --hip-path=%S/Inputs/hipspv %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=ALL,HIP-PATH %s
+
+// Test --hip-device-lib-path
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:  --hip-path=%S/Inputs/hipspv \
+// RUN:   --hip-device-lib-path=%S/Inputs/hipspv-dev-lib %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=ALL,HIP-DEV-LIB-PATH %s
+
+// Test --hip-device-lib w/ --hip-device-lib-path and HIP_DEVICE_LIB_PATH.
+// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/hipspv-dev-lib/a \
+// RUN:   %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:    --hip-path=%S/Inputs/hipspv \
+// RUN:    --hip-device-lib-path=%S/Inputs/hipspv-dev-lib/b \
+// RUN:    --hip-device-lib=a.bc --hip-device-lib=b.bc %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=ALL,HIP-DEV-LIB %s
+
+// ALL: {{"[^"]*clang[^"]*"}}
+// HIP-PATH: "-mlink-builtin-bitcode" {{".*/hipspv/lib/hip-device-lib/hipspv-spirv64.bc"}}
+// HIP-DEV-LIB-PATH-NOT: "-mlink-builtin-bitcode" {{".*/hipspv/lib/hip-device-lib/hipspv-spirv64.bc"}}
+// HIP-DEV-LIB-PATH: "-mlink-builtin-bitcode" {{".*/hipspv-dev-lib/hipspv-spirv64.bc"}}
+// HIP-DEV-LIB: "-mlink-builtin-bitcode" {{".*/hipspv-dev-lib/a/a.bc"}}
+// HIP-DEV-LIB-SAME: "-mlink-builtin-bitcode" {{".*/hipspv-dev-lib/b/b.bc"}}
--- a/clang/test/Driver/hipspv-pass-plugin.hip
+++ b/clang/test/Driver/hipspv-pass-plugin.hip
@ -0,0 +1,27 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: system-windows
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:  --hip-path=%S/Inputs/hipspv -nogpuinc %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=FROM-HIP-PATH %s
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN: -nogpuinc -nogpulib --hipspv-pass-plugin=%S/Inputs/pass-plugin.so %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=FROM-OPTION %s
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN: -nogpuinc -nogpulib --hipspv-pass-plugin=foo.so %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=FROM-OPTION-INVALID %s
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN: -nogpuinc -nogpulib %s \
+// RUN: 2>&1 | FileCheck --check-prefixes=NO-PLUGIN %s
+
+// FROM-HIP-PATH: {{".*opt"}} {{".*.bc"}} "-load-pass-plugin"
+// FROM-HIP-PATH-SAME: {{".*/Inputs/hipspv/lib/libLLVMHipSpvPasses.so"}}
+// FROM-OPTION: {{".*opt"}} {{".*.bc"}} "-load-pass-plugin"
+// FROM-OPTION-SAME: {{".*/Inputs/pass-plugin.so"}}
+// FROM-OPTION-INVALID: error: no such file or directory: 'foo.so'
+// NO-PLUGIN-NOT: {{".*opt"}} {{".*.bc"}} "-load-pass-plugin"
+// NO-PLUGIN-NOT: {{".*/Inputs/hipspv/lib/libLLVMHipSpvPasses.so"}}
--- a/clang/test/Driver/hipspv-toolchain-rdc.hip
+++ b/clang/test/Driver/hipspv-toolchain-rdc.hip
@ -0,0 +1,63 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: system-windows
+
+// RUN: %clang -### -x hip -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:   -fgpu-rdc --hip-path=%S/Inputs/hipspv -nohipwrapperinc \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s
+
+// Emit objects for host side path
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" "spirv64"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: "-fgpu-rdc"
+// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" "spirv64"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: "-fgpu-rdc"
+// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
+
+// Emit code (LLVM BC) for device side path.
+// CHECK: [[CLANG]] "-cc1" "-triple" "spirv64"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions"
+// CHECK-SAME: "-fvisibility" "hidden" "-fapply-global-visibility-to-externs"
+// CHECK-SAME: "-fgpu-rdc"
+// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[A_SRC]]
+
+// CHECK: [[CLANG]] "-cc1" "-triple" "spirv64"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions"
+// CHECK-SAME: "-fvisibility" "hidden" "-fapply-global-visibility-to-externs"
+// CHECK-SAME: "-fgpu-rdc"
+// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} [[B_SRC]]
+
+// Link device code, lower it with HIPSPV passes and emit SPIR-V binary.
+// CHECK: {{".*llvm-link.*"}} [[A_BC1]] [[B_BC1]] "-o" [[AB_LINK:".*bc"]]
+// CHECK: {{".*opt.*"}} [[AB_LINK]] "-load-pass-plugin"
+// CHECK-SAME: "{{.*}}/Inputs/hipspv/lib/libLLVMHipSpvPasses.so"
+// CHECK-SAME: "-o" [[AB_LOWER:".*bc"]]
+// CHECK: {{".*llvm-spirv"}} "--spirv-max-version=1.1" "--spirv-ext=+all"
+// CHECK-SAME: [[AB_LOWER]] "-o" "[[AB_SPIRV:.*out]]"
+
+// Construct fat binary object.
+// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" "-bundle-align=4096"
+// CHECK-SAME: "-targets={{.*}},hip-spirv64----generic"
+// CHECK-SAME: "-inputs=/dev/null,[[AB_SPIRV]]"
+// CHECK-SAME: "-outputs=[[AB_FATBIN:.*hipfb]]"
+// CHECK: {{".*llvm-mc.*"}} "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin"
+// CHECK-SAME: "--filetype=obj"
+
+// Output the executable
+// CHECK: {{".*ld.*"}} {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
+// CHECK-SAME: [[OBJBUNDLE]]
--- a/clang/test/Driver/hipspv-toolchain.hip
+++ b/clang/test/Driver/hipspv-toolchain.hip
@ -0,0 +1,37 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: system-windows
+
+// RUN: %clang -### -target x86_64-linux-gnu --offload=spirv64 \
+// RUN:   --hip-path=%S/Inputs/hipspv -nohipwrapperinc %s \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "spirv64"
+// CHECK-SAME: "-aux-triple" "{{.*}}" "-emit-llvm-bc"
+// CHECK-SAME: "-fcuda-is-device"
+// CHECK-SAME: "-fcuda-allow-variadic-functions"
+// CHECK-SAME: "-mlink-builtin-bitcode" {{".*/hipspv/lib/hip-device-lib/hipspv-spirv64.bc"}}
+// CHECK-SAME: "-isystem" {{".*/hipspv/include"}}
+// CHECK-SAME: "-fhip-new-launch-api"
+// CHECK-SAME: "-o" [[DEV_BC:".*bc"]]
+// CHECK-SAME: "-x" "hip"
+
+// CHECK: {{".*llvm-link"}} [[DEV_BC]] "-o" [[LINK_BC:".*bc"]]
+
+// CHECK: {{".*opt"}} [[LINK_BC]] "-load-pass-plugin"
+// CHECK-SAME: {{".*/hipspv/lib/libLLVMHipSpvPasses.so"}}
+// CHECK-SAME: "-passes=hip-post-link-passes" "-o" [[LOWER_BC:".*bc"]]
+
+// CHECK: {{".*llvm-spirv"}} "--spirv-max-version=1.1" "--spirv-ext=+all"
+// CHECK-SAME: [[LOWER_BC]] "-o" "[[SPIRV_OUT:.*out]]"
+
+// CHECK: {{".*clang-offload-bundler"}} "-type=o" "-bundle-align=4096"
+// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-spirv64----generic"
+// CHECK-SAME: "-inputs={{.*}},[[SPIRV_OUT]]" "-outputs=[[BUNDLE:.*hipfb]]"
+
+// CHECK: [[CLANG]] "-cc1" "-triple" {{".*"}} "-aux-triple" "spirv64"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: "-fcuda-include-gpubinary" "[[BUNDLE]]"
+// CHECK-SAME: "-o" [[OBJ_HOST:".*o"]] "-x" "hip"
+
+// CHECK: {{".*ld.*"}} {{.*}}[[OBJ_HOST]]
--- a/clang/test/Driver/invalid-offload-options.cpp
+++ b/clang/test/Driver/invalid-offload-options.cpp
@ -0,0 +1,31 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: system-windows
+
+// RUN: %clang -### -x hip -target x86_64-linux-gnu --offload= \
+// RUN:   --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
+// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s
+// RUN: %clang -### -x hip -target x86_64-linux-gnu --offload=foo \
+// RUN:   --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
+// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s
+
+// INVALID-TARGET: error: Invalid or unsupported offload target: '{{.*}}'
+
+// In the future we should be able to specify multiple targets for HIP
+// compilation but currently it is not supported.
+//
+// RUN: %clang -### -x hip -target x86_64-linux-gnu --offload=foo,bar \
+// RUN:   --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
+// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s
+// RUN: %clang -### -x hip -target x86_64-linux-gnu \
+// RUN:   --offload=foo --offload=bar \
+// RUN:   --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
+// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s
+
+// TOO-MANY-TARGETS: error: Only one offload target is supported in HIP.
+
+// RUN: %clang -### -x hip -target x86_64-linux-gnu -nogpuinc -nogpulib \
+// RUN:   --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \
+// RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s
+
+// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload'