[HIP] Add default header and include path

To support std::complex and some other standard C/C++ functions in HIP device code,
they need to be forced to be __host__ __device__ functions by pragmas. This is done
by some clang standard C++ wrapper headers which are shared between cuda-clang and hip-Clang.

For these standard C++ wapper headers to work properly, specific include path order
has to be enforced:

  clang C++ wrapper include path
  standard C++ include path
  clang include path

Also, these C++ wrapper headers require device version of some standard C/C++ functions
must be declared before including them. This needs to be done by including a default
header which declares or defines these device functions. The default header is always
included before any other headers are included by users.

This patch adds the the default header and include path for HIP.

Differential Revision: https://reviews.llvm.org/D81176
This commit is contained in:
Yaxun (Sam) Liu 2020-06-03 10:05:52 -04:00
parent a3597ecae9
commit 11d06b9511
48 changed files with 1894 additions and 189 deletions

View File

@ -58,7 +58,7 @@ def err_drv_no_cuda_libdevice : Error<
def err_drv_no_rocm_installation : Error<
"cannot find ROCm installation. Provide its path via --rocm-path, or pass "
"-nogpulib.">;
"-nogpulib and -nogpuinc to build without ROCm device library and HIP includes.">;
def err_drv_no_rocm_device_lib : Error<
"cannot find device library for %0. Provide path to different ROCm installation "
"via --rocm-path, or pass -nogpulib to build without linking default libraries.">;

View File

@ -2640,7 +2640,8 @@ def no_pedantic : Flag<["-", "--"], "no-pedantic">, Group<pedantic_Group>;
def no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_terms">;
def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option, CoreOption]>,
HelpText<"Disable builtin #include directories">;
def nocudainc : Flag<["-"], "nocudainc">;
def nogpuinc : Flag<["-"], "nogpuinc">;
def : Flag<["-"], "nocudainc">, Alias<nogpuinc>;
def nogpulib : Flag<["-"], "nogpulib">,
HelpText<"Do not link device library for CUDA/HIP device compilation">;
def : Flag<["-"], "nocudalib">, Alias<nogpulib>;

View File

@ -617,6 +617,10 @@ public:
virtual void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
/// Add arguments to use system-specific HIP includes.
virtual void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
/// Add arguments to use MCU GCC toolchain includes.
virtual void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;

View File

@ -991,6 +991,9 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
void ToolChain::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}

View File

@ -199,6 +199,40 @@ void RocmInstallationDetector::print(raw_ostream &OS) const {
OS << "Found ROCm installation: " << InstallPath << '\n';
}
void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// HIP header includes standard library wrapper headers under clang
// cuda_wrappers directory. Since these wrapper headers include_next
// standard C++ headers, whereas libc++ headers include_next other clang
// headers. The include paths have to follow this order:
// - wrapper include path
// - standard C++ include path
// - other clang include path
// Since standard C++ and other clang include paths are added in other
// places after this function, here we only need to make sure wrapper
// include path is added.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "cuda_wrappers");
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(P));
CC1Args.push_back("-include");
CC1Args.push_back("__clang_hip_runtime_wrapper.h");
}
if (DriverArgs.hasArg(options::OPT_nogpuinc))
return;
if (!isValid()) {
D.Diag(diag::err_drv_no_rocm_installation);
return;
}
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
}
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,

View File

@ -10,6 +10,7 @@
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
#include "Gnu.h"
#include "ROCm.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
@ -21,148 +22,6 @@
namespace clang {
namespace driver {
/// A class to find a viable ROCM installation
/// TODO: Generalize to handle libclc.
class RocmInstallationDetector {
private:
struct ConditionalLibrary {
SmallString<0> On;
SmallString<0> Off;
bool isValid() const {
return !On.empty() && !Off.empty();
}
StringRef get(bool Enabled) const {
assert(isValid());
return Enabled ? On : Off;
}
};
const Driver &D;
bool IsValid = false;
//RocmVersion Version = RocmVersion::UNKNOWN;
SmallString<0> InstallPath;
//SmallString<0> BinPath;
SmallString<0> LibPath;
SmallString<0> LibDevicePath;
SmallString<0> IncludePath;
llvm::StringMap<std::string> LibDeviceMap;
// Libraries that are always linked.
SmallString<0> OCML;
SmallString<0> OCKL;
// Libraries that are always linked depending on the language
SmallString<0> OpenCL;
SmallString<0> HIP;
// Libraries swapped based on compile flags.
ConditionalLibrary WavefrontSize64;
ConditionalLibrary FiniteOnly;
ConditionalLibrary UnsafeMath;
ConditionalLibrary DenormalsAreZero;
ConditionalLibrary CorrectlyRoundedSqrt;
bool allGenericLibsValid() const {
return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && !HIP.empty() &&
WavefrontSize64.isValid() && FiniteOnly.isValid() &&
UnsafeMath.isValid() && DenormalsAreZero.isValid() &&
CorrectlyRoundedSqrt.isValid();
}
// CUDA architectures for which we have raised an error in
// CheckRocmVersionSupportsArch.
mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
void scanLibDevicePath();
public:
RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args);
/// Add arguments needed to link default bitcode libraries.
void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef LibDeviceFile, bool Wave64,
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
bool FastRelaxedMath, bool CorrectSqrt) const;
/// Emit an error if Version does not support the given Arch.
///
/// If either Version or Arch is unknown, does not emit an error. Emits at
/// most one error per Arch.
void CheckRocmVersionSupportsArch(CudaArch Arch) const;
/// Check whether we detected a valid Rocm install.
bool isValid() const { return IsValid; }
/// Print information about the detected CUDA installation.
void print(raw_ostream &OS) const;
/// Get the detected Rocm install's version.
// RocmVersion version() const { return Version; }
/// Get the detected Rocm installation path.
StringRef getInstallPath() const { return InstallPath; }
/// Get the detected path to Rocm's bin directory.
// StringRef getBinPath() const { return BinPath; }
/// Get the detected Rocm Include path.
StringRef getIncludePath() const { return IncludePath; }
/// Get the detected Rocm library path.
StringRef getLibPath() const { return LibPath; }
/// Get the detected Rocm device library path.
StringRef getLibDevicePath() const { return LibDevicePath; }
StringRef getOCMLPath() const {
assert(!OCML.empty());
return OCML;
}
StringRef getOCKLPath() const {
assert(!OCKL.empty());
return OCKL;
}
StringRef getOpenCLPath() const {
assert(!OpenCL.empty());
return OpenCL;
}
StringRef getHIPPath() const {
assert(!HIP.empty());
return HIP;
}
StringRef getWavefrontSize64Path(bool Enabled) const {
return WavefrontSize64.get(Enabled);
}
StringRef getFiniteOnlyPath(bool Enabled) const {
return FiniteOnly.get(Enabled);
}
StringRef getUnsafeMathPath(bool Enabled) const {
return UnsafeMath.get(Enabled);
}
StringRef getDenormalsAreZeroPath(bool Enabled) const {
return DenormalsAreZero.get(Enabled);
}
StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
return CorrectlyRoundedSqrt.get(Enabled);
}
/// Get libdevice file for given architecture
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
};
namespace tools {
namespace amdgpu {

View File

@ -1202,12 +1202,14 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_MP);
Args.AddLastArg(CmdArgs, options::OPT_MV);
// Add offload include arguments specific for CUDA. This must happen before
// we -I or -include anything else, because we must pick up the CUDA headers
// from the particular CUDA installation, rather than from e.g.
// /usr/local/include.
// Add offload include arguments specific for CUDA/HIP. This must happen
// before we -I or -include anything else, because we must pick up the
// CUDA/HIP headers from the particular CUDA/ROCm installation, rather than
// from e.g. /usr/local/include.
if (JA.isOffloading(Action::OFK_Cuda))
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
if (JA.isOffloading(Action::OFK_HIP))
getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
// If we are offloading to a target via OpenMP we need to include the
// openmp_wrappers folder which contains alternative system headers.

View File

@ -241,7 +241,7 @@ void CudaInstallationDetector::AddCudaIncludeArgs(
CC1Args.push_back(DriverArgs.MakeArgString(P));
}
if (DriverArgs.hasArg(options::OPT_nocudainc))
if (DriverArgs.hasArg(options::OPT_nogpuinc))
return;
if (!isValid()) {
@ -765,7 +765,7 @@ void CudaToolChain::adjustDebugInfoKind(
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Check our CUDA version if we're going to include the CUDA headers.
if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
assert(!Arch.empty() && "Must have an explicit GPU arch.");

View File

@ -2578,7 +2578,7 @@ bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig(
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), GCCInstallation(D),
CudaInstallation(D, Triple, Args) {
CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);

View File

@ -10,6 +10,7 @@
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_GNU_H
#include "Cuda.h"
#include "ROCm.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include <set>
@ -278,6 +279,7 @@ public:
protected:
GCCInstallationDetector GCCInstallation;
CudaInstallationDetector CudaInstallation;
RocmInstallationDetector RocmInstallation;
public:
Generic_GCC(const Driver &D, const llvm::Triple &Triple,

View File

@ -427,6 +427,11 @@ void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
}
SanitizerMask HIPToolChain::getSupportedSanitizers() const {
// The HIPToolChain only supports sanitizers in the sense that it allows
// sanitizer arguments on the command line if they are supported by the host

View File

@ -107,6 +107,8 @@ public:
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
SanitizerMask getSupportedSanitizers() const override;

View File

@ -797,6 +797,11 @@ void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs,
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
void Linux::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
}
void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (GCCInstallation.isValid()) {

View File

@ -31,6 +31,8 @@ public:
llvm::opt::ArgStringList &CC1Args) const override;
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetDefaultCXXStdlibType() const override;

View File

@ -739,7 +739,8 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
: ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args),
RocmInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
@ -797,6 +798,11 @@ void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
void MSVCToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
}
void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}

View File

@ -9,6 +9,7 @@
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#include "AMDGPU.h"
#include "Cuda.h"
#include "clang/Basic/DebugInfoOptions.h"
#include "clang/Driver/Compilation.h"
@ -125,6 +126,9 @@ public:
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
bool getWindowsSDKLibraryPath(std::string &path) const;
/// Check if Universal CRT should be used if available
bool getUniversalCRTLibraryPath(std::string &path) const;
@ -155,6 +159,7 @@ private:
std::string VCToolChainPath;
ToolsetLayout VSLayout = ToolsetLayout::OlderVS;
CudaInstallationDetector CudaInstallation;
RocmInstallationDetector RocmInstallation;
};
} // end namespace toolchains

View File

@ -0,0 +1,166 @@
//===--- ROCm.h - ROCm installation detector --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H
#include "clang/Basic/Cuda.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
namespace clang {
namespace driver {
/// A class to find a viable ROCM installation
/// TODO: Generalize to handle libclc.
class RocmInstallationDetector {
private:
struct ConditionalLibrary {
SmallString<0> On;
SmallString<0> Off;
bool isValid() const { return !On.empty() && !Off.empty(); }
StringRef get(bool Enabled) const {
assert(isValid());
return Enabled ? On : Off;
}
};
const Driver &D;
bool IsValid = false;
// RocmVersion Version = RocmVersion::UNKNOWN;
SmallString<0> InstallPath;
// SmallString<0> BinPath;
SmallString<0> LibPath;
SmallString<0> LibDevicePath;
SmallString<0> IncludePath;
llvm::StringMap<std::string> LibDeviceMap;
// Libraries that are always linked.
SmallString<0> OCML;
SmallString<0> OCKL;
// Libraries that are always linked depending on the language
SmallString<0> OpenCL;
SmallString<0> HIP;
// Libraries swapped based on compile flags.
ConditionalLibrary WavefrontSize64;
ConditionalLibrary FiniteOnly;
ConditionalLibrary UnsafeMath;
ConditionalLibrary DenormalsAreZero;
ConditionalLibrary CorrectlyRoundedSqrt;
bool allGenericLibsValid() const {
return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() && !HIP.empty() &&
WavefrontSize64.isValid() && FiniteOnly.isValid() &&
UnsafeMath.isValid() && DenormalsAreZero.isValid() &&
CorrectlyRoundedSqrt.isValid();
}
// GPU architectures for which we have raised an error in
// CheckRocmVersionSupportsArch.
mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
void scanLibDevicePath();
public:
RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args);
/// Add arguments needed to link default bitcode libraries.
void addCommonBitcodeLibCC1Args(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef LibDeviceFile, bool Wave64,
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
bool FastRelaxedMath, bool CorrectSqrt) const;
/// Emit an error if Version does not support the given Arch.
///
/// If either Version or Arch is unknown, does not emit an error. Emits at
/// most one error per Arch.
void CheckRocmVersionSupportsArch(CudaArch Arch) const;
/// Check whether we detected a valid Rocm install.
bool isValid() const { return IsValid; }
/// Print information about the detected ROCm installation.
void print(raw_ostream &OS) const;
/// Get the detected Rocm install's version.
// RocmVersion version() const { return Version; }
/// Get the detected Rocm installation path.
StringRef getInstallPath() const { return InstallPath; }
/// Get the detected path to Rocm's bin directory.
// StringRef getBinPath() const { return BinPath; }
/// Get the detected Rocm Include path.
StringRef getIncludePath() const { return IncludePath; }
/// Get the detected Rocm library path.
StringRef getLibPath() const { return LibPath; }
/// Get the detected Rocm device library path.
StringRef getLibDevicePath() const { return LibDevicePath; }
StringRef getOCMLPath() const {
assert(!OCML.empty());
return OCML;
}
StringRef getOCKLPath() const {
assert(!OCKL.empty());
return OCKL;
}
StringRef getOpenCLPath() const {
assert(!OpenCL.empty());
return OpenCL;
}
StringRef getHIPPath() const {
assert(!HIP.empty());
return HIP;
}
StringRef getWavefrontSize64Path(bool Enabled) const {
return WavefrontSize64.get(Enabled);
}
StringRef getFiniteOnlyPath(bool Enabled) const {
return FiniteOnly.get(Enabled);
}
StringRef getUnsafeMathPath(bool Enabled) const {
return UnsafeMath.get(Enabled);
}
StringRef getDenormalsAreZeroPath(bool Enabled) const {
return DenormalsAreZero.get(Enabled);
}
StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
return CorrectlyRoundedSqrt.get(Enabled);
}
/// Get libdevice file for given architecture
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const;
};
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H

View File

@ -45,6 +45,9 @@ set(files
__clang_cuda_libdevice_declares.h
__clang_cuda_math_forward_declares.h
__clang_cuda_runtime_wrapper.h
__clang_hip_libdevice_declares.h
__clang_hip_math.h
__clang_hip_runtime_wrapper.h
cetintrin.h
cet.h
cldemoteintrin.h

View File

@ -8,8 +8,8 @@
*/
#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__
#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#if !__CUDA__ && !__HIP__
#error "This file is for CUDA/HIP compilation only."
#endif
// This file forward-declares of some math functions we (or the CUDA headers)

View File

@ -0,0 +1,326 @@
/*===---- __clang_hip_libdevice_declares.h - HIP device library decls -------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_HIP_LIBDEVICE_DECLARES_H__
#define __CLANG_HIP_LIBDEVICE_DECLARES_H__
extern "C" {
// BEGIN FLOAT
__device__ __attribute__((const)) float __ocml_acos_f32(float);
__device__ __attribute__((pure)) float __ocml_acosh_f32(float);
__device__ __attribute__((const)) float __ocml_asin_f32(float);
__device__ __attribute__((pure)) float __ocml_asinh_f32(float);
__device__ __attribute__((const)) float __ocml_atan2_f32(float, float);
__device__ __attribute__((const)) float __ocml_atan_f32(float);
__device__ __attribute__((pure)) float __ocml_atanh_f32(float);
__device__ __attribute__((pure)) float __ocml_cbrt_f32(float);
__device__ __attribute__((const)) float __ocml_ceil_f32(float);
__device__ __attribute__((const)) __device__ float __ocml_copysign_f32(float,
float);
__device__ float __ocml_cos_f32(float);
__device__ float __ocml_native_cos_f32(float);
__device__ __attribute__((pure)) __device__ float __ocml_cosh_f32(float);
__device__ float __ocml_cospi_f32(float);
__device__ float __ocml_i0_f32(float);
__device__ float __ocml_i1_f32(float);
__device__ __attribute__((pure)) float __ocml_erfc_f32(float);
__device__ __attribute__((pure)) float __ocml_erfcinv_f32(float);
__device__ __attribute__((pure)) float __ocml_erfcx_f32(float);
__device__ __attribute__((pure)) float __ocml_erf_f32(float);
__device__ __attribute__((pure)) float __ocml_erfinv_f32(float);
__device__ __attribute__((pure)) float __ocml_exp10_f32(float);
__device__ __attribute__((pure)) float __ocml_native_exp10_f32(float);
__device__ __attribute__((pure)) float __ocml_exp2_f32(float);
__device__ __attribute__((pure)) float __ocml_exp_f32(float);
__device__ __attribute__((pure)) float __ocml_native_exp_f32(float);
__device__ __attribute__((pure)) float __ocml_expm1_f32(float);
__device__ __attribute__((const)) float __ocml_fabs_f32(float);
__device__ __attribute__((const)) float __ocml_fdim_f32(float, float);
__device__ __attribute__((const)) float __ocml_floor_f32(float);
__device__ __attribute__((const)) float __ocml_fma_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
__device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
__device__ __attribute__((const)) __device__ float __ocml_fmod_f32(float,
float);
__device__ float __ocml_frexp_f32(float,
__attribute__((address_space(5))) int *);
__device__ __attribute__((const)) float __ocml_hypot_f32(float, float);
__device__ __attribute__((const)) int __ocml_ilogb_f32(float);
__device__ __attribute__((const)) int __ocml_isfinite_f32(float);
__device__ __attribute__((const)) int __ocml_isinf_f32(float);
__device__ __attribute__((const)) int __ocml_isnan_f32(float);
__device__ float __ocml_j0_f32(float);
__device__ float __ocml_j1_f32(float);
__device__ __attribute__((const)) float __ocml_ldexp_f32(float, int);
__device__ float __ocml_lgamma_f32(float);
__device__ __attribute__((pure)) float __ocml_log10_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log10_f32(float);
__device__ __attribute__((pure)) float __ocml_log1p_f32(float);
__device__ __attribute__((pure)) float __ocml_log2_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log2_f32(float);
__device__ __attribute__((const)) float __ocml_logb_f32(float);
__device__ __attribute__((pure)) float __ocml_log_f32(float);
__device__ __attribute__((pure)) float __ocml_native_log_f32(float);
__device__ float __ocml_modf_f32(float,
__attribute__((address_space(5))) float *);
__device__ __attribute__((const)) float __ocml_nearbyint_f32(float);
__device__ __attribute__((const)) float __ocml_nextafter_f32(float, float);
__device__ __attribute__((const)) float __ocml_len3_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_len4_f32(float, float, float,
float);
__device__ __attribute__((pure)) float __ocml_ncdf_f32(float);
__device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float);
__device__ __attribute__((pure)) float __ocml_pow_f32(float, float);
__device__ __attribute__((pure)) float __ocml_rcbrt_f32(float);
__device__ __attribute__((const)) float __ocml_remainder_f32(float, float);
__device__ float __ocml_remquo_f32(float, float,
__attribute__((address_space(5))) int *);
__device__ __attribute__((const)) float __ocml_rhypot_f32(float, float);
__device__ __attribute__((const)) float __ocml_rint_f32(float);
__device__ __attribute__((const)) float __ocml_rlen3_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_rlen4_f32(float, float, float,
float);
__device__ __attribute__((const)) float __ocml_round_f32(float);
__device__ __attribute__((pure)) float __ocml_rsqrt_f32(float);
__device__ __attribute__((const)) float __ocml_scalb_f32(float, float);
__device__ __attribute__((const)) float __ocml_scalbn_f32(float, int);
__device__ __attribute__((const)) int __ocml_signbit_f32(float);
__device__ float __ocml_sincos_f32(float,
__attribute__((address_space(5))) float *);
__device__ float __ocml_sincospi_f32(float,
__attribute__((address_space(5))) float *);
__device__ float __ocml_sin_f32(float);
__device__ float __ocml_native_sin_f32(float);
__device__ __attribute__((pure)) float __ocml_sinh_f32(float);
__device__ float __ocml_sinpi_f32(float);
__device__ __attribute__((const)) float __ocml_sqrt_f32(float);
__device__ __attribute__((const)) float __ocml_native_sqrt_f32(float);
__device__ float __ocml_tan_f32(float);
__device__ __attribute__((pure)) float __ocml_tanh_f32(float);
__device__ float __ocml_tgamma_f32(float);
__device__ __attribute__((const)) float __ocml_trunc_f32(float);
__device__ float __ocml_y0_f32(float);
__device__ float __ocml_y1_f32(float);
// BEGIN INTRINSICS
__device__ __attribute__((const)) float __ocml_add_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_add_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_sub_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_mul_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float, float);
__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float, float);
__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float, float);
__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float, float);
__device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float);
__device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float);
__device__ __attribute__((const)) float
__llvm_amdgcn_cos_f32(float) __asm("llvm.amdgcn.cos.f32");
__device__ __attribute__((const)) float
__llvm_amdgcn_rcp_f32(float) __asm("llvm.amdgcn.rcp.f32");
__device__ __attribute__((const)) float
__llvm_amdgcn_rsq_f32(float) __asm("llvm.amdgcn.rsq.f32");
__device__ __attribute__((const)) float
__llvm_amdgcn_sin_f32(float) __asm("llvm.amdgcn.sin.f32");
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__device__ __attribute__((const)) double __ocml_acos_f64(double);
__device__ __attribute__((pure)) double __ocml_acosh_f64(double);
__device__ __attribute__((const)) double __ocml_asin_f64(double);
__device__ __attribute__((pure)) double __ocml_asinh_f64(double);
__device__ __attribute__((const)) double __ocml_atan2_f64(double, double);
__device__ __attribute__((const)) double __ocml_atan_f64(double);
__device__ __attribute__((pure)) double __ocml_atanh_f64(double);
__device__ __attribute__((pure)) double __ocml_cbrt_f64(double);
__device__ __attribute__((const)) double __ocml_ceil_f64(double);
__device__ __attribute__((const)) double __ocml_copysign_f64(double, double);
__device__ double __ocml_cos_f64(double);
__device__ __attribute__((pure)) double __ocml_cosh_f64(double);
__device__ double __ocml_cospi_f64(double);
__device__ double __ocml_i0_f64(double);
__device__ double __ocml_i1_f64(double);
__device__ __attribute__((pure)) double __ocml_erfc_f64(double);
__device__ __attribute__((pure)) double __ocml_erfcinv_f64(double);
__device__ __attribute__((pure)) double __ocml_erfcx_f64(double);
__device__ __attribute__((pure)) double __ocml_erf_f64(double);
__device__ __attribute__((pure)) double __ocml_erfinv_f64(double);
__device__ __attribute__((pure)) double __ocml_exp10_f64(double);
__device__ __attribute__((pure)) double __ocml_exp2_f64(double);
__device__ __attribute__((pure)) double __ocml_exp_f64(double);
__device__ __attribute__((pure)) double __ocml_expm1_f64(double);
__device__ __attribute__((const)) double __ocml_fabs_f64(double);
__device__ __attribute__((const)) double __ocml_fdim_f64(double, double);
__device__ __attribute__((const)) double __ocml_floor_f64(double);
__device__ __attribute__((const)) double __ocml_fma_f64(double, double, double);
__device__ __attribute__((const)) double __ocml_fmax_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmin_f64(double, double);
__device__ __attribute__((const)) double __ocml_fmod_f64(double, double);
__device__ double __ocml_frexp_f64(double,
__attribute__((address_space(5))) int *);
__device__ __attribute__((const)) double __ocml_hypot_f64(double, double);
__device__ __attribute__((const)) int __ocml_ilogb_f64(double);
__device__ __attribute__((const)) int __ocml_isfinite_f64(double);
__device__ __attribute__((const)) int __ocml_isinf_f64(double);
__device__ __attribute__((const)) int __ocml_isnan_f64(double);
__device__ double __ocml_j0_f64(double);
__device__ double __ocml_j1_f64(double);
__device__ __attribute__((const)) double __ocml_ldexp_f64(double, int);
__device__ double __ocml_lgamma_f64(double);
__device__ __attribute__((pure)) double __ocml_log10_f64(double);
__device__ __attribute__((pure)) double __ocml_log1p_f64(double);
__device__ __attribute__((pure)) double __ocml_log2_f64(double);
__device__ __attribute__((const)) double __ocml_logb_f64(double);
__device__ __attribute__((pure)) double __ocml_log_f64(double);
__device__ double __ocml_modf_f64(double,
__attribute__((address_space(5))) double *);
__device__ __attribute__((const)) double __ocml_nearbyint_f64(double);
__device__ __attribute__((const)) double __ocml_nextafter_f64(double, double);
__device__ __attribute__((const)) double __ocml_len3_f64(double, double,
double);
__device__ __attribute__((const)) double __ocml_len4_f64(double, double, double,
double);
__device__ __attribute__((pure)) double __ocml_ncdf_f64(double);
__device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double);
__device__ __attribute__((pure)) double __ocml_pow_f64(double, double);
__device__ __attribute__((pure)) double __ocml_rcbrt_f64(double);
__device__ __attribute__((const)) double __ocml_remainder_f64(double, double);
__device__ double __ocml_remquo_f64(double, double,
__attribute__((address_space(5))) int *);
__device__ __attribute__((const)) double __ocml_rhypot_f64(double, double);
__device__ __attribute__((const)) double __ocml_rint_f64(double);
__device__ __attribute__((const)) double __ocml_rlen3_f64(double, double,
double);
__device__ __attribute__((const)) double __ocml_rlen4_f64(double, double,
double, double);
__device__ __attribute__((const)) double __ocml_round_f64(double);
__device__ __attribute__((pure)) double __ocml_rsqrt_f64(double);
__device__ __attribute__((const)) double __ocml_scalb_f64(double, double);
__device__ __attribute__((const)) double __ocml_scalbn_f64(double, int);
__device__ __attribute__((const)) int __ocml_signbit_f64(double);
__device__ double __ocml_sincos_f64(double,
__attribute__((address_space(5))) double *);
__device__ double
__ocml_sincospi_f64(double, __attribute__((address_space(5))) double *);
__device__ double __ocml_sin_f64(double);
__device__ __attribute__((pure)) double __ocml_sinh_f64(double);
__device__ double __ocml_sinpi_f64(double);
__device__ __attribute__((const)) double __ocml_sqrt_f64(double);
__device__ double __ocml_tan_f64(double);
__device__ __attribute__((pure)) double __ocml_tanh_f64(double);
__device__ double __ocml_tgamma_f64(double);
__device__ __attribute__((const)) double __ocml_trunc_f64(double);
__device__ double __ocml_y0_f64(double);
__device__ double __ocml_y1_f64(double);
// BEGIN INTRINSICS
__device__ __attribute__((const)) double __ocml_add_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_add_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_sub_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_mul_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double, double);
__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double, double);
__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double, double);
__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double, double);
__device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double,
double);
__device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double,
double);
__device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double,
double);
__device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double,
double);
__device__ __attribute__((const)) double
__llvm_amdgcn_rcp_f64(double) __asm("llvm.amdgcn.rcp.f64");
__device__ __attribute__((const)) double
__llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
__device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
__device__ _Float16 __ocml_cos_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16,
_Float16);
__device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16);
__device__ __attribute__((const)) int __ocml_isinf_f16(_Float16);
__device__ __attribute__((const)) int __ocml_isnan_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
__device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
__device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
__device__ _Float16 __ocml_sin_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
__device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));
__device__ __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b,
float c, bool s);
__device__ __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
__device__ __2f16 __ocml_cos_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
__device__ __attribute__((const))
__2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
__device__ __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
__device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
__device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
__device__ inline __2f16
__llvm_amdgcn_rcp_2f16(__2f16 __x) // Not currently exposed by ROCDL.
{
return __2f16{__llvm_amdgcn_rcp_f16(__x.x), __llvm_amdgcn_rcp_f16(__x.y)};
}
__device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
__device__ __2f16 __ocml_sin_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
__device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
} // extern "C"
#endif // __CLANG_HIP_LIBDEVICE_DECLARES_H__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
/*===---- __clang_hip_runtime_wrapper.h - HIP runtime support ---------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
/*
* WARNING: This header is intended to be directly -include'd by
* the compiler and is not supposed to be included by users.
*
*/
#ifndef __CLANG_HIP_RUNTIME_WRAPPER_H__
#define __CLANG_HIP_RUNTIME_WRAPPER_H__
#if __HIP__
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#define __host__ __attribute__((host))
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
#if __HIP_ENABLE_DEVICE_MALLOC__
extern "C" __device__ void *__hip_malloc(size_t __size);
extern "C" __device__ void *__hip_free(void *__ptr);
static inline __device__ void *malloc(size_t __size) {
return __hip_malloc(__size);
}
static inline __device__ void *free(void *__ptr) { return __hip_free(__ptr); }
#else
static inline __device__ void *malloc(size_t __size) {
__builtin_trap();
return nullptr;
}
static inline __device__ void *free(void *__ptr) {
__builtin_trap();
return nullptr;
}
#endif
#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
#include <__clang_cuda_math_forward_declares.h>
#include <__clang_cuda_complex_builtins.h>
#include <algorithm>
#include <complex>
#include <new>
#endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__
#define __CLANG_HIP_RUNTIME_WRPPER_INCLUDED__ 1
#endif // __HIP__
#endif // __CLANG_HIP_RUNTIME_WRAPPER_H__

View File

@ -8,7 +8,7 @@
// Test subtarget with flushing on by default.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
@ -16,7 +16,7 @@
// Test subtarget with flushing off by ddefault.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
@ -25,7 +25,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: -fcuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
@ -34,7 +34,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: -fno-cuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
@ -43,7 +43,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: -fno-cuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
@ -52,7 +52,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: -fcuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
@ -61,7 +61,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
@ -69,7 +69,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: -fcuda-flush-denormals-to-zero -fno-cuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD
@ -77,7 +77,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
@ -85,7 +85,7 @@
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: -fno-cuda-flush-denormals-to-zero -fcuda-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
@ -93,13 +93,13 @@
// Test --hip-device-lib-path flag
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm-device-libs/amdgcn/bitcode \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD
// Test environment variable HIP_DEVICE_LIB_PATH
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm-device-libs/amdgcn/bitcode \
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \

View File

@ -0,0 +1,31 @@
// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
// RUN: %clang -c -v -target x86_64-unknown-linux-gnu --cuda-gpu-arch=gfx900 \
// RUN: -std=c++11 --rocm-path=%S/Inputs/rocm -nogpulib %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,WRAP,HIP %s
// RUN: %clang -c -v -target x86_64-unknown-linux-gnu --cuda-gpu-arch=gfx900 \
// RUN: -std=c++11 --rocm-path=%S/Inputs/rocm -nobuiltininc -nogpulib %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,NOWRAP,HIP %s
// RUN: %clang -c -v -target x86_64-unknown-linux-gnu --cuda-gpu-arch=gfx900 \
// RUN: -std=c++11 --rocm-path=%S/Inputs/rocm -nogpuinc -nogpulib %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,WRAP,NOHIP %s
// COMMON-LABEL: clang{{.*}} -cc1 -triple amdgcn-amd-amdhsa
// WRAP: clang/{{.*}}/include/cuda_wrappers
// NOWRAP-NOT: clang/{{.*}}/include/cuda_wrappers
// HIP: {{.*}}Inputs/rocm/include
// NOHIP-NOT: {{.*}}Inputs/rocm/include
// COMMON: {{.*}}include/c++
// COMMON: clang/{{.*}}/include
// COMMON-LABEL: clang{{.*}} -cc1 -triple x86_64-unknown-linux-gnu
// WRAP: clang/{{.*}}/include/cuda_wrappers
// NOWRAP-NOT: clang/{{.*}}/include/cuda_wrappers
// HIP: {{.*}}Inputs/rocm/include
// NOHIP-NOT: {{.*}}Inputs/rocm/include
// COMMON: {{.*}}include/c++
// COMMON: clang/{{.*}}/include

View File

@ -7,12 +7,12 @@
// target not included in the test.
// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
// RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 -nogpulib \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
// RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s

View File

@ -8,17 +8,17 @@
// target not included in the test.
// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
// RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
// Should not interpret -nostdlib as disabling offload libraries.
// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 -nostdlib \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
// RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 -nogpulib \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
// RUN: --rocm-path=%S/Inputs/rocm %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s

View File

@ -6,7 +6,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
@ -15,7 +15,7 @@
// Make sure the different denormal default is respected for gfx8
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
@ -24,7 +24,7 @@
// Make sure the non-canonical name works
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=fiji \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
@ -33,7 +33,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx900 \
// RUN: -cl-denorms-are-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s
@ -41,7 +41,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: -cl-denorms-are-zero \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s
@ -50,7 +50,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: -cl-finite-math-only \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s
@ -59,7 +59,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: -cl-fp32-correctly-rounded-divide-sqrt \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s
@ -68,7 +68,7 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: -cl-fast-relaxed-math \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s
@ -77,45 +77,45 @@
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 \
// RUN: -cl-unsafe-math-optimizations \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx1010 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx1011 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1011,WAVE32 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx1012 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1012,WAVE32 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE64 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s
// Ignore -mno-wavefrontsize64 without wave32 support
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx803 -mno-wavefrontsize64 \
// RUN: --rocm-path=%S/Inputs/rocm-device-libs \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX803,WAVE64 %s
@ -124,12 +124,12 @@
// Test --hip-device-lib-path format
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx900 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm-device-libs/amdgcn/bitcode \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: %S/opencl.cl \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
// Test environment variable HIP_DEVICE_LIB_PATH
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm-device-libs/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx900 \
// RUN: %S/opencl.cl \
// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s

View File

@ -5,7 +5,7 @@
// RUN: %clang -### --sysroot=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
// RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib.
// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib and -nogpuinc to build without ROCm device library and HIP includes.
// Accept nogpulib or nostdlib for OpenCL.
// RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck %s --check-prefix OK