forked from OSchip/llvm-project
[CUDA] Fix libdevice selection.
This makes clang's libdevice selection match that of NVCC as described in http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#version-selection If the required libdevice variant is not found, the driver now fails with an error. Differential Revision: https://reviews.llvm.org/D23037 llvm-svn: 277542
This commit is contained in:
parent
765777ce67
commit
02a1e973a8
|
@ -26,6 +26,9 @@ def err_drv_cuda_bad_gpu_arch : Error<"Unsupported CUDA gpu architecture: %0">;
|
|||
def err_drv_no_cuda_installation : Error<
|
||||
"cannot find CUDA installation. Provide its path via --cuda-path, or pass "
|
||||
"-nocudainc to build without CUDA includes.">;
|
||||
def err_drv_no_cuda_libdevice : Error<
|
||||
"cannot find libdevice for %0. Provide path to different CUDA installation "
|
||||
"via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
|
||||
def err_drv_cuda_version_too_low : Error<
|
||||
"GPU arch %1 requires CUDA version at least %3, but installation at %0 is %2. "
|
||||
"Use --cuda-path to specify a different CUDA install, or pass "
|
||||
|
|
|
@ -1791,22 +1791,32 @@ void Generic_GCC::CudaInstallationDetector::init(
|
|||
LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
|
||||
LibDeviceMap[GpuArch] = FilePath.str();
|
||||
// Insert map entries for specific devices with this compute capability.
|
||||
// NVCC's choice of libdevice library version is rather peculiar:
|
||||
// http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#version-selection
|
||||
// TODO: this will need to be updated once CUDA-8 is released.
|
||||
if (GpuArch == "compute_20") {
|
||||
LibDeviceMap["sm_20"] = FilePath;
|
||||
LibDeviceMap["sm_21"] = FilePath;
|
||||
LibDeviceMap["sm_32"] = FilePath;
|
||||
} else if (GpuArch == "compute_30") {
|
||||
LibDeviceMap["sm_30"] = FilePath;
|
||||
LibDeviceMap["sm_32"] = FilePath;
|
||||
// compute_30 is the fallback libdevice variant for sm_30+,
|
||||
// unless CUDA specifies different version for specific GPU
|
||||
// arch.
|
||||
LibDeviceMap["sm_50"] = FilePath;
|
||||
LibDeviceMap["sm_52"] = FilePath;
|
||||
LibDeviceMap["sm_53"] = FilePath;
|
||||
// sm_6? are currently all aliases for sm_53 in LLVM and
|
||||
// should use compute_30.
|
||||
LibDeviceMap["sm_60"] = FilePath;
|
||||
LibDeviceMap["sm_61"] = FilePath;
|
||||
LibDeviceMap["sm_62"] = FilePath;
|
||||
} else if (GpuArch == "compute_35") {
|
||||
LibDeviceMap["sm_35"] = FilePath;
|
||||
LibDeviceMap["sm_37"] = FilePath;
|
||||
} else if (GpuArch == "compute_50") {
|
||||
LibDeviceMap["sm_50"] = FilePath;
|
||||
LibDeviceMap["sm_52"] = FilePath;
|
||||
LibDeviceMap["sm_53"] = FilePath;
|
||||
LibDeviceMap["sm_60"] = FilePath;
|
||||
LibDeviceMap["sm_61"] = FilePath;
|
||||
LibDeviceMap["sm_62"] = FilePath;
|
||||
// NVCC does not use compute_50 libdevice at all at the moment.
|
||||
// The version that's shipped with CUDA-7.5 is a copy of compute_30.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4759,18 +4769,23 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
|
|||
if (DriverArgs.hasArg(options::OPT_nocudalib))
|
||||
return;
|
||||
|
||||
std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(
|
||||
DriverArgs.getLastArgValue(options::OPT_march_EQ));
|
||||
if (!LibDeviceFile.empty()) {
|
||||
CC1Args.push_back("-mlink-cuda-bitcode");
|
||||
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
|
||||
StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
|
||||
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
|
||||
std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
|
||||
|
||||
// Libdevice in CUDA-7.0 requires PTX version that's more recent
|
||||
// than LLVM defaults to. Use PTX4.2 which is the PTX version that
|
||||
// came with CUDA-7.0.
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back("+ptx42");
|
||||
if (LibDeviceFile.empty()) {
|
||||
getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
|
||||
return;
|
||||
}
|
||||
|
||||
CC1Args.push_back("-mlink-cuda-bitcode");
|
||||
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
|
||||
|
||||
// Libdevice in CUDA-7.0 requires PTX version that's more recent
|
||||
// than LLVM defaults to. Use PTX4.2 which is the PTX version that
|
||||
// came with CUDA-7.0.
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back("+ptx42");
|
||||
}
|
||||
|
||||
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
|
||||
|
|
|
@ -10,15 +10,41 @@
|
|||
// RUN: %clang -v --target=i386-unknown-linux \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
|
||||
|
||||
// Make sure we map libdevice bitcode files to proper GPUs.
|
||||
// Make sure we map libdevice bitcode files to proper GPUs. These
|
||||
// tests use Inputs/CUDA_80 which has a full set of libdevice files.
|
||||
// However, libdevice mapping only matches CUDA-7.x at the moment.
|
||||
// sm_2x, sm_32 -> compute_20
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE21
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_32 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
|
||||
// sm_30, sm_5x and sm_6x map to compute_30
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
|
||||
// sm_35 and sm_37 -> compute_35
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_37 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
|
||||
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
|
||||
|
||||
// Verify that -nocudainc prevents adding include path to CUDA headers.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudainc --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
|
@ -29,12 +55,13 @@
|
|||
// RUN: --cuda-path=%S/no-cuda-there %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
|
||||
|
||||
// Verify that no options related to bitcode linking are passed if
|
||||
// there's no bitcode file.
|
||||
// Verify that we get an error if there's no libdevice library to link with.
|
||||
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_30 for this purpose.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
|
||||
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
|
||||
// .. or if we explicitly passed -nocudalib
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
|
||||
|
||||
// Verify that -nocudalib prevents linking libdevice bitcode in.
|
||||
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
|
||||
// RUN: -nocudalib --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
|
||||
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOLIBDEVICE
|
||||
|
@ -48,16 +75,19 @@
|
|||
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
|
||||
// NOCUDA-NOT: Found CUDA installation:
|
||||
|
||||
// MISSINGLIBDEVICE: error: cannot find libdevice for sm_30.
|
||||
|
||||
// COMMON: "-triple" "nvptx-nvidia-cuda"
|
||||
// COMMON-SAME: "-fcuda-is-device"
|
||||
// LIBDEVICE-SAME: "-mlink-cuda-bitcode"
|
||||
// NOLIBDEVICE-NOT: "-mlink-cuda-bitcode"
|
||||
// LIBDEVICE21-SAME: libdevice.compute_20.10.bc
|
||||
// LIBDEVICE20-SAME: libdevice.compute_20.10.bc
|
||||
// LIBDEVICE30-SAME: libdevice.compute_30.10.bc
|
||||
// LIBDEVICE35-SAME: libdevice.compute_35.10.bc
|
||||
// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
|
||||
// LIBDEVICE-SAME: "-target-feature" "+ptx42"
|
||||
// NOLIBDEVICE-NOT: "-target-feature" "+ptx42"
|
||||
// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA/usr/local/cuda/include"
|
||||
// CUDAINC-SAME: "-internal-isystem" "{{.*}}/Inputs/CUDA{{[_0-9]+}}/usr/local/cuda/include"
|
||||
// NOCUDAINC-NOT: "-internal-isystem" "{{.*}}/cuda/include"
|
||||
// CUDAINC-SAME: "-include" "__clang_cuda_runtime_wrapper.h"
|
||||
// NOCUDAINC-NOT: "-include" "__clang_cuda_runtime_wrapper.h"
|
||||
|
|
Loading…
Reference in New Issue