[CUDA] Added support for CUDA-8

Differential Revision: https://reviews.llvm.org/D24946

llvm-svn: 282610
This commit is contained in:
Artem Belevich 2016-09-28 17:47:40 +00:00
parent fda9905062
commit d4d9dc8252
5 changed files with 55 additions and 33 deletions

View File

@ -1774,8 +1774,7 @@ void Generic_GCC::CudaInstallationDetector::init(
Args.getLastArgValue(options::OPT_cuda_path_EQ)); Args.getLastArgValue(options::OPT_cuda_path_EQ));
else { else {
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda"); CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
// FIXME: Uncomment this once we can compile the cuda 8 headers. CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
// CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5"); CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5");
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0"); CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0");
} }
@ -1795,6 +1794,16 @@ void Generic_GCC::CudaInstallationDetector::init(
FS.exists(LibDevicePath))) FS.exists(LibDevicePath)))
continue; continue;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
FS.getBufferForFile(InstallPath + "/version.txt");
if (!VersionFile) {
// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
// version.txt isn't present.
Version = CudaVersion::CUDA_70;
} else {
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
}
std::error_code EC; std::error_code EC;
for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
!EC && LI != LE; LI = LI.increment(EC)) { !EC && LI != LE; LI = LI.increment(EC)) {
@ -1807,24 +1816,20 @@ void Generic_GCC::CudaInstallationDetector::init(
StringRef GpuArch = FileName.slice( StringRef GpuArch = FileName.slice(
LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
LibDeviceMap[GpuArch] = FilePath.str(); LibDeviceMap[GpuArch] = FilePath.str();
// Insert map entries for specific devices with this compute capability. // Insert map entries for specific devices with this compute
// NVCC's choice of libdevice library version is rather peculiar: // capability. NVCC's choice of the libdevice library version is
// http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#version-selection // rather peculiar and depends on the CUDA version.
// TODO: this will need to be updated once CUDA-8 is released.
if (GpuArch == "compute_20") { if (GpuArch == "compute_20") {
LibDeviceMap["sm_20"] = FilePath; LibDeviceMap["sm_20"] = FilePath;
LibDeviceMap["sm_21"] = FilePath; LibDeviceMap["sm_21"] = FilePath;
LibDeviceMap["sm_32"] = FilePath; LibDeviceMap["sm_32"] = FilePath;
} else if (GpuArch == "compute_30") { } else if (GpuArch == "compute_30") {
LibDeviceMap["sm_30"] = FilePath; LibDeviceMap["sm_30"] = FilePath;
// compute_30 is the fallback libdevice variant for sm_30+, if (Version < CudaVersion::CUDA_80) {
// unless CUDA specifies different version for specific GPU LibDeviceMap["sm_50"] = FilePath;
// arch. LibDeviceMap["sm_52"] = FilePath;
LibDeviceMap["sm_50"] = FilePath; LibDeviceMap["sm_53"] = FilePath;
LibDeviceMap["sm_52"] = FilePath; }
LibDeviceMap["sm_53"] = FilePath;
// sm_6? are currently all aliases for sm_53 in LLVM and
// should use compute_30.
LibDeviceMap["sm_60"] = FilePath; LibDeviceMap["sm_60"] = FilePath;
LibDeviceMap["sm_61"] = FilePath; LibDeviceMap["sm_61"] = FilePath;
LibDeviceMap["sm_62"] = FilePath; LibDeviceMap["sm_62"] = FilePath;
@ -1832,21 +1837,14 @@ void Generic_GCC::CudaInstallationDetector::init(
LibDeviceMap["sm_35"] = FilePath; LibDeviceMap["sm_35"] = FilePath;
LibDeviceMap["sm_37"] = FilePath; LibDeviceMap["sm_37"] = FilePath;
} else if (GpuArch == "compute_50") { } else if (GpuArch == "compute_50") {
// NVCC does not use compute_50 libdevice at all at the moment. if (Version >= CudaVersion::CUDA_80) {
// The version that's shipped with CUDA-7.5 is a copy of compute_30. LibDeviceMap["sm_50"] = FilePath;
LibDeviceMap["sm_52"] = FilePath;
LibDeviceMap["sm_53"] = FilePath;
}
} }
} }
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
FS.getBufferForFile(InstallPath + "/version.txt");
if (!VersionFile) {
// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
// version.txt isn't present.
Version = CudaVersion::CUDA_70;
} else {
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
}
IsValid = true; IsValid = true;
break; break;
} }

View File

@ -62,7 +62,7 @@
#include "cuda.h" #include "cuda.h"
#if !defined(CUDA_VERSION) #if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION" #error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 7050 #elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
#error "Unsupported CUDA version!" #error "Unsupported CUDA version!"
#endif #endif
@ -113,6 +113,7 @@
#undef __cxa_vec_ctor #undef __cxa_vec_ctor
#undef __cxa_vec_cctor #undef __cxa_vec_cctor
#undef __cxa_vec_dtor #undef __cxa_vec_dtor
#undef __cxa_vec_new
#undef __cxa_vec_new2 #undef __cxa_vec_new2
#undef __cxa_vec_new3 #undef __cxa_vec_new3
#undef __cxa_vec_delete2 #undef __cxa_vec_delete2
@ -135,6 +136,21 @@
// the headers we're about to include. // the headers we're about to include.
#define __host__ UNEXPECTED_HOST_ATTRIBUTE #define __host__ UNEXPECTED_HOST_ATTRIBUTE
// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
// Previous versions used to check whether they are defined or not.
// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it
// here to detect the switch.
#if defined(CU_DEVICE_INVALID)
#if !defined(__USE_FAST_MATH__)
#define __USE_FAST_MATH__ 0
#endif
#if !defined(__CUDA_PREC_DIV)
#define __CUDA_PREC_DIV 0
#endif
#endif
// device_functions.hpp and math_functions*.hpp use 'static // device_functions.hpp and math_functions*.hpp use 'static
// __forceinline__' (with no __device__) for definitions of device // __forceinline__' (with no __device__) for definitions of device
// functions. Temporarily redefine __forceinline__ to include // functions. Temporarily redefine __forceinline__ to include
@ -151,7 +167,7 @@
// slow divides), so we need to scope our define carefully here. // slow divides), so we need to scope our define carefully here.
#pragma push_macro("__USE_FAST_MATH__") #pragma push_macro("__USE_FAST_MATH__")
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__) #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ #define __USE_FAST_MATH__ 1
#endif #endif
#include "math_functions.hpp" #include "math_functions.hpp"
#pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__USE_FAST_MATH__")

View File

@ -22,13 +22,14 @@
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \ // RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20 // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE20
// sm_30, sm_5x and sm_6x map to compute_30 // sm_30, sm_6x map to compute_30.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \ // RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30 // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
// sm_5x is a special case. Maps to compute_30 for cuda-7.x only.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \ // RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30 // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE30
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_60 \
@ -44,6 +45,12 @@
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \ // RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \ // RUN: | FileCheck %s -check-prefix COMMON -check-prefix CUDAINC \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35 // RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE35
// sm_5x -> compute_50 for CUDA-8.0 and newer.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_50 \
// RUN: --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON \
// RUN: -check-prefix LIBDEVICE -check-prefix LIBDEVICE50
// Verify that -nocudainc prevents adding include path to CUDA headers. // Verify that -nocudainc prevents adding include path to CUDA headers.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
@ -56,8 +63,8 @@
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC // RUN: | FileCheck %s -check-prefix COMMON -check-prefix NOCUDAINC
// Verify that we get an error if there's no libdevice library to link with. // Verify that we get an error if there's no libdevice library to link with.
// NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_30 for this purpose. // NOTE: Inputs/CUDA deliberately does *not* have libdevice.compute_20 for this purpose.
// RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_30 \ // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_20 \
// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE // RUN: | FileCheck %s -check-prefix COMMON -check-prefix MISSINGLIBDEVICE
@ -81,7 +88,7 @@
// CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda
// NOCUDA-NOT: Found CUDA installation: // NOCUDA-NOT: Found CUDA installation:
// MISSINGLIBDEVICE: error: cannot find libdevice for sm_30. // MISSINGLIBDEVICE: error: cannot find libdevice for sm_20.
// COMMON: "-triple" "nvptx-nvidia-cuda" // COMMON: "-triple" "nvptx-nvidia-cuda"
// COMMON-SAME: "-fcuda-is-device" // COMMON-SAME: "-fcuda-is-device"
@ -90,6 +97,7 @@
// LIBDEVICE20-SAME: libdevice.compute_20.10.bc // LIBDEVICE20-SAME: libdevice.compute_20.10.bc
// LIBDEVICE30-SAME: libdevice.compute_30.10.bc // LIBDEVICE30-SAME: libdevice.compute_30.10.bc
// LIBDEVICE35-SAME: libdevice.compute_35.10.bc // LIBDEVICE35-SAME: libdevice.compute_35.10.bc
// LIBDEVICE50-SAME: libdevice.compute_50.10.bc
// NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc // NOLIBDEVICE-NOT: libdevice.compute_{{.*}}.bc
// LIBDEVICE-SAME: "-target-feature" "+ptx42" // LIBDEVICE-SAME: "-target-feature" "+ptx42"
// NOLIBDEVICE-NOT: "-target-feature" "+ptx42" // NOLIBDEVICE-NOT: "-target-feature" "+ptx42"