[Clang][OpenMP] Require CUDA 9.2+ for OpenMP offloading on NVPTX target

In the current implementation of `deviceRTLs`, we're using some functions
that are CUDA version dependent (if CUDA_VERSION < 9, it is one; otherwise, it
is another one). As a result, we have to compile one bitcode library for each
CUDA version supported. A worse problem is forward compatibility. If a new CUDA
version is released, we have to update the CMake file as well.

CUDA 9.2 was released three years ago. Instead of using various weird tricks
to make `deviceRTLs` work with different CUDA versions and still have forward
compatibility, we can simply drop support for CUDA 9.1 and lower versions. This
has at least two benefits:
- We don't need to generate bitcode libraries for each CUDA version;
- Clang driver doesn't need to search for the bitcode lib based on CUDA version.

We can claim that starting from LLVM 12, OpenMP offloading on the NVPTX target
requires CUDA 9.2+.

Reviewed By: jdoerfert, JonChesterfield

Differential Revision: https://reviews.llvm.org/D97003
This commit is contained in:
Shilei Tian 2021-02-22 10:59:55 -05:00
parent daeb70be0b
commit 76151acf89
4 changed files with 30 additions and 11 deletions

View File

@ -263,6 +263,7 @@ def err_drv_expecting_fopenmp_with_fopenmp_targets : Error<
def err_drv_omp_offload_target_missingbcruntime : Error< def err_drv_omp_offload_target_missingbcruntime : Error<
"No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-%1-bc-path to specify %1 bitcode library.">; "No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-%1-bc-path to specify %1 bitcode library.">;
def err_drv_omp_offload_target_bcruntime_not_found : Error<"Bitcode library '%0' does not exist.">; def err_drv_omp_offload_target_bcruntime_not_found : Error<"Bitcode library '%0' does not exist.">;
def err_drv_omp_offload_target_cuda_version_not_support : Error<"NVPTX target requires CUDA 9.2 or above. CUDA %0 is detected.">;
def warn_drv_omp_offload_target_duplicate : Warning< def warn_drv_omp_offload_target_duplicate : Warning<
"The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">, "The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
InGroup<OpenMPTarget>; InGroup<OpenMPTarget>;

View File

@ -710,13 +710,14 @@ void CudaToolChain::addClangTargetOptions(
CC1Args.push_back("-mlink-builtin-bitcode"); CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
std::string CudaVersionStr; std::string CudaVersionStr;
// New CUDA versions often introduce new instructions that are only supported // New CUDA versions often introduce new instructions that are only supported
// by new PTX version, so we need to raise PTX level to enable them in NVPTX // by new PTX version, so we need to raise PTX level to enable them in NVPTX
// back-end. // back-end.
const char *PtxFeature = nullptr; const char *PtxFeature = nullptr;
switch (CudaInstallation.version()) { switch (CudaInstallationVersion) {
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ #define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
case CudaVersion::CUDA_##CUDA_VER: \ case CudaVersion::CUDA_##CUDA_VER: \
CudaVersionStr = #CUDA_VER; \ CudaVersionStr = #CUDA_VER; \
@ -743,12 +744,19 @@ void CudaToolChain::addClangTargetOptions(
options::OPT_fno_cuda_short_ptr, false)) options::OPT_fno_cuda_short_ptr, false))
CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
if (CudaInstallation.version() >= CudaVersion::UNKNOWN) if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
CC1Args.push_back(DriverArgs.MakeArgString( CC1Args.push_back(
Twine("-target-sdk-version=") + DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
CudaVersionToString(CudaInstallation.version()))); CudaVersionToString(CudaInstallationVersion)));
if (DeviceOffloadingKind == Action::OFK_OpenMP) { if (DeviceOffloadingKind == Action::OFK_OpenMP) {
if (CudaInstallationVersion < CudaVersion::CUDA_92) {
getDriver().Diag(
diag::err_drv_omp_offload_target_cuda_version_not_support)
<< CudaVersionToString(CudaInstallationVersion);
return;
}
std::string BitcodeSuffix = std::string BitcodeSuffix =
"nvptx-cuda_" + CudaVersionStr + "-" + GpuArch.str(); "nvptx-cuda_" + CudaVersionStr + "-" + GpuArch.str();
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,

View File

@ -154,17 +154,17 @@
/// Check that the runtime bitcode library is part of the compile line. Create a bogus /// Check that the runtime bitcode library is part of the compile line. Create a bogus
/// bitcode library and add it to the LIBRARY_PATH. /// bitcode library and add it to the LIBRARY_PATH.
// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ // RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB %s // RUN: | FileCheck -check-prefix=CHK-BCLIB %s
/// The user can override default detection using --libomptarget-nvptx-bc-path=. /// The user can override default detection using --libomptarget-nvptx-bc-path=.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ // RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-USER %s // RUN: | FileCheck -check-prefix=CHK-BCLIB-USER %s
// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-cuda_80-sm_35.bc // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-cuda_102-sm_35.bc
// CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc // CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
// CHK-BCLIB-NOT: {{error:|warning:}} // CHK-BCLIB-NOT: {{error:|warning:}}
@ -173,23 +173,33 @@
/// Check that the warning is thrown when the libomptarget bitcode library is not found. /// Check that the warning is thrown when the libomptarget bitcode library is not found.
/// Libomptarget requires sm_35 or newer so an sm_35 bitcode library should never exist. /// Libomptarget requires sm_35 or newer so an sm_35 bitcode library should never exist.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ // RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-WARN %s // RUN: | FileCheck -check-prefix=CHK-BCLIB-WARN %s
// CHK-BCLIB-WARN: No library 'libomptarget-nvptx-cuda_80-sm_35.bc' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-nvptx-bc-path to specify nvptx bitcode library. // CHK-BCLIB-WARN: No library 'libomptarget-nvptx-cuda_102-sm_35.bc' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-nvptx-bc-path to specify nvptx bitcode library.
/// ########################################################################### /// ###########################################################################
/// Check that the error is thrown when the libomptarget bitcode library does not exist. /// Check that the error is thrown when the libomptarget bitcode library does not exist.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda \ // RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=not-exist.bc \ // RUN: --libomptarget-nvptx-bc-path=not-exist.bc \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s // RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s
// CHK-BCLIB-ERROR: Bitcode library 'not-exist.bc' does not exist. // CHK-BCLIB-ERROR: Bitcode library 'not-exist.bc' does not exist.
/// ###########################################################################
/// Check that the error is thrown when CUDA 9.1 or lower version is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s
// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above. CUDA 9.0 is detected.
/// Check that debug info is emitted in dwarf-2 /// Check that debug info is emitted in dwarf-2
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \ // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s // RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s