forked from OSchip/llvm-project
[OpenMP] NVPTX: Set default/minimum compute capability to sm_35
The current implementation of the nvptx runtime (to be upstreamed shortly) uses the atomicMax operation on 64-bit integers. This is only supported in compute capabilities 3.5 and later. I've changed the clang default to sm_35. Differential Revision: https://reviews.llvm.org/D40977 llvm-svn: 320082
This commit is contained in:
parent
dfc79c7c33
commit
145c54721b
|
@ -241,14 +241,15 @@ set(CLANG_DEFAULT_OBJCOPY "objcopy" CACHE STRING
|
|||
set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING
|
||||
"Default OpenMP runtime used by -fopenmp.")
|
||||
|
||||
# OpenMP offloading requires at least sm_30 because we use shuffle instructions
|
||||
# to generate efficient code for reductions.
|
||||
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
|
||||
# OpenMP offloading requires at least sm_35 because we use shuffle instructions
|
||||
# to generate efficient code for reductions and the atomicMax instruction on
|
||||
# 64-bit integers in the implementation of conditional lastprivate.
|
||||
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
|
||||
"Default architecture for OpenMP offloading to Nvidia GPUs.")
|
||||
string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
|
||||
if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 30)
|
||||
message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_30")
|
||||
set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_30" CACHE STRING
|
||||
# Fall back to sm_35 when the configured default is malformed or too old.
# MATCHED_ARCH is produced by the preceding string(REGEX MATCH ...) call, which
# always defines its output variable and leaves it empty when the value does
# not look like "sm_<digits>". It must therefore be tested for emptiness, not
# with DEFINED. On a failed match CMAKE_MATCH_1 is empty as well, which is not
# a valid number, so the LESS comparison alone would evaluate to false and let
# a malformed value through.
if (NOT MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
  # FORCE is deliberate here: the rejected value already lives in the cache,
  # and a plain cache set() would not overwrite it.
  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
    "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
endif()
|
||||
|
||||
|
|
Loading…
Reference in New Issue