forked from OSchip/llvm-project
[OpenMP][libomptarget] Enable the compilation of multiple bc libraries for runtime inlining
Summary: Different NVIDIA GPUs support different compute capabilities. To enable the inlining of runtime functions and the best performance on different generations of NVIDIA GPUs, a bc library for each compute capability needs to be compiled. The same compiler build will then be usable in conjunction with multiple generations of NVIDIA GPUs. To differentiate between versions of the same bc lib, the output file name will contain the compute capability ID. Depends on D14254 Reviewers: Hahnfeld, hfinkel, carlo.bertolli, caomhin, ABataev, grokos Reviewed By: Hahnfeld, grokos Subscribers: guansong, mgorny, openmp-commits Differential Revision: https://reviews.llvm.org/D41724 llvm-svn: 324904
This commit is contained in:
parent
7dc0f1ec45
commit
d5ae4e6501
|
@ -280,10 +280,10 @@ Options for ``NVPTX device RTL``
|
||||||
compatible with NVCC, this option can be used to pass to NVCC a valid compiler
|
compatible with NVCC, this option can be used to pass to NVCC a valid compiler
|
||||||
to avoid the error.
|
to avoid the error.
|
||||||
|
|
||||||
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY** = ``35``
|
**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35``
|
||||||
CUDA compute capability that should be supported by the NVPTX device RTL. E.g.
|
List of CUDA compute capabilities that should be supported by the NVPTX
|
||||||
for compute capability 6.0, the option "60" should be used. Compute capability
|
device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70"
|
||||||
3.5 is the minimum required.
|
should be used. Compute capability 3.5 is the minimum required.
|
||||||
|
|
||||||
**LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON``
|
**LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON``
|
||||||
Enable printing of debug messages from the NVPTX device RTL.
|
Enable printing of debug messages from the NVPTX device RTL.
|
||||||
|
|
|
@ -60,9 +60,18 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||||
|
|
||||||
# Get the compute capability the user requested or use SM_35 by default.
|
# Get the compute capability the user requested or use SM_35 by default.
|
||||||
# SM_35 is what clang uses by default.
|
# SM_35 is what clang uses by default.
|
||||||
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING
|
set(default_capabilities 35)
|
||||||
"CUDA Compute Capability to be used to compile the NVPTX device RTL.")
|
if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
|
||||||
set(CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
|
set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
|
||||||
|
libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
|
||||||
|
endif()
|
||||||
|
set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
|
||||||
|
"List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
|
||||||
|
# Turn the comma-separated capability list into a CMake list. The expansion is
# quoted so that a value already written with semicolons (e.g. "35;60") stays a
# single input instead of being split into several arguments, which
# string(REPLACE) would concatenate into "3560"; quoting also keeps the call
# well-formed when the cache value is empty.
string(REPLACE "," ";" nvptx_sm_list "${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}")
|
||||||
|
|
||||||
|
foreach(sm ${nvptx_sm_list})
|
||||||
|
set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
|
||||||
|
endforeach()
|
||||||
|
|
||||||
# Activate RTL message dumps if requested by the user.
|
# Activate RTL message dumps if requested by the user.
|
||||||
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
|
set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
|
||||||
|
@ -152,46 +161,47 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||||
|
|
||||||
# Get the compute capability the user requested or use SM_35 by default.
|
# Get the compute capability the user requested or use SM_35 by default.
|
||||||
set(CUDA_ARCH "")
|
set(CUDA_ARCH "")
|
||||||
set(CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
|
foreach(sm ${nvptx_sm_list})
|
||||||
|
set(CUDA_ARCH --cuda-gpu-arch=sm_${sm})
|
||||||
|
|
||||||
# Compile cuda files to bitcode.
|
# Compile cuda files to bitcode.
|
||||||
set(bc_files "")
|
set(bc_files "")
|
||||||
foreach(src ${cuda_src_files})
|
foreach(src ${cuda_src_files})
|
||||||
get_filename_component(infile ${src} ABSOLUTE)
|
get_filename_component(infile ${src} ABSOLUTE)
|
||||||
get_filename_component(outfile ${src} NAME)
|
get_filename_component(outfile ${src} NAME)
|
||||||
|
|
||||||
add_custom_command(OUTPUT ${outfile}.bc
|
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
|
||||||
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
|
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
|
||||||
-c ${infile} -o ${outfile}.bc
|
-c ${infile} -o ${outfile}-sm_${sm}.bc
|
||||||
DEPENDS ${infile}
|
DEPENDS ${infile}
|
||||||
IMPLICIT_DEPENDS CXX ${infile}
|
IMPLICIT_DEPENDS CXX ${infile}
|
||||||
COMMENT "Building LLVM bitcode ${outfile}.bc"
|
COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
|
||||||
VERBATIM
|
VERBATIM
|
||||||
|
)
|
||||||
|
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)
|
||||||
|
|
||||||
|
list(APPEND bc_files ${outfile}-sm_${sm}.bc)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
# Link to a bitcode library.
|
||||||
|
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
|
||||||
|
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
|
||||||
|
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
|
||||||
|
DEPENDS ${bc_files}
|
||||||
|
COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
|
||||||
)
|
)
|
||||||
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}.bc)
|
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
|
||||||
|
|
||||||
list(APPEND bc_files ${outfile}.bc)
|
add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
|
||||||
|
|
||||||
|
# Copy library to destination.
|
||||||
|
add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
|
||||||
|
$<TARGET_FILE_DIR:omptarget-nvptx>)
|
||||||
|
|
||||||
|
# Install device RTL under the lib destination folder.
|
||||||
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib")
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
# Link to a bitcode library.
|
|
||||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
|
|
||||||
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
|
|
||||||
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc ${bc_files}
|
|
||||||
DEPENDS ${bc_files}
|
|
||||||
COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc"
|
|
||||||
)
|
|
||||||
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc)
|
|
||||||
|
|
||||||
add_custom_target(omptarget-nvptx-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc)
|
|
||||||
|
|
||||||
# Copy library to destination.
|
|
||||||
add_custom_command(TARGET omptarget-nvptx-bc POST_BUILD
|
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc
|
|
||||||
$<TARGET_FILE_DIR:omptarget-nvptx>)
|
|
||||||
|
|
||||||
# Install device RTL under the lib destination folder.
|
|
||||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc DESTINATION "lib")
|
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue