[libomptarget-nvptx] Test bitcode compiler flags and enable by default

Move all logic related to selecting the bitcode compiler and linker
into a new file and dynamically test required compiler flags. This
also adds -fcuda-rdc for Clang trunk as previously attempted in D44992
which fixes the build.

As a result this change also enables building the library by default
if all prerequisites are met.

Differential Revision: https://reviews.llvm.org/D46901

llvm-svn: 332494
This commit is contained in:
Jonas Hahnfeld 2018-05-16 17:20:21 +00:00
parent b7972f88c7
commit 37bbe1a698
3 changed files with 183 additions and 104 deletions

View File

@ -257,9 +257,11 @@ Options for ``libomptarget``
Options for ``NVPTX device RTL``
--------------------------------
**LIBOMPTARGET_NVPTX_ENABLE_BCLIB** = ``OFF|ON``
**LIBOMPTARGET_NVPTX_ENABLE_BCLIB** = ``ON|OFF``
Enable CUDA LLVM bitcode offloading device RTL. This is used for link time
optimization of the OMP runtime and application code.
optimization of the OMP runtime and application code. This option is enabled
by default if the build system determines that `CMAKE_C_COMPILER` is able to
compile and link the library.
**LIBOMPTARGET_NVPTX_CUDA_COMPILER** = ``""``
Location of a CUDA compiler capable of emitting LLVM bitcode. Currently only

View File

@ -0,0 +1,112 @@
#
#//===----------------------------------------------------------------------===//
#//
#// The LLVM Compiler Infrastructure
#//
#// This file is dual licensed under the MIT and the University of Illinois Open
#// Source Licenses. See LICENSE.txt for details.
#//
#//===----------------------------------------------------------------------===//
#
# We use the compiler and linker provided by the user, attempt to use the one
# used to build libomptarget or just fail.
set(LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED FALSE)
if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "")
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${LIBOMPTARGET_NVPTX_CUDA_COMPILER})
elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${CMAKE_C_COMPILER})
else()
return()
endif()
# Get compiler directory to try to locate a suitable linker.
get_filename_component(compiler_dir ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} DIRECTORY)
set(llvm_link "${compiler_dir}/llvm-link")
if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "")
set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${LIBOMPTARGET_NVPTX_BC_LINKER})
elseif (EXISTS "${llvm_link}")
# Use llvm-link from the compiler directory.
set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER "${llvm_link}")
else()
return()
endif()
function(try_compile_bitcode output source)
set(srcfile ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.cu)
file(WRITE ${srcfile} "${source}\n")
set(bcfile ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/out.bc)
# The remaining arguments are the flags to be tested.
# FIXME: Don't hardcode GPU version. This is currently required because
# Clang refuses to compile its default of sm_20 with CUDA 9.
execute_process(
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${ARGN}
--cuda-gpu-arch=sm_35 -c ${srcfile} -o ${bcfile}
RESULT_VARIABLE result
OUTPUT_QUIET ERROR_QUIET)
if (result EQUAL 0)
set(${output} TRUE PARENT_SCOPE)
else()
set(${output} FALSE PARENT_SCOPE)
endif()
endfunction()
# Save for which compiler we are going to do the following checks so that we
# can discard cached values if the user specifies a different value.
set(discard_cached FALSE)
if (DEFINED LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER AND
NOT("${LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER}" STREQUAL "${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER}"))
set(discard_cached TRUE)
endif()
set(LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER "${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER}" CACHE INTERNAL "" FORCE)
function(check_bitcode_compilation output source)
if (${discard_cached} OR NOT DEFINED ${output})
message(STATUS "Performing Test ${output}")
# Forward additional arguments which contain the flags.
try_compile_bitcode(result "${source}" ${ARGN})
set(${output} ${result} CACHE INTERNAL "" FORCE)
if(${result})
message(STATUS "Performing Test ${output} - Success")
else()
message(STATUS "Performing Test ${output} - Failed")
endif()
endif()
endfunction()
# These flags are required to emit LLVM Bitcode. We check them together because
# if any of them are not supported, there is no point in finding out which are.
set(compiler_flags_required -emit-llvm -O1 --cuda-device-only)
set(compiler_flags_required_src "extern \"C\" __device__ int thread() { return threadIdx.x; }")
check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FLAGS_REQUIRED "${compiler_flags_required_src}" ${compiler_flags_required})
# It makes no sense to continue given that the compiler doesn't support
# emitting basic LLVM Bitcode
if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FLAGS_REQUIRED)
return()
endif()
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS ${compiler_flags_required})
# Declaring external shared device variables might need an additional flag
# since Clang 7.0 and was entirely unsupported since version 4.0.
set(extern_device_shared_src "extern __device__ __shared__ int test;")
check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_EXTERN_SHARED "${extern_device_shared_src}" ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS})
if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_EXTERN_SHARED)
set(compiler_flag_fcuda_rdc -fcuda-rdc)
set(compiler_flag_fcuda_rdc_full ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS} ${compiler_flag_fcuda_rdc})
check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FCUDA_RDC "${extern_device_shared_src}" ${compiler_flag_fcuda_rdc_full})
if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FCUDA_RDC)
return()
endif()
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS "${compiler_flag_fcuda_rdc_full}")
endif()
# We can compile LLVM Bitcode from CUDA source code!
set(LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED TRUE)

View File

@ -93,122 +93,87 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
target_link_libraries(omptarget-nvptx ${CUDA_LIBRARIES})
# Check if we can create an LLVM bitcode implementation of the runtime library
# that could be inlined in the user implementation.
set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB FALSE CACHE BOOL
# that could be inlined in the user application. For that we need to find
# a Clang compiler capable of compiling our CUDA files to LLVM bitcode and
# an LLVM linker.
set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
"Location of a CUDA compiler capable of emitting LLVM bitcode.")
set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
"Location of a linker capable of linking LLVM bitcode objects.")
include(LibomptargetNVPTXBitcodeLibrary)
set(bclib_default FALSE)
if (${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
set(bclib_default TRUE)
endif()
set(LIBOMPTARGET_NVPTX_ENABLE_BCLIB ${bclib_default} CACHE BOOL
"Enable CUDA LLVM bitcode offloading device RTL.")
if (${LIBOMPTARGET_NVPTX_ENABLE_BCLIB})
if (NOT ${LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED})
libomptarget_error_say("Cannot build CUDA LLVM bitcode offloading device RTL!")
endif()
libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
# Find a clang compiler capable of compiling cuda files to LLVM bitcode and
# an LLVM linker.
# We use the one provided by the user, attempt to use the one used to build
# libomptarget or just fail.
set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
"Location of a CUDA compiler capable of emitting LLVM bitcode.")
set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
"Location of a linker capable of linking LLVM bitcode objects.")
if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "")
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${LIBOMPTARGET_NVPTX_CUDA_COMPILER})
elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${CMAKE_C_COMPILER})
# Set flags for LLVM Bitcode compilation.
set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS} -DOMPTARGET_NVPTX_TEST=0)
if(${LIBOMPTARGET_NVPTX_DEBUG})
set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=-1)
else()
libomptarget_error_say("Cannot find a CUDA compiler capable of emitting LLVM bitcode.")
libomptarget_error_say("Please configure with flag -DLIBOMPTARGET_NVPTX_CUDA_COMPILER")
set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=0)
endif()
# Get compiler directory to try to locate a suitable linker
get_filename_component(COMPILER_DIR ${CMAKE_C_COMPILER} DIRECTORY)
if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "")
set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${LIBOMPTARGET_NVPTX_BC_LINKER})
elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang" AND EXISTS "${COMPILER_DIR}/llvm-link")
# Use llvm-link from the directory containing clang
set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${COMPILER_DIR}/llvm-link)
else()
libomptarget_error_say("Cannot find a linker capable of linking LLVM bitcode objects.")
libomptarget_error_say("Please configure with flag -DLIBOMPTARGET_NVPTX_BC_LINKER")
# CUDA 9 header files use the nv_weak attribute which clang is not yet prepared
# to handle. Therefore, we use 'weak' instead. We are compiling only for the
# device, so it should be equivalent.
if(CUDA_VERSION_MAJOR GREATER 8)
set(bc_flags ${bc_flags} -Dnv_weak=weak)
endif()
if(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER AND LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER)
libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
# Generate a Bitcode library for all the compute capabilities the user requested.
foreach(sm ${nvptx_sm_list})
set(cuda_arch --cuda-gpu-arch=sm_${sm})
# Decide which ptx version to use. Same choices as Clang.
if(CUDA_VERSION_MAJOR GREATER 9 OR CUDA_VERSION_MAJOR EQUAL 9)
set(CUDA_PTX_VERSION ptx60)
else()
set(CUDA_PTX_VERSION ptx42)
endif()
set(BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=0)
if(${LIBOMPTARGET_NVPTX_DEBUG})
set(BC_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
endif()
# Compile CUDA files to bitcode.
set(bc_files "")
foreach(src ${cuda_src_files})
get_filename_component(infile ${src} ABSOLUTE)
get_filename_component(outfile ${src} NAME)
# Set flags for Clang cuda compilation. Only Clang is supported because there is
# no other compiler capable of generating bitcode from cuda sources.
set(CUDA_FLAGS
-emit-llvm
-O1
-Xclang -target-feature
-Xclang +${CUDA_PTX_VERSION}
--cuda-device-only
-DOMPTARGET_NVPTX_TEST=0
${BC_DEBUG}
)
# CUDA 9 header files use the nv_weak attribute which clang is not yet prepared
# to handle. Therefore, we use 'weak' instead. We are compiling only for the
# device, so it should be equivalent.
if(CUDA_VERSION_MAJOR EQUAL 9)
set(CUDA_FLAGS ${CUDA_FLAGS} -Dnv_weak=weak)
endif()
# Get the compute capability the user requested or use SM_35 by default.
set(CUDA_ARCH "")
foreach(sm ${nvptx_sm_list})
set(CUDA_ARCH --cuda-gpu-arch=sm_${sm})
# Compile cuda files to bitcode.
set(bc_files "")
foreach(src ${cuda_src_files})
get_filename_component(infile ${src} ABSOLUTE)
get_filename_component(outfile ${src} NAME)
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES}
-c ${infile} -o ${outfile}-sm_${sm}.bc
DEPENDS ${infile}
IMPLICIT_DEPENDS CXX ${infile}
COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
VERBATIM
)
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)
list(APPEND bc_files ${outfile}-sm_${sm}.bc)
endforeach()
# Link to a bitcode library.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
DEPENDS ${bc_files}
COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} ${cuda_arch}
-c ${infile} -o ${outfile}-sm_${sm}.bc
DEPENDS ${infile}
IMPLICIT_DEPENDS CXX ${infile}
COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc"
VERBATIM
)
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc)
add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
# Copy library to destination.
add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
$<TARGET_FILE_DIR:omptarget-nvptx>)
# Install device RTL under the lib destination folder.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib")
list(APPEND bc_files ${outfile}-sm_${sm}.bc)
endforeach()
endif()
# Link to a bitcode library.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER}
-o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files}
DEPENDS ${bc_files}
COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc"
)
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc)
add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc)
# Copy library to destination.
add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc
$<TARGET_FILE_DIR:omptarget-nvptx>)
# Install device RTL under the lib destination folder.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib")
endforeach()
endif()
else()