From e82e07d74a7d9f262aa85fa4aebf99b5a62f386c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 21 Jul 2022 14:12:01 -0400 Subject: [PATCH] [Libomptarget] Build the DeviceRTL BC using clang directly Currently the bitcode library is built using the clang front-end manually. This was originally done because we did not support device only compilation. Now we support device only compilation, at least for a single offloading toolchain, so we can instead use clang directly rather than using the front-end. This saves us needing to define things like `aux_triple`. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D130293 --- openmp/libomptarget/DeviceRTL/CMakeLists.txt | 31 +++++--------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 3ac3f59e8678..0c7400604df4 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -43,21 +43,6 @@ else() return() endif() -# TODO: This part needs to be refined when libomptarget is going to support -# Windows! -# TODO: This part can also be removed if we can change the clang driver to make -# it support device only compilation. 
-if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - set(aux_triple x86_64-unknown-linux-gnu) -elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "ppc64le") - set(aux_triple powerpc64le-unknown-linux-gnu) -elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") - set(aux_triple aarch64-unknown-linux-gnu) -else() - libomptarget_say("Not building DeviceRTL: unknown host arch: ${CMAKE_HOST_SYSTEM_PROCESSOR}") - return() -endif() - set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR}) set(include_directory ${devicertl_base_directory}/include) set(source_directory ${devicertl_base_directory}/src) @@ -91,7 +76,6 @@ if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) endif() - # Activate RTL message dumps if requested by the user. set(LIBOMPTARGET_DEVICE_DEBUG FALSE CACHE BOOL "Activate DeviceRTL debug messages.") @@ -130,11 +114,10 @@ set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS} list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I") # Set flags for LLVM Bitcode compilation. 
-set(bc_flags -S -x c++ -std=c++17 -fvisibility=hidden - ${clang_opt_flags} - -Xclang -emit-llvm-bc - -Xclang -aux-triple -Xclang ${aux_triple} - -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device +set(bc_flags -c -emit-llvm -std=c++17 -fvisibility=hidden + ${clang_opt_flags} --offload-device-only + -nocudalib -nogpulib -nostdinc + -fopenmp -fopenmp-cuda-mode -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} @@ -158,7 +141,7 @@ function(compileDeviceRTLLibrary target_cpu target_name) add_custom_command(OUTPUT ${outfile} COMMAND ${CLANG_TOOL} ${bc_flags} - -Xclang -target-cpu -Xclang ${target_cpu} + --offload-arch=${target_cpu} ${target_bc_flags} ${infile} -o ${outfile} DEPENDS ${infile} ${include_files} @@ -229,12 +212,12 @@ endfunction() # Generate a Bitcode library for all the compute capabilities the user requested add_custom_target(omptarget.devicertl.nvptx) foreach(sm ${nvptx_sm_list}) - compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0") + compileDeviceRTLLibrary(sm_${sm} nvptx -fopenmp-targets=nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET --cuda-feature=+ptx61) endforeach() add_custom_target(omptarget.devicertl.amdgpu) foreach(mcpu ${amdgpu_mcpus}) - compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib) + compileDeviceRTLLibrary(${mcpu} amdgpu -fopenmp-targets=amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib) endforeach() # Set the flags to build the device runtime from clang.