forked from OSchip/llvm-project
[Libomptarget] Do not use retaining attributes for the static library
When we build the libomptarget device runtime library targeting bitcode, we need to take special care that certain functions are not optimized out. This is because we manually internalize and optimize these definitions, ignoring their standard linkage semantics. When we build the static library, we can maintain these semantics, so these definitions do not need to be kept alive. Furthermore, keeping them alive prevents them from being removed during LTO, which in turn prevents us from completely internalizing `IsSPMDMode` and removing several other functions. This patch drops the retaining attributes for the static library target by placing them behind a macro definition that enables them only for the bitcode build. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D126701
This commit is contained in:
parent
a083f3caa1
commit
421b1f55c6
|
@ -234,12 +234,12 @@ endfunction()
|
|||
# Generate a Bitcode library for all the compute capabilities the user requested
|
||||
add_custom_target(omptarget.devicertl.nvptx)
|
||||
foreach(sm ${nvptx_sm_list})
|
||||
compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0")
|
||||
compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0")
|
||||
endforeach()
|
||||
|
||||
add_custom_target(omptarget.devicertl.amdgpu)
|
||||
foreach(mcpu ${amdgpu_mcpus})
|
||||
compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -nogpulib)
|
||||
compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib)
|
||||
endforeach()
|
||||
|
||||
set(LIBOMPTARGET_LLVM_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
|
||||
|
|
|
@ -209,6 +209,13 @@ enum OMPTgtExecModeFlags : int8_t {
|
|||
#define CONSTANT(NAME) \
|
||||
NAME [[clang::loader_uninitialized, clang::address_space(4)]]
|
||||
|
||||
// Attribute to keep alive certain definition for the bitcode library.
|
||||
#ifdef LIBOMPTARGET_BC_TARGET
|
||||
#define KEEP_ALIVE __attribute__((used, retain))
|
||||
#else
|
||||
#define KEEP_ALIVE
|
||||
#endif
|
||||
|
||||
///}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -276,7 +276,7 @@ uint32_t mapping::getNumberOfProcessorElements() {
|
|||
|
||||
// TODO: This is a workaround for initialization coming from kernels outside of
|
||||
// the TU. We will need to solve this more correctly in the future.
|
||||
int __attribute__((used, retain, weak)) SHARED(IsSPMDMode);
|
||||
int __attribute__((weak)) KEEP_ALIVE SHARED(IsSPMDMode);
|
||||
|
||||
void mapping::init(bool IsSPMD) {
|
||||
if (mapping::isInitialThreadInLevel0(IsSPMD))
|
||||
|
|
|
@ -21,7 +21,7 @@ using namespace _OMP;
|
|||
|
||||
namespace _OMP {
|
||||
/// Helper to keep code alive without introducing a performance penalty.
|
||||
__attribute__((used, retain, weak, optnone, cold)) void keepAlive() {
|
||||
__attribute__((weak, optnone, cold)) KEEP_ALIVE void keepAlive() {
|
||||
__kmpc_get_hardware_thread_id_in_block();
|
||||
__kmpc_get_hardware_num_threads_in_block();
|
||||
__kmpc_get_warp_size();
|
||||
|
|
Loading…
Reference in New Issue