forked from OSchip/llvm-project
[libomptarget] Build a minimal deviceRTL for amdgcn
Summary: [libomptarget] Build a minimal deviceRTL for amdgcn Repeat of D70414, with an include path fixed. Diff for sanity checking. The CMakeLists.txt file is functionally identical to the one used in the aomp fork. Whitespace changes were made based on nvptx/CMakeLists.txt, plus the copyright notice updated to match (Greg was the original author so would like his sign off on that here). This change will build a small subset of the deviceRTL if an appropriate toolchain is available, e.g. a local install of rocm. Support.h is moved from nvptx as a dependency of debug.h. Reviewers: ABataev, jdoerfert Reviewed By: ABataev Subscribers: jvesely, mgorny, jfb, openmp-commits, jdoerfert Tags: #openmp Differential Revision: https://reviews.llvm.org/D70971
This commit is contained in:
parent
9b962d83ec
commit
3ada8d2a87
|
@ -6,8 +6,9 @@
|
|||
#
|
||||
# ##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a device RTL for each available machine available.
|
||||
# Build a device RTL for each available machine.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
add_subdirectory(amdgcn)
|
||||
add_subdirectory(nvptx)
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build the AMDGCN Device RTL if the ROCM tools are available
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
find_package(LLVM QUIET CONFIG
|
||||
PATHS
|
||||
$ENV{AOMP}
|
||||
$ENV{HOME}/rocm/aomp
|
||||
/opt/rocm/aomp
|
||||
/usr/lib/rocm/aomp
|
||||
${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
|
||||
${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
|
||||
${CMAKE_CXX_COMPILER_DIR}
|
||||
NO_DEFAULT_PATH)
|
||||
|
||||
if (LLVM_DIR)
|
||||
libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
|
||||
else()
|
||||
libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})
|
||||
|
||||
if (AOMP_INSTALL_PREFIX)
|
||||
set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
|
||||
else()
|
||||
set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
|
||||
endif()
|
||||
|
||||
libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")
|
||||
|
||||
project(omptarget-amdgcn)
|
||||
|
||||
add_custom_target(omptarget-amdgcn ALL)
|
||||
|
||||
#optimization level
|
||||
set(optimization_level 2)
|
||||
|
||||
# Activate RTL message dumps if requested by the user.
|
||||
if(LIBOMPTARGET_NVPTX_DEBUG)
|
||||
set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
|
||||
endif()
|
||||
|
||||
get_filename_component(devicertl_base_directory
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DIRECTORY)
|
||||
|
||||
set(cuda_sources
|
||||
${devicertl_base_directory}/common/src/cancel.cu
|
||||
${devicertl_base_directory}/common/src/critical.cu)
|
||||
|
||||
set(h_files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
|
||||
${devicertl_base_directory}/common/debug.h
|
||||
${devicertl_base_directory}/common/device_environment.h
|
||||
${devicertl_base_directory}/common/state-queue.h
|
||||
${devicertl_base_directory}/common/state-queuei.h
|
||||
${devicertl_base_directory}/common/support.h)
|
||||
|
||||
# for both in-tree and out-of-tree build
|
||||
if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
|
||||
set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
else()
|
||||
set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
|
||||
endif()
|
||||
|
||||
# create libraries
|
||||
set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
|
||||
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
|
||||
set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
|
||||
endif()
|
||||
|
||||
macro(add_cuda_bc_library)
|
||||
set(cu_cmd ${AOMP_BINDIR}/clang++
|
||||
-std=c++11
|
||||
-fcuda-rdc
|
||||
-fvisibility=default
|
||||
--cuda-device-only
|
||||
-Wno-unused-value
|
||||
-x hip
|
||||
-O${optimization_level}
|
||||
--cuda-gpu-arch=${mcpu}
|
||||
${CUDA_DEBUG}
|
||||
-I${CMAKE_CURRENT_SOURCE_DIR}/src
|
||||
-I${devicertl_base_directory})
|
||||
|
||||
set(bc1_files)
|
||||
|
||||
foreach(file ${ARGN})
|
||||
get_filename_component(fname ${file} NAME_WE)
|
||||
set(bc1_filename ${fname}.${mcpu}.bc)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${bc1_filename}
|
||||
COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
|
||||
DEPENDS ${file} ${h_files})
|
||||
|
||||
list(APPEND bc1_files ${bc1_filename})
|
||||
endforeach()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT linkout.cuda.${mcpu}.bc
|
||||
COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
|
||||
DEPENDS ${bc1_files})
|
||||
|
||||
list(APPEND bc_files linkout.cuda.${mcpu}.bc)
|
||||
endmacro()
|
||||
|
||||
set(libname "omptarget-amdgcn")
|
||||
|
||||
foreach(mcpu ${mcpus})
|
||||
set(bc_files)
|
||||
add_cuda_bc_library(${cuda_sources})
|
||||
|
||||
set(bc_libname lib${libname}-${mcpu}.bc)
|
||||
add_custom_command(
|
||||
OUTPUT ${bc_libname}
|
||||
COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
|
||||
DEPENDS ${bc_files})
|
||||
|
||||
add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})
|
||||
|
||||
install(FILES ${OUTPUTDIR}/${bc_libname}
|
||||
DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
|
||||
)
|
||||
endforeach()
|
|
@ -72,8 +72,6 @@ EXTERN uint64_t __lanemask_lt();
|
|||
// thread's lane number in the warp
|
||||
EXTERN uint64_t __lanemask_gt();
|
||||
|
||||
EXTERN void llvm_amdgcn_s_barrier();
|
||||
|
||||
// CU id
|
||||
EXTERN unsigned __smid();
|
||||
|
||||
|
@ -101,25 +99,21 @@ INLINE uint32_t __kmpc_impl_smid() {
|
|||
return __smid();
|
||||
}
|
||||
|
||||
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }
|
||||
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
|
||||
|
||||
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }
|
||||
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
|
||||
|
||||
INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
|
||||
return __ballot64(1);
|
||||
}
|
||||
|
||||
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
int32_t SrcLane) {
|
||||
return __shfl(Var, SrcLane, WARPSIZE);
|
||||
}
|
||||
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
int32_t SrcLane);
|
||||
|
||||
INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
uint32_t Delta, int32_t Width) {
|
||||
return __shfl_down(Var, Delta, Width);
|
||||
}
|
||||
EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
|
||||
uint32_t Delta, int32_t Width);
|
||||
|
||||
INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }
|
||||
INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
|
||||
|
||||
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
|
||||
// we have protected the master warp from releasing from its barrier
|
||||
|
@ -128,4 +122,15 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
|
|||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
|
||||
// DEVICE versions of part of libc
|
||||
extern "C" {
|
||||
DEVICE __attribute__((noreturn)) void
|
||||
__assertfail(const char *, const char *, unsigned, const char *, size_t);
|
||||
INLINE static void __assert_fail(const char *__message, const char *__file,
|
||||
unsigned int __line, const char *__function) {
|
||||
__assertfail(__message, __file, __line, __function, sizeof(char));
|
||||
}
|
||||
DEVICE int printf(const char *, ...);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#ifndef _OMPTARGET_NVPTX_DEBUG_H_
|
||||
#define _OMPTARGET_NVPTX_DEBUG_H_
|
||||
|
||||
#include "device_environment.h"
|
||||
#include "common/device_environment.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// set desired level of debugging
|
||||
|
@ -128,7 +128,7 @@
|
|||
|
||||
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
|
||||
#include <stdio.h>
|
||||
#include "support.h"
|
||||
#include "common/support.h"
|
||||
|
||||
template <typename... Arguments>
|
||||
NOINLINE static void log(const char *fmt, Arguments... parameters) {
|
||||
|
|
|
@ -19,6 +19,6 @@ struct omptarget_device_environmentTy {
|
|||
int32_t debug_level;
|
||||
};
|
||||
|
||||
extern __device__ omptarget_device_environmentTy omptarget_device_environment;
|
||||
extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
|
||||
|
||||
#endif
|
|
@ -11,7 +11,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "omptarget-nvptx.h"
|
||||
#include "device_environment.h"
|
||||
#include "common/device_environment.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// global device environment
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "common/debug.h" // debug
|
||||
#include "interface.h" // interfaces with omp, compiler, and user
|
||||
#include "common/state-queue.h"
|
||||
#include "support.h"
|
||||
#include "common/support.h"
|
||||
|
||||
#define OMPTARGET_NVPTX_VERSION 1.1
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "support.h"
|
||||
#include "common/support.h"
|
||||
#include "common/debug.h"
|
||||
#include "omptarget-nvptx.h"
|
||||
|
||||
|
|
Loading…
Reference in New Issue