forked from OSchip/llvm-project
[OpenMP] Add time profiling for libomptarget
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93055
This commit is contained in:
parent
7698a01808
commit
e007b32864
|
@ -82,6 +82,8 @@ endif()
|
|||
|
||||
# User-facing switch for building the offloading runtime. Its default is the
# value of ENABLE_LIBOMPTARGET.
option(OPENMP_ENABLE_LIBOMPTARGET
  "Enable building libomptarget for offloading."
  ${ENABLE_LIBOMPTARGET})
|
||||
# User-facing switch for time profiling inside libomptarget. Its default is
# the value of ENABLE_LIBOMPTARGET, i.e. the same default as the
# OPENMP_ENABLE_LIBOMPTARGET option.
option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING
  "Enable time profiling for libomptarget."
  ${ENABLE_LIBOMPTARGET})
|
||||
if (OPENMP_ENABLE_LIBOMPTARGET)
|
||||
# Check that the library can actually be built.
|
||||
if (APPLE OR WIN32)
|
||||
|
|
|
@ -21,11 +21,23 @@ set(src_files
|
|||
omptarget.cpp
|
||||
)
|
||||
|
||||
# Build libomptarget library with libdl dependency.
|
||||
add_library(omptarget SHARED ${src_files})
|
||||
target_link_libraries(omptarget
|
||||
${CMAKE_DL_LIBS}
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports")
|
||||
# Build the libomptarget shared library; it always links against libdl and
# uses the version script in the `exports` file.
#
# An in-tree build with OPENMP_ENABLE_LIBOMPTARGET_PROFILING set goes through
# add_llvm_library so that the LLVM Support component is linked in, and the
# target is compiled with OMPTARGET_PROFILE_ENABLED. Standalone builds and
# builds without profiling use a plain add_library instead.
if(OPENMP_ENABLE_LIBOMPTARGET_PROFILING AND NOT OPENMP_STANDALONE_BUILD)
  set(LLVM_LINK_COMPONENTS
    Support
  )
  add_llvm_library(omptarget SHARED ${src_files}
    LINK_LIBS ${CMAKE_DL_LIBS}
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports"
  )
  target_compile_definitions(omptarget PUBLIC OMPTARGET_PROFILE_ENABLED)
else()
  add_library(omptarget SHARED ${src_files})
  target_link_libraries(omptarget
    ${CMAKE_DL_LIBS}
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports")
endif()
|
||||
|
||||
# Install libomptarget under the lib destination folder.
|
||||
install(TARGETS omptarget LIBRARY COMPONENT omptarget
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <cstdlib>
|
||||
|
||||
EXTERN int omp_get_num_devices(void) {
|
||||
TIMESCOPE();
|
||||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
|
@ -29,12 +30,14 @@ EXTERN int omp_get_num_devices(void) {
|
|||
}
|
||||
|
||||
EXTERN int omp_get_initial_device(void) {
  TIMESCOPE();
  // The value reported for the initial device is whatever
  // omp_get_num_devices() returns at the time of the call.
  const int InitialDevice = omp_get_num_devices();
  DP("Call to omp_get_initial_device returning %d\n", InitialDevice);
  return InitialDevice;
}
|
||||
|
||||
EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_alloc for device %d requesting %zu bytes\n",
|
||||
device_num, size);
|
||||
|
||||
|
@ -62,6 +65,7 @@ EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
|||
}
|
||||
|
||||
EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_free for device %d and address " DPxMOD "\n",
|
||||
device_num, DPxPTR(device_ptr));
|
||||
|
||||
|
@ -86,6 +90,7 @@ EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
|||
}
|
||||
|
||||
EXTERN int omp_target_is_present(void *ptr, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
|
||||
device_num, DPxPTR(ptr));
|
||||
|
||||
|
@ -125,6 +130,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
|
|||
|
||||
EXTERN int omp_target_memcpy(void *dst, void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset, int dst_device, int src_device) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_memcpy, dst device %d, src device %d, "
|
||||
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
|
||||
"src offset %zu, length %zu\n", dst_device, src_device, DPxPTR(dst),
|
||||
|
@ -190,6 +196,7 @@ EXTERN int omp_target_memcpy_rect(void *dst, void *src, size_t element_size,
|
|||
int num_dims, const size_t *volume, const size_t *dst_offsets,
|
||||
const size_t *src_offsets, const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions, int dst_device, int src_device) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
|
||||
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
|
||||
"src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
|
||||
|
@ -244,6 +251,7 @@ EXTERN int omp_target_memcpy_rect(void *dst, void *src, size_t element_size,
|
|||
|
||||
EXTERN int omp_target_associate_ptr(void *host_ptr, void *device_ptr,
|
||||
size_t size, size_t device_offset, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
|
||||
"device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
|
||||
DPxPTR(host_ptr), DPxPTR(device_ptr), size, device_offset, device_num);
|
||||
|
@ -271,6 +279,7 @@ EXTERN int omp_target_associate_ptr(void *host_ptr, void *device_ptr,
|
|||
}
|
||||
|
||||
EXTERN int omp_target_disassociate_ptr(void *host_ptr, int device_num) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
|
||||
"device_num %d\n", DPxPTR(host_ptr), device_num);
|
||||
|
||||
|
|
|
@ -81,18 +81,21 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds requires flags
|
||||
EXTERN void __tgt_register_requires(int64_t flags) {
|
||||
TIMESCOPE();
|
||||
PM->RTLs.RegisterRequires(flags);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds a target shared library to the target execution image
|
||||
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
|
||||
TIMESCOPE();
|
||||
PM->RTLs.RegisterLib(desc);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// unloads a target shared library
|
||||
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
|
||||
TIMESCOPE();
|
||||
PM->RTLs.UnregisterLib(desc);
|
||||
}
|
||||
|
||||
|
@ -101,6 +104,7 @@ EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
|
|||
/// and passes the data to the device.
|
||||
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
                                    void **args_base, void **args,
                                    int64_t *arg_sizes, int64_t *arg_types) {
  TIMESCOPE();
  // Delegate to the mapper variant; this entry point carries no source
  // location, argument names, or user-defined mappers, so pass nullptr.
  __tgt_target_data_begin_mapper(/*loc=*/nullptr, device_id, arg_num,
                                 args_base, args, arg_sizes, arg_types,
                                 /*arg_names=*/nullptr,
                                 /*arg_mappers=*/nullptr);
}
|
||||
|
@ -109,6 +113,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
|
|||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
|
||||
|
||||
|
@ -122,6 +127,7 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
|
|||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled()) return;
|
||||
|
||||
DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
|
||||
|
@ -160,6 +166,7 @@ EXTERN void __tgt_target_data_begin_nowait_mapper(
|
|||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
|
||||
|
||||
|
@ -172,6 +179,7 @@ EXTERN void __tgt_target_data_begin_nowait_mapper(
|
|||
/// created by the last __tgt_target_data_begin.
|
||||
EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
                                  void **args_base, void **args,
                                  int64_t *arg_sizes, int64_t *arg_types) {
  TIMESCOPE();
  // Delegate to the mapper variant; this entry point carries no source
  // location, argument names, or user-defined mappers, so pass nullptr.
  __tgt_target_data_end_mapper(/*loc=*/nullptr, device_id, arg_num, args_base,
                               args, arg_sizes, arg_types,
                               /*arg_names=*/nullptr,
                               /*arg_mappers=*/nullptr);
}
|
||||
|
@ -180,6 +188,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
|
|||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
|
||||
|
||||
|
@ -193,6 +202,7 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
|
|||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled()) return;
|
||||
DP("Entering data end region with %d mappings\n", arg_num);
|
||||
|
||||
|
@ -236,6 +246,7 @@ EXTERN void __tgt_target_data_end_nowait_mapper(
|
|||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
|
||||
|
||||
|
@ -245,6 +256,7 @@ EXTERN void __tgt_target_data_end_nowait_mapper(
|
|||
|
||||
EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
                                     void **args_base, void **args,
                                     int64_t *arg_sizes, int64_t *arg_types) {
  TIMESCOPE();
  // Delegate to the mapper variant; this entry point carries no source
  // location, argument names, or user-defined mappers, so pass nullptr.
  __tgt_target_data_update_mapper(/*loc=*/nullptr, device_id, arg_num,
                                  args_base, args, arg_sizes, arg_types,
                                  /*arg_names=*/nullptr,
                                  /*arg_mappers=*/nullptr);
}
|
||||
|
@ -253,6 +265,7 @@ EXTERN void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
|
|||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
|
||||
|
||||
|
@ -266,6 +279,7 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
|
|||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled()) return;
|
||||
DP("Entering data update with %d mappings\n", arg_num);
|
||||
|
||||
|
@ -291,6 +305,7 @@ EXTERN void __tgt_target_data_update_nowait_mapper(
|
|||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
|
||||
|
||||
|
@ -300,6 +315,7 @@ EXTERN void __tgt_target_data_update_nowait_mapper(
|
|||
|
||||
EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
                        void **args_base, void **args, int64_t *arg_sizes,
                        int64_t *arg_types) {
  TIMESCOPE();
  // Delegate to the mapper variant; this entry point carries no source
  // location, argument names, or user-defined mappers, so pass nullptr.
  const int Status = __tgt_target_mapper(
      /*loc=*/nullptr, device_id, host_ptr, arg_num, args_base, args,
      arg_sizes, arg_types, /*arg_names=*/nullptr, /*arg_mappers=*/nullptr);
  return Status;
}
|
||||
|
@ -308,6 +324,7 @@ EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
|
|||
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
|
||||
|
||||
|
@ -319,6 +336,7 @@ EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
|||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
|
||||
DP("Entering target region with entry point " DPxMOD " and device Id %"
|
||||
PRId64 "\n", DPxPTR(host_ptr), device_id);
|
||||
|
@ -353,6 +371,7 @@ EXTERN int __tgt_target_nowait_mapper(
|
|||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
|
||||
|
||||
|
@ -363,6 +382,7 @@ EXTERN int __tgt_target_nowait_mapper(
|
|||
EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
|
||||
TIMESCOPE();
|
||||
return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
|
||||
args_base, args, arg_sizes, arg_types,
|
||||
nullptr, nullptr, team_num, thread_limit);
|
||||
|
@ -372,6 +392,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
|
|||
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
|
||||
|
||||
|
@ -387,6 +408,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
|
|||
map_var_info_t *arg_names,
|
||||
void **arg_mappers, int32_t team_num,
|
||||
int32_t thread_limit) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
|
||||
DP("Entering target region with entry point " DPxMOD " and device Id %"
|
||||
PRId64 "\n", DPxPTR(host_ptr), device_id);
|
||||
|
@ -424,6 +446,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
|
|||
map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
|
||||
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE();
|
||||
if (depNum + noAliasDepNum > 0)
|
||||
__kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
|
||||
|
||||
|
@ -434,6 +457,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
|
|||
|
||||
// Get the current number of components for a user-defined mapper.
|
||||
EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
|
||||
TIMESCOPE();
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
|
||||
int64_t size = MapperComponentsPtr->Components.size();
|
||||
DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
|
||||
|
@ -445,6 +469,7 @@ EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
|
|||
EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
|
||||
void *begin, int64_t size,
|
||||
int64_t type) {
|
||||
TIMESCOPE();
|
||||
DP("__tgt_push_mapper_component(Handle=" DPxMOD
|
||||
") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ").\n",
|
||||
|
@ -456,6 +481,7 @@ EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
|
|||
|
||||
EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
|
||||
uint64_t loop_tripcount) {
|
||||
TIMESCOPE();
|
||||
if (IsOffloadDisabled())
|
||||
return;
|
||||
|
||||
|
|
|
@ -107,4 +107,11 @@ static inline void dumpTargetPointerMappings(const DeviceTy &Device) {
|
|||
}
|
||||
}
|
||||
|
||||
// TIMESCOPE() declares an llvm::TimeTraceScope named after the enclosing
// function when OMPTARGET_PROFILE_ENABLED is defined; otherwise it expands to
// nothing.
#ifndef OMPTARGET_PROFILE_ENABLED
#define TIMESCOPE()
#else
#include "llvm/Support/TimeProfiler.h"
#define TIMESCOPE() llvm::TimeTraceScope TimeScope(__FUNCTION__)
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -33,14 +33,35 @@ static const char *RTLNames[] = {
|
|||
|
||||
PluginManager *PM;
|
||||
|
||||
// Use #ifdef, matching the guards around every use of this variable and the
// TIMESCOPE definition, so the declaration exists exactly when its uses do.
#ifdef OMPTARGET_PROFILE_ENABLED
// Value of the LIBOMPTARGET_PROFILE environment variable as read by init();
// stays nullptr when the variable is unset.
static char *ProfileTraceFile = nullptr;
#endif
|
||||
|
||||
/// Library constructor (priority 101): creates the global PluginManager and,
/// in profiling builds, starts the time-trace profiler if the
/// LIBOMPTARGET_PROFILE environment variable is set.
__attribute__((constructor(101))) void init() {
  DP("Init target library!\n");
  PM = new PluginManager();

#ifdef OMPTARGET_PROFILE_ENABLED
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  // TODO: add a configuration option for time granularity
  if (ProfileTraceFile != nullptr) {
    llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");
  }
#endif
}
|
||||
|
||||
/// Library destructor (priority 101): deletes the global PluginManager and,
/// in profiling builds with ProfileTraceFile set, writes the collected time
/// trace and tears the profiler down.
__attribute__((destructor(101))) void deinit() {
  DP("Deinit target library!\n");
  delete PM;

#ifdef OMPTARGET_PROFILE_ENABLED
  if (ProfileTraceFile) {
    // TODO: add env var for file output
    if (auto E = llvm::timeTraceProfilerWrite(ProfileTraceFile, "-")) {
      fprintf(stderr, "Error writing out the time trace\n");
      // A failed llvm::Error has to be consumed before it goes out of scope;
      // otherwise its destructor aborts in builds with ABI-breaking checks.
      llvm::consumeError(std::move(E));
    }

    llvm::timeTraceProfilerCleanup();
  }
#endif
}
|
||||
|
||||
void RTLsTy::LoadRTLs() {
|
||||
|
|
Loading…
Reference in New Issue