forked from OSchip/llvm-project
[Libomptarget][NFC] Make Libomptarget use the LLVM naming convention
Libomptarget grew out of a project that was originally not in LLVM. As we develop libomptarget, this has led to an increasingly large clash between the naming conventions used. This patch fixes most of the variable names that did not conform to the LLVM standard, that is, `VariableName` for variables and `functionName` for functions. This patch was primarily done using my editor's linting messages; if there are any issues I missed arising from the automation, let me know. Reviewed By: saiislam Differential Revision: https://reviews.llvm.org/D128997
This commit is contained in:
parent
3121167488
commit
d27d0a673c
|
@ -247,17 +247,17 @@ struct HostDataToTargetMapKeyTy {
|
|||
: KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
|
||||
HostDataToTargetTy *HDTT;
|
||||
};
|
||||
inline bool operator<(const HostDataToTargetMapKeyTy &lhs,
|
||||
const uintptr_t &rhs) {
|
||||
return lhs.KeyValue < rhs;
|
||||
inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
|
||||
const uintptr_t &RHS) {
|
||||
return LHS.KeyValue < RHS;
|
||||
}
|
||||
inline bool operator<(const uintptr_t &lhs,
|
||||
const HostDataToTargetMapKeyTy &rhs) {
|
||||
return lhs < rhs.KeyValue;
|
||||
inline bool operator<(const uintptr_t &LHS,
|
||||
const HostDataToTargetMapKeyTy &RHS) {
|
||||
return LHS < RHS.KeyValue;
|
||||
}
|
||||
inline bool operator<(const HostDataToTargetMapKeyTy &lhs,
|
||||
const HostDataToTargetMapKeyTy &rhs) {
|
||||
return lhs.KeyValue < rhs.KeyValue;
|
||||
inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
|
||||
const HostDataToTargetMapKeyTy &RHS) {
|
||||
return LHS.KeyValue < RHS.KeyValue;
|
||||
}
|
||||
|
||||
struct LookupResult {
|
||||
|
@ -395,7 +395,7 @@ struct DeviceTy {
|
|||
|
||||
// calls to RTL
|
||||
int32_t initOnce();
|
||||
__tgt_target_table *load_binary(void *Img);
|
||||
__tgt_target_table *loadBinary(void *Img);
|
||||
|
||||
// device memory allocation/deallocation routines
|
||||
/// Allocates \p Size bytes on the device, host or shared memory space
|
||||
|
@ -469,7 +469,7 @@ private:
|
|||
void deinit();
|
||||
};
|
||||
|
||||
extern bool device_is_ready(int device_num);
|
||||
extern bool deviceIsReady(int DeviceNum);
|
||||
|
||||
/// Struct for the data required to handle plugins
|
||||
struct PluginManager {
|
||||
|
|
|
@ -204,164 +204,161 @@ extern "C" {
|
|||
int omp_get_num_devices(void);
|
||||
int omp_get_device_num(void);
|
||||
int omp_get_initial_device(void);
|
||||
void *omp_target_alloc(size_t size, int device_num);
|
||||
void omp_target_free(void *device_ptr, int device_num);
|
||||
int omp_target_is_present(const void *ptr, int device_num);
|
||||
int omp_target_memcpy(void *dst, const void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset, int dst_device,
|
||||
int src_device);
|
||||
int omp_target_memcpy_rect(void *dst, const void *src, size_t element_size,
|
||||
int num_dims, const size_t *volume,
|
||||
const size_t *dst_offsets, const size_t *src_offsets,
|
||||
const size_t *dst_dimensions,
|
||||
const size_t *src_dimensions, int dst_device,
|
||||
int src_device);
|
||||
int omp_target_associate_ptr(const void *host_ptr, const void *device_ptr,
|
||||
size_t size, size_t device_offset, int device_num);
|
||||
int omp_target_disassociate_ptr(const void *host_ptr, int device_num);
|
||||
void *omp_target_alloc(size_t Size, int DeviceNum);
|
||||
void omp_target_free(void *DevicePtr, int DeviceNum);
|
||||
int omp_target_is_present(const void *Ptr, int DeviceNum);
|
||||
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
|
||||
size_t DstOffset, size_t SrcOffset, int DstDevice,
|
||||
int SrcDevice);
|
||||
int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
|
||||
int NumDims, const size_t *Volume,
|
||||
const size_t *DstOffsets, const size_t *SrcOffsets,
|
||||
const size_t *DstDimensions,
|
||||
const size_t *SrcDimensions, int DstDevice,
|
||||
int SrcDevice);
|
||||
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
|
||||
size_t Size, size_t DeviceOffset, int DeviceNum);
|
||||
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
|
||||
|
||||
/// Explicit target memory allocators
|
||||
/// Using the llvm_ prefix until they become part of the OpenMP standard.
|
||||
void *llvm_omp_target_alloc_device(size_t size, int device_num);
|
||||
void *llvm_omp_target_alloc_host(size_t size, int device_num);
|
||||
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
|
||||
void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
|
||||
void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
|
||||
void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
|
||||
|
||||
/// Dummy target so we have a symbol for generating host fallback.
|
||||
void *llvm_omp_target_dynamic_shared_alloc();
|
||||
|
||||
/// add the clauses of the requires directives in a given file
|
||||
void __tgt_register_requires(int64_t flags);
|
||||
void __tgt_register_requires(int64_t Flags);
|
||||
|
||||
/// adds a target shared library to the target execution image
|
||||
void __tgt_register_lib(__tgt_bin_desc *desc);
|
||||
void __tgt_register_lib(__tgt_bin_desc *Desc);
|
||||
|
||||
/// Initialize all RTLs at once
|
||||
void __tgt_init_all_rtls();
|
||||
|
||||
/// removes a target shared library from the target execution image
|
||||
void __tgt_unregister_lib(__tgt_bin_desc *desc);
|
||||
void __tgt_unregister_lib(__tgt_bin_desc *Desc);
|
||||
|
||||
// creates the host to target data mapping, stores it in the
|
||||
// libomptarget.so internal structure (an entry in a stack of data maps) and
|
||||
// passes the data to the device;
|
||||
void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types);
|
||||
void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum,
|
||||
void *noAliasDepList);
|
||||
void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers);
|
||||
void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
|
||||
void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers);
|
||||
void __tgt_target_data_begin_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
|
||||
// passes data from the target, release target memory and destroys the
|
||||
// host-target mapping (top entry from the stack of data maps) created by
|
||||
// the last __tgt_target_data_begin
|
||||
void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types);
|
||||
void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList);
|
||||
void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, map_var_info_t *arg_names,
|
||||
void **arg_mappers);
|
||||
void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
|
||||
void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList);
|
||||
void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers);
|
||||
void __tgt_target_data_end_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
|
||||
/// passes data to/from the target
|
||||
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types);
|
||||
void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum,
|
||||
void *noAliasDepList);
|
||||
void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers);
|
||||
void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes);
|
||||
void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames,
|
||||
void **ArgMappers);
|
||||
void __tgt_target_data_update_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
|
||||
// Performs the same actions as data_begin in case arg_num is non-zero
|
||||
// and initiates run of offloaded region on target platform; if arg_num
|
||||
// Performs the same actions as data_begin in case ArgNum is non-zero
|
||||
// and initiates run of offloaded region on target platform; if ArgNum
|
||||
// is non-zero after the region execution is done it also performs the
|
||||
// same action as data_end above. The following types are used; this
|
||||
// function returns 0 if it was able to transfer the execution to a
|
||||
// target and an int different from zero otherwise.
|
||||
int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types);
|
||||
int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList);
|
||||
int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers);
|
||||
int __tgt_target_nowait_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList);
|
||||
int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes);
|
||||
int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList);
|
||||
int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers);
|
||||
int __tgt_target_nowait_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList);
|
||||
|
||||
int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t num_teams,
|
||||
int32_t thread_limit);
|
||||
int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t num_teams, int32_t thread_limit,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList);
|
||||
int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers,
|
||||
int32_t num_teams, int32_t thread_limit);
|
||||
int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t NumTeams,
|
||||
int32_t ThreadLimit);
|
||||
int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t NumTeams,
|
||||
int32_t ThreadLimit, int32_t DepNum,
|
||||
void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
int32_t NumTeams, int32_t ThreadLimit);
|
||||
int __tgt_target_teams_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
|
||||
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList);
|
||||
ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers, int32_t NumTeams,
|
||||
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList);
|
||||
|
||||
void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
|
||||
void __kmpc_push_target_tripcount(int64_t DeviceId, uint64_t LoopTripcount);
|
||||
|
||||
void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
|
||||
uint64_t loop_tripcount);
|
||||
void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
uint64_t LoopTripcount);
|
||||
|
||||
void __tgt_set_info_flag(uint32_t);
|
||||
|
||||
int __tgt_print_device_info(int64_t device_id);
|
||||
int __tgt_print_device_info(int64_t DeviceId);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -131,12 +131,12 @@ int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
|
|||
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
||||
ptrdiff_t *Offsets, int32_t NumArgs,
|
||||
int32_t NumTeams, int32_t ThreadLimit,
|
||||
uint64_t loop_tripcount);
|
||||
uint64_t LoopTripcount);
|
||||
|
||||
// Asynchronous version of __tgt_rtl_run_target_team_region
|
||||
int32_t __tgt_rtl_run_target_team_region_async(
|
||||
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
|
||||
int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
|
||||
int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripcount,
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
// Device synchronization. In case of success, return zero. Otherwise, return an
|
||||
|
|
|
@ -116,7 +116,7 @@ struct RTLInfoTy {
|
|||
release_async_info_ty *release_async_info = nullptr;
|
||||
|
||||
// Are there images associated with this RTL.
|
||||
bool isUsed = false;
|
||||
bool IsUsed = false;
|
||||
|
||||
// Mutex for thread-safety when calling RTL interface functions.
|
||||
// It is easier to enforce thread-safety at the libomptarget level,
|
||||
|
@ -138,7 +138,7 @@ struct RTLsTy {
|
|||
explicit RTLsTy() = default;
|
||||
|
||||
// Register the clauses of the requires directive.
|
||||
void RegisterRequires(int64_t flags);
|
||||
void registerRequires(int64_t Flags);
|
||||
|
||||
// Initialize RTL if it has not been initialized
|
||||
void initRTLonce(RTLInfoTy &RTL);
|
||||
|
@ -147,15 +147,15 @@ struct RTLsTy {
|
|||
void initAllRTLs();
|
||||
|
||||
// Register a shared library with all (compatible) RTLs.
|
||||
void RegisterLib(__tgt_bin_desc *desc);
|
||||
void registerLib(__tgt_bin_desc *Desc);
|
||||
|
||||
// Unregister a shared library from all RTLs.
|
||||
void UnregisterLib(__tgt_bin_desc *desc);
|
||||
void unregisterLib(__tgt_bin_desc *Desc);
|
||||
|
||||
// Mutex-like object to guarantee thread-safety and unique initialization
|
||||
// (i.e. the library attempts to load the RTLs (plugins) only once).
|
||||
std::once_flag initFlag;
|
||||
void LoadRTLs(); // not thread-safe
|
||||
std::once_flag InitFlag;
|
||||
void loadRTLs(); // not thread-safe
|
||||
};
|
||||
|
||||
/// Map between the host entry begin and the translation table. Each
|
||||
|
@ -179,8 +179,8 @@ struct TableMap {
|
|||
TranslationTable *Table = nullptr; // table associated with the host ptr.
|
||||
uint32_t Index = 0; // index in which the host ptr translated entry is found.
|
||||
TableMap() = default;
|
||||
TableMap(TranslationTable *table, uint32_t index)
|
||||
: Table(table), Index(index) {}
|
||||
TableMap(TranslationTable *Table, uint32_t Index)
|
||||
: Table(Table), Index(Index) {}
|
||||
};
|
||||
typedef std::map<void *, TableMap> HostPtrToTableMapTy;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -67,22 +67,22 @@ static int32_t withBytesAsElf(char *BytesBegin, char *BytesEnd, F Callback) {
|
|||
}
|
||||
|
||||
// Check whether an image is valid for execution on TargetId
|
||||
int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id) {
|
||||
auto CheckMachine = [target_id](const ELFObjectFileBase *Object) {
|
||||
return target_id == Object->getEMachine();
|
||||
int32_t elf_check_machine(__tgt_device_image *Image, uint16_t TargetId) {
|
||||
auto CheckMachine = [TargetId](const ELFObjectFileBase *Object) {
|
||||
return TargetId == Object->getEMachine();
|
||||
};
|
||||
return withBytesAsElf(reinterpret_cast<char *>(image->ImageStart),
|
||||
reinterpret_cast<char *>(image->ImageEnd),
|
||||
return withBytesAsElf(reinterpret_cast<char *>(Image->ImageStart),
|
||||
reinterpret_cast<char *>(Image->ImageEnd),
|
||||
CheckMachine);
|
||||
}
|
||||
|
||||
int32_t elf_is_dynamic(__tgt_device_image *image) {
|
||||
int32_t elf_is_dynamic(__tgt_device_image *Image) {
|
||||
auto CheckDynType = [](const ELFObjectFileBase *Object) {
|
||||
uint16_t Type = Object->getEType();
|
||||
DP("ELF Type: %d\n", Type);
|
||||
return Type == ET_DYN;
|
||||
};
|
||||
return withBytesAsElf(reinterpret_cast<char *>(image->ImageStart),
|
||||
reinterpret_cast<char *>(image->ImageEnd),
|
||||
return withBytesAsElf(reinterpret_cast<char *>(Image->ImageStart),
|
||||
reinterpret_cast<char *>(Image->ImageEnd),
|
||||
CheckDynType);
|
||||
}
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
|
||||
/// Return non-zero, if the given \p Image is an ELF object, whose
|
||||
/// e_machine matches \p TargetId; return zero otherwise.
|
||||
EXTERN int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id);
|
||||
EXTERN int32_t elf_check_machine(__tgt_device_image *Image, uint16_t TargetId);
|
||||
|
||||
/// Return non-zero, if the given \p Image is an ET_DYN ELF object;
|
||||
/// return zero otherwise.
|
||||
EXTERN int32_t elf_is_dynamic(__tgt_device_image *image);
|
||||
EXTERN int32_t elf_is_dynamic(__tgt_device_image *Image);
|
||||
|
||||
#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_COMMON_H
|
||||
|
|
|
@ -86,8 +86,8 @@ struct KernelTy {
|
|||
/// Maximal number of threads per block for this kernel.
|
||||
int MaxThreadsPerBlock = 0;
|
||||
|
||||
KernelTy(CUfunction _Func, llvm::omp::OMPTgtExecModeFlags _ExecutionMode)
|
||||
: Func(_Func), ExecutionMode(_ExecutionMode) {}
|
||||
KernelTy(CUfunction Func, llvm::omp::OMPTgtExecModeFlags ExecutionMode)
|
||||
: Func(Func), ExecutionMode(ExecutionMode) {}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
@ -437,9 +437,9 @@ class DeviceRTLTy {
|
|||
bool UseMemoryManager = true;
|
||||
|
||||
// Record entry point associated with device
|
||||
void addOffloadEntry(const int DeviceId, const __tgt_offload_entry entry) {
|
||||
void addOffloadEntry(const int DeviceId, const __tgt_offload_entry Entry) {
|
||||
FuncOrGblEntryTy &E = DeviceData[DeviceId].FuncGblEntries.back();
|
||||
E.Entries.push_back(entry);
|
||||
E.Entries.push_back(Entry);
|
||||
}
|
||||
|
||||
// Return a pointer to the entry associated with the pointer
|
||||
|
@ -1255,19 +1255,19 @@ public:
|
|||
return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
void printDeviceInfo(int32_t device_id) {
|
||||
void printDeviceInfo(int32_t DeviceId) {
|
||||
char TmpChar[1000];
|
||||
std::string TmpStr;
|
||||
size_t TmpSt;
|
||||
int TmpInt, TmpInt2, TmpInt3;
|
||||
|
||||
CUdevice Device;
|
||||
checkResult(cuDeviceGet(&Device, device_id),
|
||||
checkResult(cuDeviceGet(&Device, DeviceId),
|
||||
"Error returned from cuCtxGetDevice\n");
|
||||
|
||||
cuDriverGetVersion(&TmpInt);
|
||||
printf(" CUDA Driver Version: \t\t%d \n", TmpInt);
|
||||
printf(" CUDA Device Number: \t\t%d \n", device_id);
|
||||
printf(" CUDA Device Number: \t\t%d \n", DeviceId);
|
||||
checkResult(cuDeviceGetName(TmpChar, 1000, Device),
|
||||
"Error returned from cuDeviceGetName\n");
|
||||
printf(" Device Name: \t\t\t%s \n", TmpChar);
|
||||
|
@ -1515,8 +1515,8 @@ DeviceRTLTy DeviceRTL;
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
|
||||
return elf_check_machine(image, /* EM_CUDA */ 190);
|
||||
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
|
||||
return elf_check_machine(Image, /* EM_CUDA */ 190);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); }
|
||||
|
@ -1527,211 +1527,204 @@ int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
|
|||
return RequiresFlags;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_is_data_exchangable(int32_t src_dev_id, int dst_dev_id) {
|
||||
if (DeviceRTL.isValidDeviceId(src_dev_id) &&
|
||||
DeviceRTL.isValidDeviceId(dst_dev_id))
|
||||
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int DstDevId) {
|
||||
if (DeviceRTL.isValidDeviceId(SrcDevId) &&
|
||||
DeviceRTL.isValidDeviceId(DstDevId))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_init_device(int32_t device_id) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_init_device(int32_t DeviceId) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set when init the device.
|
||||
|
||||
return DeviceRTL.initDevice(device_id);
|
||||
return DeviceRTL.initDevice(DeviceId);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_deinit_device(int32_t device_id) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_deinit_device(int32_t DeviceId) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set when deinit the device.
|
||||
|
||||
return DeviceRTL.deinitDevice(device_id);
|
||||
return DeviceRTL.deinitDevice(DeviceId);
|
||||
}
|
||||
|
||||
__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
||||
__tgt_device_image *image) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
|
||||
__tgt_device_image *Image) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return nullptr;
|
||||
|
||||
return DeviceRTL.loadBinary(device_id, image);
|
||||
return DeviceRTL.loadBinary(DeviceId, Image);
|
||||
}
|
||||
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *,
|
||||
int32_t kind) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *,
|
||||
int32_t Kind) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return nullptr;
|
||||
|
||||
return DeviceRTL.dataAlloc(device_id, size, (TargetAllocTy)kind);
|
||||
return DeviceRTL.dataAlloc(DeviceId, Size, (TargetAllocTy)Kind);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int64_t size) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
int64_t Size) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set in __tgt_rtl_data_submit_async.
|
||||
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr,
|
||||
size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
const int32_t Rc =
|
||||
__tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, &AsyncInfo);
|
||||
if (Rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr,
|
||||
void *hst_ptr, int64_t size,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
|
||||
void *HstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfoPtr && "async_info_ptr is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.dataSubmit(device_id, tgt_ptr, hst_ptr, size,
|
||||
async_info_ptr);
|
||||
return DeviceRTL.dataSubmit(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
||||
int64_t size) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
int64_t Size) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set in __tgt_rtl_data_retrieve_async.
|
||||
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr,
|
||||
size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
const int32_t Rc =
|
||||
__tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, &AsyncInfo);
|
||||
if (Rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
|
||||
void *tgt_ptr, int64_t size,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
|
||||
void *TgtPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfoPtr && "async_info_ptr is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.dataRetrieve(device_id, hst_ptr, tgt_ptr, size,
|
||||
async_info_ptr);
|
||||
return DeviceRTL.dataRetrieve(DeviceId, HstPtr, TgtPtr, Size, AsyncInfoPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_exchange_async(int32_t src_dev_id, void *src_ptr,
|
||||
int dst_dev_id, void *dst_ptr,
|
||||
int64_t size,
|
||||
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr,
|
||||
int DstDevId, void *DstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(SrcDevId) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(DstDevId) && "dst_dev_id is invalid");
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(src_dev_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(SrcDevId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
|
||||
return DeviceRTL.dataExchange(SrcDevId, SrcPtr, DstDevId, DstPtr, Size,
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr,
|
||||
int32_t dst_dev_id, void *dst_ptr,
|
||||
int64_t size) {
|
||||
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
|
||||
int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr,
|
||||
int32_t DstDevId, void *DstPtr, int64_t Size) {
|
||||
assert(DeviceRTL.isValidDeviceId(SrcDevId) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(DstDevId) && "dst_dev_id is invalid");
|
||||
// Context is set in __tgt_rtl_data_exchange_async.
|
||||
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_exchange_async(
|
||||
src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
const int32_t Rc = __tgt_rtl_data_exchange_async(SrcDevId, SrcPtr, DstDevId,
|
||||
DstPtr, Size, &AsyncInfo);
|
||||
if (Rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(src_dev_id, &AsyncInfo);
|
||||
return __tgt_rtl_synchronize(SrcDevId, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.dataDelete(device_id, tgt_ptr);
|
||||
return DeviceRTL.dataDelete(DeviceId, TgtPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
void **tgt_args,
|
||||
ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num, int32_t team_num,
|
||||
int32_t thread_limit,
|
||||
uint64_t loop_tripcount) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum,
|
||||
int32_t ThreadLimit,
|
||||
uint64_t LoopTripcount) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set in __tgt_rtl_run_target_team_region_async.
|
||||
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_run_target_team_region_async(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
|
||||
thread_limit, loop_tripcount, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
const int32_t Rc = __tgt_rtl_run_target_team_region_async(
|
||||
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
|
||||
LoopTripcount, &AsyncInfo);
|
||||
if (Rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_team_region_async(
|
||||
int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
|
||||
ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
|
||||
int32_t thread_limit, uint64_t loop_tripcount,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
|
||||
uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.runTargetTeamRegion(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
|
||||
thread_limit, loop_tripcount, async_info_ptr);
|
||||
return DeviceRTL.runTargetTeamRegion(DeviceId, TgtEntryPtr, TgtArgs,
|
||||
TgtOffsets, ArgNum, TeamNum, ThreadLimit,
|
||||
LoopTripcount, AsyncInfoPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
void **tgt_args, ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set in __tgt_rtl_run_target_region_async.
|
||||
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_run_target_region_async(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
const int32_t Rc = __tgt_rtl_run_target_region_async(
|
||||
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, &AsyncInfo);
|
||||
if (Rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
|
||||
void *tgt_entry_ptr, void **tgt_args,
|
||||
ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// Context is set in __tgt_rtl_run_target_team_region_async.
|
||||
return __tgt_rtl_run_target_team_region_async(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num,
|
||||
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum,
|
||||
/* team num*/ 1, /* thread_limit */ 1, /* loop_tripcount */ 0,
|
||||
async_info_ptr);
|
||||
AsyncInfoPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_synchronize(int32_t device_id,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
|
||||
int32_t __tgt_rtl_synchronize(int32_t DeviceId,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfoPtr && "async_info_ptr is nullptr");
|
||||
assert(AsyncInfoPtr->Queue && "async_info_ptr->Queue is nullptr");
|
||||
// NOTE: We don't need to set context for stream sync.
|
||||
return DeviceRTL.synchronize(device_id, async_info_ptr);
|
||||
return DeviceRTL.synchronize(DeviceId, AsyncInfoPtr);
|
||||
}
|
||||
|
||||
void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
|
||||
|
@ -1739,89 +1732,88 @@ void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
|
|||
InfoLevel.store(NewInfoLevel);
|
||||
}
|
||||
|
||||
void __tgt_rtl_print_device_info(int32_t device_id) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
void __tgt_rtl_print_device_info(int32_t DeviceId) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
// NOTE: We don't need to set context for print device info.
|
||||
DeviceRTL.printDeviceInfo(device_id);
|
||||
DeviceRTL.printDeviceInfo(DeviceId);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
|
||||
assert(event && "event is nullptr");
|
||||
int32_t __tgt_rtl_create_event(int32_t DeviceId, void **Event) {
|
||||
assert(Event && "event is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.createEvent(device_id, event);
|
||||
return DeviceRTL.createEvent(DeviceId, Event);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
|
||||
assert(event_ptr && "event_ptr is nullptr");
|
||||
int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(AsyncInfoPtr && "async_info_ptr is nullptr");
|
||||
assert(AsyncInfoPtr->Queue && "async_info_ptr->Queue is nullptr");
|
||||
assert(EventPtr && "event_ptr is nullptr");
|
||||
// NOTE: We might not need to set context for event record.
|
||||
return recordEvent(event_ptr, async_info_ptr);
|
||||
return recordEvent(EventPtr, AsyncInfoPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
assert(event_ptr && "event is nullptr");
|
||||
int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfoPtr && "async_info_ptr is nullptr");
|
||||
assert(EventPtr && "event is nullptr");
|
||||
// If we don't have a queue we need to set the context.
|
||||
if (!async_info_ptr->Queue &&
|
||||
DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (!AsyncInfoPtr->Queue && DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
return DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr);
|
||||
return DeviceRTL.waitEvent(DeviceId, AsyncInfoPtr, EventPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
|
||||
assert(event_ptr && "event is nullptr");
|
||||
int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
|
||||
assert(EventPtr && "event is nullptr");
|
||||
// NOTE: We might not need to set context for event sync.
|
||||
return syncEvent(event_ptr);
|
||||
return syncEvent(EventPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
|
||||
assert(event_ptr && "event is nullptr");
|
||||
int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
|
||||
assert(EventPtr && "event is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.destroyEvent(device_id, event_ptr);
|
||||
return DeviceRTL.destroyEvent(DeviceId, EventPtr);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_release_async_info(int32_t device_id,
|
||||
__tgt_async_info *async_info) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info && "async_info is nullptr");
|
||||
int32_t __tgt_rtl_release_async_info(int32_t DeviceId,
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfo && "async_info is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.releaseAsyncInfo(device_id, async_info);
|
||||
return DeviceRTL.releaseAsyncInfo(DeviceId, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_init_async_info(int32_t device_id,
|
||||
__tgt_async_info **async_info) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(async_info && "async_info is nullptr");
|
||||
int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
|
||||
__tgt_async_info **AsyncInfo) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(AsyncInfo && "async_info is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.initAsyncInfo(device_id, async_info);
|
||||
return DeviceRTL.initAsyncInfo(DeviceId, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_init_device_info(int32_t device_id,
|
||||
__tgt_device_info *device_info_ptr,
|
||||
const char **err_str) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
assert(device_info_ptr && "device_info_ptr is nullptr");
|
||||
int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
|
||||
__tgt_device_info *DeviceInfoPtr,
|
||||
const char **ErrStr) {
|
||||
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
|
||||
assert(DeviceInfoPtr && "device_info_ptr is nullptr");
|
||||
|
||||
if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
|
||||
if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return DeviceRTL.initDeviceInfo(device_id, device_info_ptr, err_str);
|
||||
return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -58,26 +58,27 @@ public:
|
|||
std::list<DynLibTy> DynLibs;
|
||||
|
||||
// Record entry point associated with device.
|
||||
void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin,
|
||||
__tgt_offload_entry *end) {
|
||||
assert(device_id < (int32_t)FuncGblEntries.size() &&
|
||||
void createOffloadTable(int32_t DeviceId, __tgt_offload_entry *Begin,
|
||||
__tgt_offload_entry *End) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncGblEntries[device_id].emplace_back();
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
|
||||
FuncGblEntries[DeviceId].emplace_back();
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
E.Table.EntriesBegin = begin;
|
||||
E.Table.EntriesEnd = end;
|
||||
E.Table.EntriesBegin = Begin;
|
||||
E.Table.EntriesEnd = End;
|
||||
}
|
||||
|
||||
// Return true if the entry is associated with device.
|
||||
bool findOffloadEntry(int32_t device_id, void *addr) {
|
||||
assert(device_id < (int32_t)FuncGblEntries.size() &&
|
||||
bool findOffloadEntry(int32_t DeviceId, void *Addr) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd;
|
||||
i < e; ++i) {
|
||||
if (i->addr == addr)
|
||||
for (__tgt_offload_entry *I = E.Table.EntriesBegin,
|
||||
*End = E.Table.EntriesEnd;
|
||||
I < End; ++I) {
|
||||
if (I->addr == Addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -85,22 +86,22 @@ public:
|
|||
}
|
||||
|
||||
// Return the pointer to the target entries table.
|
||||
__tgt_target_table *getOffloadEntriesTable(int32_t device_id) {
|
||||
assert(device_id < (int32_t)FuncGblEntries.size() &&
|
||||
__tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[device_id].back();
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
return &E.Table;
|
||||
}
|
||||
|
||||
RTLDeviceInfoTy(int32_t num_devices) { FuncGblEntries.resize(num_devices); }
|
||||
RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }
|
||||
|
||||
~RTLDeviceInfoTy() {
|
||||
// Close dynamic libraries
|
||||
for (auto &lib : DynLibs) {
|
||||
if (lib.Handle) {
|
||||
dlclose(lib.Handle);
|
||||
remove(lib.FileName.c_str());
|
||||
for (auto &Lib : DynLibs) {
|
||||
if (Lib.Handle) {
|
||||
dlclose(Lib.Handle);
|
||||
remove(Lib.FileName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -112,29 +113,29 @@ static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
|
||||
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
|
||||
// If we don't have a valid ELF ID we can just fail.
|
||||
#if TARGET_ELF_ID < 1
|
||||
return 0;
|
||||
#else
|
||||
return elf_check_machine(image, TARGET_ELF_ID);
|
||||
return elf_check_machine(Image, TARGET_ELF_ID);
|
||||
#endif
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
|
||||
|
||||
int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; }
|
||||
int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; }
|
||||
|
||||
__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
||||
__tgt_device_image *image) {
|
||||
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
|
||||
__tgt_device_image *Image) {
|
||||
|
||||
DP("Dev %d: load binary from " DPxMOD " image\n", device_id,
|
||||
DPxPTR(image->ImageStart));
|
||||
DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
|
||||
DPxPTR(Image->ImageStart));
|
||||
|
||||
assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id");
|
||||
assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");
|
||||
|
||||
size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart;
|
||||
size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin);
|
||||
size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
|
||||
size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
|
||||
DP("Expecting to have %zd entries defined.\n", NumEntries);
|
||||
|
||||
// Is the library version incompatible with the header file?
|
||||
|
@ -144,47 +145,47 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
|||
}
|
||||
|
||||
// Obtain elf handler
|
||||
Elf *e = elf_memory((char *)image->ImageStart, ImageSize);
|
||||
if (!e) {
|
||||
Elf *E = elf_memory((char *)Image->ImageStart, ImageSize);
|
||||
if (!E) {
|
||||
DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (elf_kind(e) != ELF_K_ELF) {
|
||||
if (elf_kind(E) != ELF_K_ELF) {
|
||||
DP("Invalid Elf kind!\n");
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Find the entries section offset
|
||||
Elf_Scn *section = 0;
|
||||
Elf64_Off entries_offset = 0;
|
||||
Elf_Scn *Section = 0;
|
||||
Elf64_Off EntriesOffset = 0;
|
||||
|
||||
size_t shstrndx;
|
||||
size_t Shstrndx;
|
||||
|
||||
if (elf_getshdrstrndx(e, &shstrndx)) {
|
||||
if (elf_getshdrstrndx(E, &Shstrndx)) {
|
||||
DP("Unable to get ELF strings index!\n");
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
while ((section = elf_nextscn(e, section))) {
|
||||
GElf_Shdr hdr;
|
||||
gelf_getshdr(section, &hdr);
|
||||
while ((Section = elf_nextscn(E, Section))) {
|
||||
GElf_Shdr Hdr;
|
||||
gelf_getshdr(Section, &Hdr);
|
||||
|
||||
if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) {
|
||||
entries_offset = hdr.sh_addr;
|
||||
if (!strcmp(elf_strptr(E, Shstrndx, Hdr.sh_name), OFFLOADSECTIONNAME)) {
|
||||
EntriesOffset = Hdr.sh_addr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!entries_offset) {
|
||||
if (!EntriesOffset) {
|
||||
DP("Entries Section Offset Not Found\n");
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset));
|
||||
DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(EntriesOffset));
|
||||
|
||||
// load dynamic library and get the entry points. We use the dl library
|
||||
// to do the loading of the library, but we could do it directly to avoid the
|
||||
|
@ -192,148 +193,147 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
|
|||
//
|
||||
// 1) Create tmp file with the library contents.
|
||||
// 2) Use dlopen to load the file and dlsym to retrieve the symbols.
|
||||
char tmp_name[] = "/tmp/tmpfile_XXXXXX";
|
||||
int tmp_fd = mkstemp(tmp_name);
|
||||
char TmpName[] = "/tmp/tmpfile_XXXXXX";
|
||||
int TmpFd = mkstemp(TmpName);
|
||||
|
||||
if (tmp_fd == -1) {
|
||||
elf_end(e);
|
||||
if (TmpFd == -1) {
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
FILE *ftmp = fdopen(tmp_fd, "wb");
|
||||
FILE *Ftmp = fdopen(TmpFd, "wb");
|
||||
|
||||
if (!ftmp) {
|
||||
elf_end(e);
|
||||
if (!Ftmp) {
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fwrite(image->ImageStart, ImageSize, 1, ftmp);
|
||||
fclose(ftmp);
|
||||
fwrite(Image->ImageStart, ImageSize, 1, Ftmp);
|
||||
fclose(Ftmp);
|
||||
|
||||
DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)};
|
||||
DynLibTy Lib = {TmpName, dlopen(TmpName, RTLD_LAZY)};
|
||||
|
||||
if (!Lib.Handle) {
|
||||
DP("Target library loading error: %s\n", dlerror());
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DeviceInfo.DynLibs.push_back(Lib);
|
||||
|
||||
struct link_map *libInfo = (struct link_map *)Lib.Handle;
|
||||
struct link_map *LibInfo = (struct link_map *)Lib.Handle;
|
||||
|
||||
// The place where the entries info is loaded is the library base address
|
||||
// plus the offset determined from the ELF file.
|
||||
Elf64_Addr entries_addr = libInfo->l_addr + entries_offset;
|
||||
Elf64_Addr EntriesAddr = LibInfo->l_addr + EntriesOffset;
|
||||
|
||||
DP("Pointer to first entry to be loaded is (" DPxMOD ").\n",
|
||||
DPxPTR(entries_addr));
|
||||
DPxPTR(EntriesAddr));
|
||||
|
||||
// Table of pointers to all the entries in the target.
|
||||
__tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr;
|
||||
__tgt_offload_entry *EntriesTable = (__tgt_offload_entry *)EntriesAddr;
|
||||
|
||||
__tgt_offload_entry *entries_begin = &entries_table[0];
|
||||
__tgt_offload_entry *entries_end = entries_begin + NumEntries;
|
||||
__tgt_offload_entry *EntriesBegin = &EntriesTable[0];
|
||||
__tgt_offload_entry *EntriesEnd = EntriesBegin + NumEntries;
|
||||
|
||||
if (!entries_begin) {
|
||||
if (!EntriesBegin) {
|
||||
DP("Can't obtain entries begin\n");
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n",
|
||||
DPxPTR(entries_begin), DPxPTR(entries_end));
|
||||
DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end);
|
||||
DPxPTR(EntriesBegin), DPxPTR(EntriesEnd));
|
||||
DeviceInfo.createOffloadTable(DeviceId, EntriesBegin, EntriesEnd);
|
||||
|
||||
elf_end(e);
|
||||
elf_end(E);
|
||||
|
||||
return DeviceInfo.getOffloadEntriesTable(device_id);
|
||||
return DeviceInfo.getOffloadEntriesTable(DeviceId);
|
||||
}
|
||||
|
||||
void __tgt_rtl_print_device_info(int32_t device_id) {
|
||||
void __tgt_rtl_print_device_info(int32_t DeviceId) {
|
||||
printf(" This is a generic-elf-64bit device\n");
|
||||
}
|
||||
|
||||
// Sample implementation of explicit memory allocator. For this plugin all kinds
|
||||
// are equivalent to each other.
|
||||
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr,
|
||||
int32_t kind) {
|
||||
void *ptr = NULL;
|
||||
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
|
||||
int32_t Kind) {
|
||||
void *Ptr = NULL;
|
||||
|
||||
switch (kind) {
|
||||
switch (Kind) {
|
||||
case TARGET_ALLOC_DEVICE:
|
||||
case TARGET_ALLOC_HOST:
|
||||
case TARGET_ALLOC_SHARED:
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
ptr = malloc(size);
|
||||
Ptr = malloc(Size);
|
||||
break;
|
||||
default:
|
||||
REPORT("Invalid target data allocation kind");
|
||||
}
|
||||
|
||||
return ptr;
|
||||
return Ptr;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int64_t size) {
|
||||
memcpy(tgt_ptr, hst_ptr, size);
|
||||
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
int64_t Size) {
|
||||
memcpy(TgtPtr, HstPtr, Size);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
||||
int64_t size) {
|
||||
memcpy(hst_ptr, tgt_ptr, size);
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
int64_t Size) {
|
||||
memcpy(HstPtr, TgtPtr, Size);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
||||
free(tgt_ptr);
|
||||
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
|
||||
free(TgtPtr);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
void **tgt_args,
|
||||
ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num, int32_t team_num,
|
||||
int32_t thread_limit,
|
||||
uint64_t loop_tripcount /*not used*/) {
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum,
|
||||
int32_t ThreadLimit,
|
||||
uint64_t LoopTripcount /*not used*/) {
|
||||
// ignore team num and thread limit.
|
||||
|
||||
// Use libffi to launch execution.
|
||||
ffi_cif cif;
|
||||
ffi_cif Cif;
|
||||
|
||||
// All args are references.
|
||||
std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
|
||||
std::vector<void *> args(arg_num);
|
||||
std::vector<void *> ptrs(arg_num);
|
||||
std::vector<ffi_type *> ArgsTypes(ArgNum, &ffi_type_pointer);
|
||||
std::vector<void *> Args(ArgNum);
|
||||
std::vector<void *> Ptrs(ArgNum);
|
||||
|
||||
for (int32_t i = 0; i < arg_num; ++i) {
|
||||
ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
|
||||
args[i] = &ptrs[i];
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
|
||||
Args[I] = &Ptrs[I];
|
||||
}
|
||||
|
||||
ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
|
||||
&ffi_type_void, &args_types[0]);
|
||||
ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, ArgNum,
|
||||
&ffi_type_void, &ArgsTypes[0]);
|
||||
|
||||
assert(status == FFI_OK && "Unable to prepare target launch!");
|
||||
assert(Status == FFI_OK && "Unable to prepare target launch!");
|
||||
|
||||
if (status != FFI_OK)
|
||||
if (Status != FFI_OK)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr));
|
||||
DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
|
||||
|
||||
void (*entry)(void);
|
||||
*((void **)&entry) = tgt_entry_ptr;
|
||||
ffi_call(&cif, entry, NULL, &args[0]);
|
||||
void (*Entry)(void);
|
||||
*((void **)&Entry) = TgtEntryPtr;
|
||||
ffi_call(&Cif, Entry, NULL, &Args[0]);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
void **tgt_args, ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num) {
|
||||
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum) {
|
||||
// use one team and one thread.
|
||||
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
|
||||
tgt_offsets, arg_num, 1, 1, 0);
|
||||
return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs,
|
||||
TgtOffsets, ArgNum, 1, 1, 0);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -41,66 +41,66 @@ EXTERN int omp_get_device_num(void) {
|
|||
|
||||
EXTERN int omp_get_initial_device(void) {
|
||||
TIMESCOPE();
|
||||
int hostDevice = omp_get_num_devices();
|
||||
DP("Call to omp_get_initial_device returning %d\n", hostDevice);
|
||||
return hostDevice;
|
||||
int HostDevice = omp_get_num_devices();
|
||||
DP("Call to omp_get_initial_device returning %d\n", HostDevice);
|
||||
return HostDevice;
|
||||
}
|
||||
|
||||
EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEFAULT, __func__);
|
||||
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
|
||||
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
|
||||
}
|
||||
|
||||
EXTERN void *llvm_omp_target_alloc_device(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEVICE, __func__);
|
||||
EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) {
|
||||
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
|
||||
}
|
||||
|
||||
EXTERN void *llvm_omp_target_alloc_host(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_HOST, __func__);
|
||||
EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) {
|
||||
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__);
|
||||
}
|
||||
|
||||
EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
|
||||
return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
|
||||
EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
|
||||
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
|
||||
}
|
||||
|
||||
EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
|
||||
EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
|
||||
|
||||
EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
||||
EXTERN void omp_target_free(void *DevicePtr, int DeviceNum) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_free for device %d and address " DPxMOD "\n",
|
||||
device_num, DPxPTR(device_ptr));
|
||||
DeviceNum, DPxPTR(DevicePtr));
|
||||
|
||||
if (!device_ptr) {
|
||||
if (!DevicePtr) {
|
||||
DP("Call to omp_target_free with NULL ptr\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
free(device_ptr);
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
free(DevicePtr);
|
||||
DP("omp_target_free deallocated host ptr\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
if (!deviceIsReady(DeviceNum)) {
|
||||
DP("omp_target_free returns, nothing to do\n");
|
||||
return;
|
||||
}
|
||||
|
||||
PM->Devices[device_num]->deleteData(device_ptr);
|
||||
PM->Devices[DeviceNum]->deleteData(DevicePtr);
|
||||
DP("omp_target_free deallocated device ptr\n");
|
||||
}
|
||||
|
||||
EXTERN int omp_target_is_present(const void *ptr, int device_num) {
|
||||
EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
|
||||
device_num, DPxPTR(ptr));
|
||||
DeviceNum, DPxPTR(Ptr));
|
||||
|
||||
if (!ptr) {
|
||||
if (!Ptr) {
|
||||
DP("Call to omp_target_is_present with NULL ptr, returning false\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
DP("Call to omp_target_is_present on host, returning true\n");
|
||||
return true;
|
||||
}
|
||||
|
@ -108,13 +108,13 @@ EXTERN int omp_target_is_present(const void *ptr, int device_num) {
|
|||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
if (DevicesSize <= (size_t)device_num) {
|
||||
if (DevicesSize <= (size_t)DeviceNum) {
|
||||
DP("Call to omp_target_is_present with invalid device ID, returning "
|
||||
"false\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_num];
|
||||
DeviceTy &Device = *PM->Devices[DeviceNum];
|
||||
bool IsLast; // not used
|
||||
bool IsHostPtr;
|
||||
// omp_target_is_present tests whether a host pointer refers to storage that
|
||||
|
@ -122,32 +122,32 @@ EXTERN int omp_target_is_present(const void *ptr, int device_num) {
|
|||
// only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
|
||||
// length array) is mapped instead of the referred storage.
|
||||
TargetPointerResultTy TPR =
|
||||
Device.getTgtPtrBegin(const_cast<void *>(ptr), 1, IsLast,
|
||||
Device.getTgtPtrBegin(const_cast<void *>(Ptr), 1, IsLast,
|
||||
/*UpdateRefCount=*/false,
|
||||
/*UseHoldRefCount=*/false, IsHostPtr);
|
||||
int rc = (TPR.TargetPointer != NULL);
|
||||
int Rc = (TPR.TargetPointer != NULL);
|
||||
// Under unified memory the host pointer can be returned by the
|
||||
// getTgtPtrBegin() function which means that there is no device
|
||||
// corresponding point for ptr. This function should return false
|
||||
// in that situation.
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
rc = !IsHostPtr;
|
||||
DP("Call to omp_target_is_present returns %d\n", rc);
|
||||
return rc;
|
||||
Rc = !IsHostPtr;
|
||||
DP("Call to omp_target_is_present returns %d\n", Rc);
|
||||
return Rc;
|
||||
}
|
||||
|
||||
EXTERN int omp_target_memcpy(void *dst, const void *src, size_t length,
|
||||
size_t dst_offset, size_t src_offset,
|
||||
int dst_device, int src_device) {
|
||||
EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
|
||||
size_t DstOffset, size_t SrcOffset, int DstDevice,
|
||||
int SrcDevice) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_memcpy, dst device %d, src device %d, "
|
||||
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
|
||||
"src offset %zu, length %zu\n",
|
||||
dst_device, src_device, DPxPTR(dst), DPxPTR(src), dst_offset, src_offset,
|
||||
length);
|
||||
DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
|
||||
Length);
|
||||
|
||||
if (!dst || !src || length <= 0) {
|
||||
if (length == 0) {
|
||||
if (!Dst || !Src || Length <= 0) {
|
||||
if (Length == 0) {
|
||||
DP("Call to omp_target_memcpy with zero length, nothing to do\n");
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
@ -156,180 +156,180 @@ EXTERN int omp_target_memcpy(void *dst, const void *src, size_t length,
|
|||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (src_device != omp_get_initial_device() && !device_is_ready(src_device)) {
|
||||
if (SrcDevice != omp_get_initial_device() && !deviceIsReady(SrcDevice)) {
|
||||
REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (dst_device != omp_get_initial_device() && !device_is_ready(dst_device)) {
|
||||
if (DstDevice != omp_get_initial_device() && !deviceIsReady(DstDevice)) {
|
||||
REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
int rc = OFFLOAD_SUCCESS;
|
||||
void *srcAddr = (char *)const_cast<void *>(src) + src_offset;
|
||||
void *dstAddr = (char *)dst + dst_offset;
|
||||
int Rc = OFFLOAD_SUCCESS;
|
||||
void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset;
|
||||
void *DstAddr = (char *)Dst + DstOffset;
|
||||
|
||||
if (src_device == omp_get_initial_device() &&
|
||||
dst_device == omp_get_initial_device()) {
|
||||
if (SrcDevice == omp_get_initial_device() &&
|
||||
DstDevice == omp_get_initial_device()) {
|
||||
DP("copy from host to host\n");
|
||||
const void *p = memcpy(dstAddr, srcAddr, length);
|
||||
if (p == NULL)
|
||||
rc = OFFLOAD_FAIL;
|
||||
} else if (src_device == omp_get_initial_device()) {
|
||||
const void *P = memcpy(DstAddr, SrcAddr, Length);
|
||||
if (P == NULL)
|
||||
Rc = OFFLOAD_FAIL;
|
||||
} else if (SrcDevice == omp_get_initial_device()) {
|
||||
DP("copy from host to device\n");
|
||||
DeviceTy &DstDev = *PM->Devices[dst_device];
|
||||
DeviceTy &DstDev = *PM->Devices[DstDevice];
|
||||
AsyncInfoTy AsyncInfo(DstDev);
|
||||
rc = DstDev.submitData(dstAddr, srcAddr, length, AsyncInfo);
|
||||
} else if (dst_device == omp_get_initial_device()) {
|
||||
Rc = DstDev.submitData(DstAddr, SrcAddr, Length, AsyncInfo);
|
||||
} else if (DstDevice == omp_get_initial_device()) {
|
||||
DP("copy from device to host\n");
|
||||
DeviceTy &SrcDev = *PM->Devices[src_device];
|
||||
DeviceTy &SrcDev = *PM->Devices[SrcDevice];
|
||||
AsyncInfoTy AsyncInfo(SrcDev);
|
||||
rc = SrcDev.retrieveData(dstAddr, srcAddr, length, AsyncInfo);
|
||||
Rc = SrcDev.retrieveData(DstAddr, SrcAddr, Length, AsyncInfo);
|
||||
} else {
|
||||
DP("copy from device to device\n");
|
||||
DeviceTy &SrcDev = *PM->Devices[src_device];
|
||||
DeviceTy &DstDev = *PM->Devices[dst_device];
|
||||
DeviceTy &SrcDev = *PM->Devices[SrcDevice];
|
||||
DeviceTy &DstDev = *PM->Devices[DstDevice];
|
||||
// First try to use D2D memcpy, which is more efficient. If it fails, fall back
|
||||
// to the inefficient way.
|
||||
if (SrcDev.isDataExchangable(DstDev)) {
|
||||
AsyncInfoTy AsyncInfo(SrcDev);
|
||||
rc = SrcDev.dataExchange(srcAddr, DstDev, dstAddr, length, AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
Rc = SrcDev.dataExchange(SrcAddr, DstDev, DstAddr, Length, AsyncInfo);
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
void *buffer = malloc(length);
|
||||
void *Buffer = malloc(Length);
|
||||
{
|
||||
AsyncInfoTy AsyncInfo(SrcDev);
|
||||
rc = SrcDev.retrieveData(buffer, srcAddr, length, AsyncInfo);
|
||||
Rc = SrcDev.retrieveData(Buffer, SrcAddr, Length, AsyncInfo);
|
||||
}
|
||||
if (rc == OFFLOAD_SUCCESS) {
|
||||
if (Rc == OFFLOAD_SUCCESS) {
|
||||
AsyncInfoTy AsyncInfo(SrcDev);
|
||||
rc = DstDev.submitData(dstAddr, buffer, length, AsyncInfo);
|
||||
Rc = DstDev.submitData(DstAddr, Buffer, Length, AsyncInfo);
|
||||
}
|
||||
free(buffer);
|
||||
free(Buffer);
|
||||
}
|
||||
|
||||
DP("omp_target_memcpy returns %d\n", rc);
|
||||
return rc;
|
||||
DP("omp_target_memcpy returns %d\n", Rc);
|
||||
return Rc;
|
||||
}
|
||||
|
||||
EXTERN int omp_target_memcpy_rect(
|
||||
void *dst, const void *src, size_t element_size, int num_dims,
|
||||
const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets,
|
||||
const size_t *dst_dimensions, const size_t *src_dimensions, int dst_device,
|
||||
int src_device) {
|
||||
EXTERN int
|
||||
omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
|
||||
int NumDims, const size_t *Volume,
|
||||
const size_t *DstOffsets, const size_t *SrcOffsets,
|
||||
const size_t *DstDimensions, const size_t *SrcDimensions,
|
||||
int DstDevice, int SrcDevice) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
|
||||
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
|
||||
"src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
|
||||
"volume " DPxMOD ", element size %zu, num_dims %d\n",
|
||||
dst_device, src_device, DPxPTR(dst), DPxPTR(src), DPxPTR(dst_offsets),
|
||||
DPxPTR(src_offsets), DPxPTR(dst_dimensions), DPxPTR(src_dimensions),
|
||||
DPxPTR(volume), element_size, num_dims);
|
||||
DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
|
||||
DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
|
||||
DPxPTR(Volume), ElementSize, NumDims);
|
||||
|
||||
if (!(dst || src)) {
|
||||
if (!(Dst || Src)) {
|
||||
DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
|
||||
INT_MAX);
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
if (!dst || !src || element_size < 1 || num_dims < 1 || !volume ||
|
||||
!dst_offsets || !src_offsets || !dst_dimensions || !src_dimensions) {
|
||||
if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume ||
|
||||
!DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) {
|
||||
REPORT("Call to omp_target_memcpy_rect with invalid arguments\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
int rc;
|
||||
if (num_dims == 1) {
|
||||
rc = omp_target_memcpy(
|
||||
dst, src, element_size * volume[0], element_size * dst_offsets[0],
|
||||
element_size * src_offsets[0], dst_device, src_device);
|
||||
int Rc;
|
||||
if (NumDims == 1) {
|
||||
Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0],
|
||||
ElementSize * DstOffsets[0],
|
||||
ElementSize * SrcOffsets[0], DstDevice, SrcDevice);
|
||||
} else {
|
||||
size_t dst_slice_size = element_size;
|
||||
size_t src_slice_size = element_size;
|
||||
for (int i = 1; i < num_dims; ++i) {
|
||||
dst_slice_size *= dst_dimensions[i];
|
||||
src_slice_size *= src_dimensions[i];
|
||||
size_t DstSliceSize = ElementSize;
|
||||
size_t SrcSliceSize = ElementSize;
|
||||
for (int I = 1; I < NumDims; ++I) {
|
||||
DstSliceSize *= DstDimensions[I];
|
||||
SrcSliceSize *= SrcDimensions[I];
|
||||
}
|
||||
|
||||
size_t dst_off = dst_offsets[0] * dst_slice_size;
|
||||
size_t src_off = src_offsets[0] * src_slice_size;
|
||||
for (size_t i = 0; i < volume[0]; ++i) {
|
||||
rc = omp_target_memcpy_rect(
|
||||
(char *)dst + dst_off + dst_slice_size * i,
|
||||
(char *)const_cast<void *>(src) + src_off + src_slice_size * i,
|
||||
element_size, num_dims - 1, volume + 1, dst_offsets + 1,
|
||||
src_offsets + 1, dst_dimensions + 1, src_dimensions + 1, dst_device,
|
||||
src_device);
|
||||
size_t DstOff = DstOffsets[0] * DstSliceSize;
|
||||
size_t SrcOff = SrcOffsets[0] * SrcSliceSize;
|
||||
for (size_t I = 0; I < Volume[0]; ++I) {
|
||||
Rc = omp_target_memcpy_rect(
|
||||
(char *)Dst + DstOff + DstSliceSize * I,
|
||||
(char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I,
|
||||
ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1,
|
||||
DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice);
|
||||
|
||||
if (rc) {
|
||||
if (Rc) {
|
||||
DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n");
|
||||
return rc;
|
||||
return Rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DP("omp_target_memcpy_rect returns %d\n", rc);
|
||||
return rc;
|
||||
DP("omp_target_memcpy_rect returns %d\n", Rc);
|
||||
return Rc;
|
||||
}
|
||||
|
||||
EXTERN int omp_target_associate_ptr(const void *host_ptr,
|
||||
const void *device_ptr, size_t size,
|
||||
size_t device_offset, int device_num) {
|
||||
EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
|
||||
size_t Size, size_t DeviceOffset,
|
||||
int DeviceNum) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
|
||||
"device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
|
||||
DPxPTR(host_ptr), DPxPTR(device_ptr), size, device_offset, device_num);
|
||||
DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum);
|
||||
|
||||
if (!host_ptr || !device_ptr || size <= 0) {
|
||||
if (!HostPtr || !DevicePtr || Size <= 0) {
|
||||
REPORT("Call to omp_target_associate_ptr with invalid arguments\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
REPORT("omp_target_associate_ptr: no association possible on the host\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
if (!deviceIsReady(DeviceNum)) {
|
||||
REPORT("omp_target_associate_ptr returns OFFLOAD_FAIL\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_num];
|
||||
void *device_addr = (void *)((uint64_t)device_ptr + (uint64_t)device_offset);
|
||||
int rc = Device.associatePtr(const_cast<void *>(host_ptr),
|
||||
const_cast<void *>(device_addr), size);
|
||||
DP("omp_target_associate_ptr returns %d\n", rc);
|
||||
return rc;
|
||||
DeviceTy &Device = *PM->Devices[DeviceNum];
|
||||
void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset);
|
||||
int Rc = Device.associatePtr(const_cast<void *>(HostPtr),
|
||||
const_cast<void *>(DeviceAddr), Size);
|
||||
DP("omp_target_associate_ptr returns %d\n", Rc);
|
||||
return Rc;
|
||||
}
|
||||
|
||||
EXTERN int omp_target_disassociate_ptr(const void *host_ptr, int device_num) {
|
||||
EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
|
||||
TIMESCOPE();
|
||||
DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
|
||||
"device_num %d\n",
|
||||
DPxPTR(host_ptr), device_num);
|
||||
DPxPTR(HostPtr), DeviceNum);
|
||||
|
||||
if (!host_ptr) {
|
||||
if (!HostPtr) {
|
||||
REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
REPORT(
|
||||
"omp_target_disassociate_ptr: no association possible on the host\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
if (!deviceIsReady(DeviceNum)) {
|
||||
REPORT("omp_target_disassociate_ptr returns OFFLOAD_FAIL\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_num];
|
||||
int rc = Device.disassociatePtr(const_cast<void *>(host_ptr));
|
||||
DP("omp_target_disassociate_ptr returns %d\n", rc);
|
||||
return rc;
|
||||
DeviceTy &Device = *PM->Devices[DeviceNum];
|
||||
int Rc = Device.disassociatePtr(const_cast<void *>(HostPtr));
|
||||
DP("omp_target_disassociate_ptr returns %d\n", Rc);
|
||||
return Rc;
|
||||
}
|
||||
|
|
|
@ -58,8 +58,8 @@ DeviceTy::~DeviceTy() {
|
|||
if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
|
||||
return;
|
||||
|
||||
ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
|
||||
dumpTargetPointerMappings(&loc, *this);
|
||||
ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
|
||||
dumpTargetPointerMappings(&Loc, *this);
|
||||
}
|
||||
|
||||
int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
|
||||
|
@ -70,21 +70,20 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
|
|||
if (It != HDTTMap->end()) {
|
||||
HostDataToTargetTy &HDTT = *It->HDTT;
|
||||
// Mapping already exists
|
||||
bool isValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
|
||||
bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
|
||||
HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin;
|
||||
if (isValid) {
|
||||
if (IsValid) {
|
||||
DP("Attempt to re-associate the same device ptr+offset with the same "
|
||||
"host ptr, nothing to do\n");
|
||||
return OFFLOAD_SUCCESS;
|
||||
} else {
|
||||
REPORT("Not allowed to re-associate a different device ptr+offset with "
|
||||
"the same host ptr\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
REPORT("Not allowed to re-associate a different device ptr+offset with "
|
||||
"the same host ptr\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
// Mapping does not exist, allocate it with refCount=INF
|
||||
const HostDataToTargetTy &newEntry =
|
||||
const HostDataToTargetTy &NewEntry =
|
||||
*HDTTMap
|
||||
->emplace(new HostDataToTargetTy(
|
||||
/*HstPtrBase=*/(uintptr_t)HstPtrBegin,
|
||||
|
@ -97,10 +96,10 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
|
|||
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
|
||||
", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
|
||||
"HoldRefCount=%s\n",
|
||||
DPxPTR(newEntry.HstPtrBase), DPxPTR(newEntry.HstPtrBegin),
|
||||
DPxPTR(newEntry.HstPtrEnd), DPxPTR(newEntry.TgtPtrBegin),
|
||||
newEntry.dynRefCountToStr().c_str(), newEntry.holdRefCountToStr().c_str());
|
||||
(void)newEntry;
|
||||
DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin),
|
||||
DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin),
|
||||
NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str());
|
||||
(void)NewEntry;
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
@ -141,71 +140,71 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) {
|
|||
LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
|
||||
void *HstPtrBegin, int64_t Size) {
|
||||
|
||||
uintptr_t hp = (uintptr_t)HstPtrBegin;
|
||||
LookupResult lr;
|
||||
uintptr_t HP = (uintptr_t)HstPtrBegin;
|
||||
LookupResult LR;
|
||||
|
||||
DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
|
||||
DPxPTR(hp), Size);
|
||||
DPxPTR(HP), Size);
|
||||
|
||||
if (HDTTMap->empty())
|
||||
return lr;
|
||||
return LR;
|
||||
|
||||
auto upper = HDTTMap->upper_bound(hp);
|
||||
auto Upper = HDTTMap->upper_bound(HP);
|
||||
|
||||
if (Size == 0) {
|
||||
// specification v5.1 Pointer Initialization for Device Data Environments
|
||||
// upper_bound satisfies
|
||||
// std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
|
||||
if (upper != HDTTMap->begin()) {
|
||||
lr.Entry = std::prev(upper)->HDTT;
|
||||
auto &HT = *lr.Entry;
|
||||
if (Upper != HDTTMap->begin()) {
|
||||
LR.Entry = std::prev(Upper)->HDTT;
|
||||
auto &HT = *LR.Entry;
|
||||
// the left side of extended address range is satisfied.
|
||||
// hp >= HT.HstPtrBegin || hp >= HT.HstPtrBase
|
||||
lr.Flags.IsContained = hp < HT.HstPtrEnd || hp < HT.HstPtrBase;
|
||||
LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase;
|
||||
}
|
||||
|
||||
if (!lr.Flags.IsContained && upper != HDTTMap->end()) {
|
||||
lr.Entry = upper->HDTT;
|
||||
auto &HT = *lr.Entry;
|
||||
if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
|
||||
LR.Entry = Upper->HDTT;
|
||||
auto &HT = *LR.Entry;
|
||||
// the right side of extended address range is satisfied.
|
||||
// hp < HT.HstPtrEnd || hp < HT.HstPtrBase
|
||||
lr.Flags.IsContained = hp >= HT.HstPtrBase;
|
||||
LR.Flags.IsContained = HP >= HT.HstPtrBase;
|
||||
}
|
||||
} else {
|
||||
// check the left bin
|
||||
if (upper != HDTTMap->begin()) {
|
||||
lr.Entry = std::prev(upper)->HDTT;
|
||||
auto &HT = *lr.Entry;
|
||||
if (Upper != HDTTMap->begin()) {
|
||||
LR.Entry = std::prev(Upper)->HDTT;
|
||||
auto &HT = *LR.Entry;
|
||||
// Is it contained?
|
||||
lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
|
||||
(hp + Size) <= HT.HstPtrEnd;
|
||||
LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd &&
|
||||
(HP + Size) <= HT.HstPtrEnd;
|
||||
// Does it extend beyond the mapped region?
|
||||
lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
|
||||
LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
|
||||
}
|
||||
|
||||
// check the right bin
|
||||
if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
|
||||
upper != HDTTMap->end()) {
|
||||
lr.Entry = upper->HDTT;
|
||||
auto &HT = *lr.Entry;
|
||||
if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) &&
|
||||
Upper != HDTTMap->end()) {
|
||||
LR.Entry = Upper->HDTT;
|
||||
auto &HT = *LR.Entry;
|
||||
// Does it extend into an already mapped region?
|
||||
lr.Flags.ExtendsBefore =
|
||||
hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin;
|
||||
LR.Flags.ExtendsBefore =
|
||||
HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin;
|
||||
// Does it extend beyond the mapped region?
|
||||
lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
|
||||
LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
|
||||
}
|
||||
|
||||
if (lr.Flags.ExtendsBefore) {
|
||||
if (LR.Flags.ExtendsBefore) {
|
||||
DP("WARNING: Pointer is not mapped but section extends into already "
|
||||
"mapped data\n");
|
||||
}
|
||||
if (lr.Flags.ExtendsAfter) {
|
||||
if (LR.Flags.ExtendsAfter) {
|
||||
DP("WARNING: Pointer is already mapped but section extends beyond mapped "
|
||||
"region\n");
|
||||
}
|
||||
}
|
||||
|
||||
return lr;
|
||||
return LR;
|
||||
}
|
||||
|
||||
TargetPointerResultTy DeviceTy::getTargetPointer(
|
||||
|
@ -368,11 +367,11 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|||
bool IsNew = false;
|
||||
IsHostPtr = false;
|
||||
IsLast = false;
|
||||
LookupResult lr = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
||||
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
||||
|
||||
if (lr.Flags.IsContained ||
|
||||
(!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
|
||||
auto &HT = *lr.Entry;
|
||||
if (LR.Flags.IsContained ||
|
||||
(!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) {
|
||||
auto &HT = *LR.Entry;
|
||||
IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
|
||||
|
||||
if (ForceDelete) {
|
||||
|
@ -403,13 +402,13 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|||
}
|
||||
const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
|
||||
const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
|
||||
uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
|
||||
uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
|
||||
INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
|
||||
"Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
|
||||
"Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
|
||||
DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(),
|
||||
DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(),
|
||||
DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
|
||||
TargetPointer = (void *)tp;
|
||||
TargetPointer = (void *)TP;
|
||||
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
// If the value isn't found in the mapping and unified shared memory
|
||||
// is on then it means we have stumbled upon a value which we need to
|
||||
|
@ -421,18 +420,18 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|||
TargetPointer = HstPtrBegin;
|
||||
}
|
||||
|
||||
return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer};
|
||||
return {{IsNew, IsHostPtr}, LR.Entry, TargetPointer};
|
||||
}
|
||||
|
||||
// Return the target pointer begin (where the data will be moved).
|
||||
void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
|
||||
int64_t Size) {
|
||||
uintptr_t hp = (uintptr_t)HstPtrBegin;
|
||||
LookupResult lr = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
||||
if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) {
|
||||
auto &HT = *lr.Entry;
|
||||
uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin);
|
||||
return (void *)tp;
|
||||
uintptr_t HP = (uintptr_t)HstPtrBegin;
|
||||
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
||||
if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) {
|
||||
auto &HT = *LR.Entry;
|
||||
uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin);
|
||||
return (void *)TP;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -500,8 +499,7 @@ int32_t DeviceTy::initOnce() {
|
|||
// is still false, return OFFLOAD_FAIL.
|
||||
if (IsInit)
|
||||
return OFFLOAD_SUCCESS;
|
||||
else
|
||||
return OFFLOAD_FAIL;
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
void DeviceTy::deinit() {
|
||||
|
@ -510,10 +508,9 @@ void DeviceTy::deinit() {
|
|||
}
|
||||
|
||||
// Load binary to device.
|
||||
__tgt_target_table *DeviceTy::load_binary(void *Img) {
|
||||
__tgt_target_table *DeviceTy::loadBinary(void *Img) {
|
||||
std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
|
||||
__tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
|
||||
return rc;
|
||||
return RTL->load_binary(RTLDeviceID, Img);
|
||||
}
|
||||
|
||||
void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
|
||||
|
@ -542,9 +539,8 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
|
|||
|
||||
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
|
||||
return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
|
||||
else
|
||||
return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
|
||||
AsyncInfo);
|
||||
return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
// Retrieve data from device
|
||||
|
@ -564,9 +560,8 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
|
|||
|
||||
if (!RTL->data_retrieve_async || !RTL->synchronize)
|
||||
return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
|
||||
else
|
||||
return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
|
||||
AsyncInfo);
|
||||
return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
// Copy data from current device to destination device directly
|
||||
|
@ -576,9 +571,9 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
|
|||
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
|
||||
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
|
||||
Size);
|
||||
} else
|
||||
return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
|
||||
DstPtr, Size, AsyncInfo);
|
||||
}
|
||||
return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
|
||||
DstPtr, Size, AsyncInfo);
|
||||
}
|
||||
|
||||
// Run region on device
|
||||
|
@ -588,9 +583,8 @@ int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
|
|||
if (!RTL->run_region || !RTL->synchronize)
|
||||
return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
||||
TgtVarsSize);
|
||||
else
|
||||
return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||
TgtOffsets, TgtVarsSize, AsyncInfo);
|
||||
return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
||||
TgtVarsSize, AsyncInfo);
|
||||
}
|
||||
|
||||
// Run region on device
|
||||
|
@ -611,10 +605,9 @@ int32_t DeviceTy::runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr,
|
|||
return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||
TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
|
||||
LoopTripCount);
|
||||
else
|
||||
return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||
TgtOffsets, TgtVarsSize, NumTeams,
|
||||
ThreadLimit, LoopTripCount, AsyncInfo);
|
||||
return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||
TgtOffsets, TgtVarsSize, NumTeams,
|
||||
ThreadLimit, LoopTripCount, AsyncInfo);
|
||||
}
|
||||
|
||||
// Whether data can be copied to DstDevice directly
|
||||
|
@ -672,8 +665,8 @@ int32_t DeviceTy::destroyEvent(void *Event) {
|
|||
|
||||
/// Check whether a device has an associated RTL and initialize it if it's not
|
||||
/// already initialized.
|
||||
bool device_is_ready(int device_num) {
|
||||
DP("Checking whether device %d is ready.\n", device_num);
|
||||
bool deviceIsReady(int DeviceNum) {
|
||||
DP("Checking whether device %d is ready.\n", DeviceNum);
|
||||
// Devices.size() can only change while registering a new
|
||||
// library, so try to acquire the lock of RTLs' mutex.
|
||||
size_t DevicesSize;
|
||||
|
@ -681,24 +674,24 @@ bool device_is_ready(int device_num) {
|
|||
std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
|
||||
DevicesSize = PM->Devices.size();
|
||||
}
|
||||
if (DevicesSize <= (size_t)device_num) {
|
||||
DP("Device ID %d does not have a matching RTL\n", device_num);
|
||||
if (DevicesSize <= (size_t)DeviceNum) {
|
||||
DP("Device ID %d does not have a matching RTL\n", DeviceNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get device info
|
||||
DeviceTy &Device = *PM->Devices[device_num];
|
||||
DeviceTy &Device = *PM->Devices[DeviceNum];
|
||||
|
||||
DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
|
||||
DP("Is the device %d (local ID %d) initialized? %d\n", DeviceNum,
|
||||
Device.RTLDeviceID, Device.IsInit);
|
||||
|
||||
// Init the device if not done before
|
||||
if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
|
||||
DP("Failed to init device %d\n", device_num);
|
||||
DP("Failed to init device %d\n", DeviceNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
DP("Device %d is ready to use.\n", device_num);
|
||||
DP("Device %d is ready to use.\n", DeviceNum);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -23,24 +23,24 @@
|
|||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds requires flags
|
||||
EXTERN void __tgt_register_requires(int64_t flags) {
|
||||
EXTERN void __tgt_register_requires(int64_t Flags) {
|
||||
TIMESCOPE();
|
||||
PM->RTLs.RegisterRequires(flags);
|
||||
PM->RTLs.registerRequires(Flags);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds a target shared library to the target execution image
|
||||
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
|
||||
EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
|
||||
TIMESCOPE();
|
||||
std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
|
||||
std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
|
||||
for (auto &RTL : PM->RTLs.AllRTLs) {
|
||||
if (RTL.register_lib) {
|
||||
if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
|
||||
if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
|
||||
DP("Could not register library with %s", RTL.RTLName.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
PM->RTLs.RegisterLib(desc);
|
||||
PM->RTLs.registerLib(Desc);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -49,12 +49,12 @@ EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
|
|||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// unloads a target shared library
|
||||
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
|
||||
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
|
||||
TIMESCOPE();
|
||||
PM->RTLs.UnregisterLib(desc);
|
||||
PM->RTLs.unregisterLib(Desc);
|
||||
for (auto &RTL : PM->RTLs.UsedRTLs) {
|
||||
if (RTL->unregister_lib) {
|
||||
if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
|
||||
if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) {
|
||||
DP("Could not register library with %s", RTL->RTLName.c_str());
|
||||
}
|
||||
}
|
||||
|
@ -64,384 +64,383 @@ EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
|
|||
/// creates host-to-target data mapping, stores it in the
|
||||
/// libomptarget.so internal structure (an entry in a stack of data maps)
|
||||
/// and passes the data to the device.
|
||||
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types) {
|
||||
EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes) {
|
||||
TIMESCOPE();
|
||||
__tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE();
|
||||
|
||||
__tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames,
|
||||
void **ArgMappers) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
|
||||
device_id, arg_num);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
DeviceId, ArgNum);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
|
||||
arg_names, "Entering OpenMP data region");
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
"Entering OpenMP data region");
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int i = 0; i < arg_num; ++i) {
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
|
||||
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
|
||||
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
|
||||
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers, AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_begin_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
|
||||
__tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, arg_names, arg_mappers);
|
||||
__tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, ArgNames, ArgMappers);
|
||||
}
|
||||
|
||||
/// passes data from the target, releases target memory and destroys
|
||||
/// the host-target mapping (top entry from the stack of data maps)
|
||||
/// created by the last __tgt_target_data_begin.
|
||||
EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types) {
|
||||
EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes) {
|
||||
TIMESCOPE();
|
||||
__tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE();
|
||||
|
||||
__tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
DP("Entering data end region with %d mappings\n", arg_num);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames,
|
||||
void **ArgMappers) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
DP("Entering data end region with %d mappings\n", ArgNum);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
|
||||
arg_names, "Exiting OpenMP data region");
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
"Exiting OpenMP data region");
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int i = 0; i < arg_num; ++i) {
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
|
||||
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
|
||||
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
|
||||
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers, AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_end_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
|
||||
__tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, arg_names, arg_mappers);
|
||||
__tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types) {
|
||||
EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes) {
|
||||
TIMESCOPE();
|
||||
__tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_update_nowait(
|
||||
int64_t device_id, int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList) {
|
||||
TIMESCOPE();
|
||||
|
||||
__tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, nullptr, nullptr);
|
||||
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
DP("Entering data update with %d mappings\n", arg_num);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames,
|
||||
void **ArgMappers) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
DP("Entering data update with %d mappings\n", ArgNum);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
|
||||
arg_names, "Updating OpenMP data");
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
"Updating OpenMP data");
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers, AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
|
||||
}
|
||||
|
||||
EXTERN void __tgt_target_data_update_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
|
||||
__tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, arg_names, arg_mappers);
|
||||
__tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, ArgNames, ArgMappers);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types) {
|
||||
EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes) {
|
||||
TIMESCOPE();
|
||||
return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
|
||||
args, arg_sizes, arg_types, nullptr, nullptr);
|
||||
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t depNum, void *depList,
|
||||
int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList) {
|
||||
TIMESCOPE();
|
||||
|
||||
return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
|
||||
args, arg_sizes, arg_types, nullptr, nullptr);
|
||||
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, nullptr, nullptr);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
|
||||
"\n",
|
||||
DPxPTR(host_ptr), device_id);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
DPxPTR(HostPtr), DeviceId);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return OMP_TGT_FAIL;
|
||||
}
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
|
||||
arg_names, "Entering OpenMP kernel");
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
"Entering OpenMP kernel");
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int i = 0; i < arg_num; ++i) {
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
|
||||
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
|
||||
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
|
||||
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers, 0, 0, false /*team*/,
|
||||
AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
|
||||
int Rc =
|
||||
target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
|
||||
ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo);
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
|
||||
assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!");
|
||||
return OMP_TGT_SUCCESS;
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
|
||||
int32_t NoAliasDepNum, void *NoAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
|
||||
return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
|
||||
arg_sizes, arg_types, arg_names, arg_mappers);
|
||||
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
|
||||
ArgSizes, ArgTypes, ArgNames, ArgMappers);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
int32_t team_num, int32_t thread_limit) {
|
||||
EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t TeamNum,
|
||||
int32_t ThreadLimit) {
|
||||
TIMESCOPE();
|
||||
return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
|
||||
args_base, args, arg_sizes, arg_types,
|
||||
nullptr, nullptr, team_num, thread_limit);
|
||||
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
|
||||
Args, ArgSizes, ArgTypes, nullptr, nullptr,
|
||||
TeamNum, ThreadLimit);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
|
||||
int32_t arg_num, void **args_base,
|
||||
void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, int32_t team_num,
|
||||
int32_t thread_limit, int32_t depNum,
|
||||
void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgsBase,
|
||||
void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, int32_t TeamNum,
|
||||
int32_t ThreadLimit, int32_t DepNum,
|
||||
void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE();
|
||||
|
||||
return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
|
||||
args_base, args, arg_sizes, arg_types,
|
||||
nullptr, nullptr, team_num, thread_limit);
|
||||
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
|
||||
Args, ArgSizes, ArgTypes, nullptr, nullptr,
|
||||
TeamNum, ThreadLimit);
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
|
||||
void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args,
|
||||
int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names,
|
||||
void **arg_mappers, int32_t team_num,
|
||||
int32_t thread_limit) {
|
||||
EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t TeamNum,
|
||||
int32_t ThreadLimit) {
|
||||
DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
|
||||
"\n",
|
||||
DPxPTR(host_ptr), device_id);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
DPxPTR(HostPtr), DeviceId);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return OMP_TGT_FAIL;
|
||||
}
|
||||
|
||||
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
|
||||
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
|
||||
arg_names, "Entering OpenMP kernel");
|
||||
printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
|
||||
"Entering OpenMP kernel");
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int i = 0; i < arg_num; ++i) {
|
||||
for (int I = 0; I < ArgNum; ++I) {
|
||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s\n",
|
||||
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
|
||||
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
|
||||
I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
|
||||
(ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
|
||||
arg_types, arg_names, arg_mappers, team_num, thread_limit,
|
||||
int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit,
|
||||
true /*team*/, AsyncInfo);
|
||||
if (rc == OFFLOAD_SUCCESS)
|
||||
rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
|
||||
assert(rc == OFFLOAD_SUCCESS &&
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
|
||||
assert(Rc == OFFLOAD_SUCCESS &&
|
||||
"__tgt_target_teams_mapper unexpected failure!");
|
||||
return OMP_TGT_SUCCESS;
|
||||
}
|
||||
|
||||
EXTERN int __tgt_target_teams_nowait_mapper(
|
||||
ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||
map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
|
||||
int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
|
||||
void *noAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
|
||||
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
|
||||
void *NoAliasDepList) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
|
||||
return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
|
||||
args, arg_sizes, arg_types, arg_names,
|
||||
arg_mappers, team_num, thread_limit);
|
||||
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
|
||||
Args, ArgSizes, ArgTypes, ArgNames,
|
||||
ArgMappers, TeamNum, ThreadLimit);
|
||||
}
|
||||
|
||||
// Get the current number of components for a user-defined mapper.
|
||||
EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
|
||||
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
|
||||
TIMESCOPE();
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
|
||||
int64_t size = MapperComponentsPtr->Components.size();
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
|
||||
int64_t Size = MapperComponentsPtr->Components.size();
|
||||
DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
|
||||
DPxPTR(rt_mapper_handle), size);
|
||||
return size;
|
||||
DPxPTR(RtMapperHandle), Size);
|
||||
return Size;
|
||||
}
|
||||
|
||||
// Push back one component for a user-defined mapper.
|
||||
EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
|
||||
void *begin, int64_t size, int64_t type,
|
||||
void *name) {
|
||||
EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
|
||||
void *Begin, int64_t Size, int64_t Type,
|
||||
void *Name) {
|
||||
TIMESCOPE();
|
||||
DP("__tgt_push_mapper_component(Handle=" DPxMOD
|
||||
") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||
", Type=0x%" PRIx64 ", Name=%s).\n",
|
||||
DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
|
||||
(name) ? getNameFromMapping(name).c_str() : "unknown");
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
|
||||
DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
|
||||
(Name) ? getNameFromMapping(Name).c_str() : "unknown");
|
||||
auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
|
||||
MapperComponentsPtr->Components.push_back(
|
||||
MapComponentInfoTy(base, begin, size, type, name));
|
||||
MapComponentInfoTy(Base, Begin, Size, Type, Name));
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
|
||||
uint64_t loop_tripcount) {
|
||||
__kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
|
||||
EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId,
|
||||
uint64_t LoopTripcount) {
|
||||
__kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount);
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
|
||||
uint64_t loop_tripcount) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
if (checkDeviceAndCtors(device_id, loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", device_id);
|
||||
EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
uint64_t LoopTripcount) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
if (checkDeviceAndCtors(DeviceId, Loc)) {
|
||||
DP("Not offloading to device %" PRId64 "\n", DeviceId);
|
||||
return;
|
||||
}
|
||||
|
||||
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
|
||||
loop_tripcount);
|
||||
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
|
||||
LoopTripcount);
|
||||
PM->TblMapMtx.lock();
|
||||
PM->Devices[device_id]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
|
||||
loop_tripcount);
|
||||
PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
|
||||
LoopTripcount);
|
||||
PM->TblMapMtx.unlock();
|
||||
}
|
||||
|
||||
|
@ -454,7 +453,7 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
|
|||
}
|
||||
}
|
||||
|
||||
EXTERN int __tgt_print_device_info(int64_t device_id) {
|
||||
return PM->Devices[device_id]->printDeviceInfo(
|
||||
PM->Devices[device_id]->RTLDeviceID);
|
||||
EXTERN int __tgt_print_device_info(int64_t DeviceId) {
|
||||
return PM->Devices[DeviceId]->printDeviceInfo(
|
||||
PM->Devices[DeviceId]->RTLDeviceID);
|
||||
}
|
||||
|
|
|
@ -62,81 +62,81 @@ PropertyTy getProperty(omp_interop_val_t &InteropVal,
|
|||
omp_interop_property_t Property, int *Err);
|
||||
|
||||
template <>
|
||||
intptr_t getProperty<intptr_t>(omp_interop_val_t &interop_val,
|
||||
omp_interop_property_t property, int *err) {
|
||||
switch (property) {
|
||||
intptr_t getProperty<intptr_t>(omp_interop_val_t &InteropVal,
|
||||
omp_interop_property_t Property, int *Err) {
|
||||
switch (Property) {
|
||||
case omp_ipr_fr_id:
|
||||
return interop_val.backend_type_id;
|
||||
return InteropVal.backend_type_id;
|
||||
case omp_ipr_vendor:
|
||||
return interop_val.vendor_id;
|
||||
return InteropVal.vendor_id;
|
||||
case omp_ipr_device_num:
|
||||
return interop_val.device_id;
|
||||
return InteropVal.device_id;
|
||||
default:;
|
||||
}
|
||||
getTypeMismatch(property, err);
|
||||
getTypeMismatch(Property, Err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
const char *getProperty<const char *>(omp_interop_val_t &interop_val,
|
||||
omp_interop_property_t property,
|
||||
int *err) {
|
||||
switch (property) {
|
||||
const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
|
||||
omp_interop_property_t Property,
|
||||
int *Err) {
|
||||
switch (Property) {
|
||||
case omp_ipr_fr_id:
|
||||
return interop_val.interop_type == kmp_interop_type_tasksync
|
||||
return InteropVal.interop_type == kmp_interop_type_tasksync
|
||||
? "tasksync"
|
||||
: "device+context";
|
||||
case omp_ipr_vendor_name:
|
||||
return getVendorIdToStr(interop_val.vendor_id);
|
||||
return getVendorIdToStr(InteropVal.vendor_id);
|
||||
default:
|
||||
getTypeMismatch(property, err);
|
||||
getTypeMismatch(Property, Err);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void *getProperty<void *>(omp_interop_val_t &interop_val,
|
||||
omp_interop_property_t property, int *err) {
|
||||
switch (property) {
|
||||
void *getProperty<void *>(omp_interop_val_t &InteropVal,
|
||||
omp_interop_property_t Property, int *Err) {
|
||||
switch (Property) {
|
||||
case omp_ipr_device:
|
||||
if (interop_val.device_info.Device)
|
||||
return interop_val.device_info.Device;
|
||||
*err = omp_irc_no_value;
|
||||
return const_cast<char *>(interop_val.err_str);
|
||||
if (InteropVal.device_info.Device)
|
||||
return InteropVal.device_info.Device;
|
||||
*Err = omp_irc_no_value;
|
||||
return const_cast<char *>(InteropVal.err_str);
|
||||
case omp_ipr_device_context:
|
||||
return interop_val.device_info.Context;
|
||||
return InteropVal.device_info.Context;
|
||||
case omp_ipr_targetsync:
|
||||
return interop_val.async_info->Queue;
|
||||
return InteropVal.async_info->Queue;
|
||||
default:;
|
||||
}
|
||||
getTypeMismatch(property, err);
|
||||
getTypeMismatch(Property, Err);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool getPropertyCheck(omp_interop_val_t **interop_ptr,
|
||||
omp_interop_property_t property, int *err) {
|
||||
if (err)
|
||||
*err = omp_irc_success;
|
||||
if (!interop_ptr) {
|
||||
if (err)
|
||||
*err = omp_irc_empty;
|
||||
bool getPropertyCheck(omp_interop_val_t **InteropPtr,
|
||||
omp_interop_property_t Property, int *Err) {
|
||||
if (Err)
|
||||
*Err = omp_irc_success;
|
||||
if (!InteropPtr) {
|
||||
if (Err)
|
||||
*Err = omp_irc_empty;
|
||||
return false;
|
||||
}
|
||||
if (property >= 0 || property < omp_ipr_first) {
|
||||
if (err)
|
||||
*err = omp_irc_out_of_range;
|
||||
if (Property >= 0 || Property < omp_ipr_first) {
|
||||
if (Err)
|
||||
*Err = omp_irc_out_of_range;
|
||||
return false;
|
||||
}
|
||||
if (property == omp_ipr_targetsync &&
|
||||
(*interop_ptr)->interop_type != kmp_interop_type_tasksync) {
|
||||
if (err)
|
||||
*err = omp_irc_other;
|
||||
if (Property == omp_ipr_targetsync &&
|
||||
(*InteropPtr)->interop_type != kmp_interop_type_tasksync) {
|
||||
if (Err)
|
||||
*Err = omp_irc_other;
|
||||
return false;
|
||||
}
|
||||
if ((property == omp_ipr_device || property == omp_ipr_device_context) &&
|
||||
(*interop_ptr)->interop_type == kmp_interop_type_tasksync) {
|
||||
if (err)
|
||||
*err = omp_irc_other;
|
||||
if ((Property == omp_ipr_device || Property == omp_ipr_device_context) &&
|
||||
(*InteropPtr)->interop_type == kmp_interop_type_tasksync) {
|
||||
if (Err)
|
||||
*Err = omp_irc_other;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -181,105 +181,105 @@ typedef int64_t kmp_int64;
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void __tgt_interop_init(ident_t *loc_ref, kmp_int32 gtid,
|
||||
omp_interop_val_t *&interop_ptr,
|
||||
kmp_interop_type_t interop_type, kmp_int32 device_id,
|
||||
kmp_int64 ndeps, kmp_depend_info_t *dep_list,
|
||||
kmp_int32 have_nowait) {
|
||||
kmp_int32 ndeps_noalias = 0;
|
||||
kmp_depend_info_t *noalias_dep_list = NULL;
|
||||
assert(interop_type != kmp_interop_type_unknown &&
|
||||
void __tgt_interop_init(ident_t *LocRef, kmp_int32 Gtid,
|
||||
omp_interop_val_t *&InteropPtr,
|
||||
kmp_interop_type_t InteropType, kmp_int32 DeviceId,
|
||||
kmp_int64 Ndeps, kmp_depend_info_t *DepList,
|
||||
kmp_int32 HaveNowait) {
|
||||
kmp_int32 NdepsNoalias = 0;
|
||||
kmp_depend_info_t *NoaliasDepList = NULL;
|
||||
assert(InteropType != kmp_interop_type_unknown &&
|
||||
"Cannot initialize with unknown interop_type!");
|
||||
if (device_id == -1) {
|
||||
device_id = omp_get_default_device();
|
||||
if (DeviceId == -1) {
|
||||
DeviceId = omp_get_default_device();
|
||||
}
|
||||
|
||||
if (interop_type == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
|
||||
noalias_dep_list);
|
||||
if (InteropType == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
|
||||
NoaliasDepList);
|
||||
}
|
||||
|
||||
interop_ptr = new omp_interop_val_t(device_id, interop_type);
|
||||
if (!device_is_ready(device_id)) {
|
||||
interop_ptr->err_str = "Device not ready!";
|
||||
InteropPtr = new omp_interop_val_t(DeviceId, InteropType);
|
||||
if (!deviceIsReady(DeviceId)) {
|
||||
InteropPtr->err_str = "Device not ready!";
|
||||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_id];
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
if (!Device.RTL || !Device.RTL->init_device_info ||
|
||||
Device.RTL->init_device_info(device_id, &(interop_ptr)->device_info,
|
||||
&(interop_ptr)->err_str)) {
|
||||
delete interop_ptr;
|
||||
interop_ptr = omp_interop_none;
|
||||
Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info,
|
||||
&(InteropPtr)->err_str)) {
|
||||
delete InteropPtr;
|
||||
InteropPtr = omp_interop_none;
|
||||
}
|
||||
if (interop_type == kmp_interop_type_tasksync) {
|
||||
if (InteropType == kmp_interop_type_tasksync) {
|
||||
if (!Device.RTL || !Device.RTL->init_async_info ||
|
||||
Device.RTL->init_async_info(device_id, &(interop_ptr)->async_info)) {
|
||||
delete interop_ptr;
|
||||
interop_ptr = omp_interop_none;
|
||||
Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) {
|
||||
delete InteropPtr;
|
||||
InteropPtr = omp_interop_none;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void __tgt_interop_use(ident_t *loc_ref, kmp_int32 gtid,
|
||||
omp_interop_val_t *&interop_ptr, kmp_int32 device_id,
|
||||
kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
||||
kmp_int32 have_nowait) {
|
||||
kmp_int32 ndeps_noalias = 0;
|
||||
kmp_depend_info_t *noalias_dep_list = NULL;
|
||||
assert(interop_ptr && "Cannot use nullptr!");
|
||||
omp_interop_val_t *interop_val = interop_ptr;
|
||||
if (device_id == -1) {
|
||||
device_id = omp_get_default_device();
|
||||
void __tgt_interop_use(ident_t *LocRef, kmp_int32 Gtid,
|
||||
omp_interop_val_t *&InteropPtr, kmp_int32 DeviceId,
|
||||
kmp_int32 Ndeps, kmp_depend_info_t *DepList,
|
||||
kmp_int32 HaveNowait) {
|
||||
kmp_int32 NdepsNoalias = 0;
|
||||
kmp_depend_info_t *NoaliasDepList = NULL;
|
||||
assert(InteropPtr && "Cannot use nullptr!");
|
||||
omp_interop_val_t *InteropVal = InteropPtr;
|
||||
if (DeviceId == -1) {
|
||||
DeviceId = omp_get_default_device();
|
||||
}
|
||||
assert(interop_val != omp_interop_none &&
|
||||
assert(InteropVal != omp_interop_none &&
|
||||
"Cannot use uninitialized interop_ptr!");
|
||||
assert((device_id == -1 || interop_val->device_id == device_id) &&
|
||||
assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
|
||||
"Inconsistent device-id usage!");
|
||||
|
||||
if (!device_is_ready(device_id)) {
|
||||
interop_ptr->err_str = "Device not ready!";
|
||||
if (!deviceIsReady(DeviceId)) {
|
||||
InteropPtr->err_str = "Device not ready!";
|
||||
return;
|
||||
}
|
||||
|
||||
if (interop_val->interop_type == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
|
||||
noalias_dep_list);
|
||||
if (InteropVal->interop_type == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
|
||||
NoaliasDepList);
|
||||
}
|
||||
// TODO Flush the queue associated with the interop through the plugin
|
||||
}
|
||||
|
||||
void __tgt_interop_destroy(ident_t *loc_ref, kmp_int32 gtid,
|
||||
omp_interop_val_t *&interop_ptr, kmp_int32 device_id,
|
||||
kmp_int32 ndeps, kmp_depend_info_t *dep_list,
|
||||
kmp_int32 have_nowait) {
|
||||
kmp_int32 ndeps_noalias = 0;
|
||||
kmp_depend_info_t *noalias_dep_list = NULL;
|
||||
assert(interop_ptr && "Cannot use nullptr!");
|
||||
omp_interop_val_t *interop_val = interop_ptr;
|
||||
if (device_id == -1) {
|
||||
device_id = omp_get_default_device();
|
||||
void __tgt_interop_destroy(ident_t *LocRef, kmp_int32 Gtid,
|
||||
omp_interop_val_t *&InteropPtr, kmp_int32 DeviceId,
|
||||
kmp_int32 Ndeps, kmp_depend_info_t *DepList,
|
||||
kmp_int32 HaveNowait) {
|
||||
kmp_int32 NdepsNoalias = 0;
|
||||
kmp_depend_info_t *NoaliasDepList = NULL;
|
||||
assert(InteropPtr && "Cannot use nullptr!");
|
||||
omp_interop_val_t *InteropVal = InteropPtr;
|
||||
if (DeviceId == -1) {
|
||||
DeviceId = omp_get_default_device();
|
||||
}
|
||||
|
||||
if (interop_val == omp_interop_none)
|
||||
if (InteropVal == omp_interop_none)
|
||||
return;
|
||||
|
||||
assert((device_id == -1 || interop_val->device_id == device_id) &&
|
||||
assert((DeviceId == -1 || InteropVal->device_id == DeviceId) &&
|
||||
"Inconsistent device-id usage!");
|
||||
if (!device_is_ready(device_id)) {
|
||||
interop_ptr->err_str = "Device not ready!";
|
||||
if (!deviceIsReady(DeviceId)) {
|
||||
InteropPtr->err_str = "Device not ready!";
|
||||
return;
|
||||
}
|
||||
|
||||
if (interop_val->interop_type == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
|
||||
noalias_dep_list);
|
||||
if (InteropVal->interop_type == kmp_interop_type_tasksync) {
|
||||
__kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias,
|
||||
NoaliasDepList);
|
||||
}
|
||||
// TODO Flush the queue associated with the interop through the plugin
|
||||
// TODO Signal out dependences
|
||||
|
||||
delete interop_ptr;
|
||||
interop_ptr = omp_interop_none;
|
||||
delete InteropPtr;
|
||||
InteropPtr = omp_interop_none;
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -70,13 +70,13 @@ void *&AsyncInfoTy::getVoidPtrLocation() {
|
|||
static const int64_t Alignment = 8;
|
||||
|
||||
/// Map global data and execute pending ctors
|
||||
static int InitLibrary(DeviceTy &Device) {
|
||||
static int initLibrary(DeviceTy &Device) {
|
||||
/*
|
||||
* Map global data
|
||||
*/
|
||||
int32_t device_id = Device.DeviceID;
|
||||
int rc = OFFLOAD_SUCCESS;
|
||||
bool supportsEmptyImages = Device.RTL->supports_empty_images &&
|
||||
int32_t DeviceId = Device.DeviceID;
|
||||
int Rc = OFFLOAD_SUCCESS;
|
||||
bool SupportsEmptyImages = Device.RTL->supports_empty_images &&
|
||||
Device.RTL->supports_empty_images() > 0;
|
||||
|
||||
std::lock_guard<decltype(Device.PendingGlobalsMtx)> LG(
|
||||
|
@ -88,50 +88,50 @@ static int InitLibrary(DeviceTy &Device) {
|
|||
&PM->HostEntriesBeginToTransTable[HostEntriesBegin];
|
||||
if (TransTable->HostTable.EntriesBegin ==
|
||||
TransTable->HostTable.EntriesEnd &&
|
||||
!supportsEmptyImages) {
|
||||
!SupportsEmptyImages) {
|
||||
// No host entry so no need to proceed
|
||||
continue;
|
||||
}
|
||||
|
||||
if (TransTable->TargetsTable[device_id] != 0) {
|
||||
if (TransTable->TargetsTable[DeviceId] != 0) {
|
||||
// Library entries have already been processed
|
||||
continue;
|
||||
}
|
||||
|
||||
// 1) get image.
|
||||
assert(TransTable->TargetsImages.size() > (size_t)device_id &&
|
||||
assert(TransTable->TargetsImages.size() > (size_t)DeviceId &&
|
||||
"Not expecting a device ID outside the table's bounds!");
|
||||
__tgt_device_image *img = TransTable->TargetsImages[device_id];
|
||||
if (!img) {
|
||||
REPORT("No image loaded for device id %d.\n", device_id);
|
||||
rc = OFFLOAD_FAIL;
|
||||
__tgt_device_image *Img = TransTable->TargetsImages[DeviceId];
|
||||
if (!Img) {
|
||||
REPORT("No image loaded for device id %d.\n", DeviceId);
|
||||
Rc = OFFLOAD_FAIL;
|
||||
break;
|
||||
}
|
||||
// 2) load image into the target table.
|
||||
__tgt_target_table *TargetTable = TransTable->TargetsTable[device_id] =
|
||||
Device.load_binary(img);
|
||||
__tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] =
|
||||
Device.loadBinary(Img);
|
||||
// Unable to get table for this image: invalidate image and fail.
|
||||
if (!TargetTable) {
|
||||
REPORT("Unable to generate entries table for device id %d.\n",
|
||||
device_id);
|
||||
TransTable->TargetsImages[device_id] = 0;
|
||||
rc = OFFLOAD_FAIL;
|
||||
DeviceId);
|
||||
TransTable->TargetsImages[DeviceId] = 0;
|
||||
Rc = OFFLOAD_FAIL;
|
||||
break;
|
||||
}
|
||||
|
||||
// Verify whether the two table sizes match.
|
||||
size_t hsize =
|
||||
size_t Hsize =
|
||||
TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
|
||||
size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
|
||||
size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
|
||||
|
||||
// Invalid image for these host entries!
|
||||
if (hsize != tsize) {
|
||||
if (Hsize != Tsize) {
|
||||
REPORT(
|
||||
"Host and Target tables mismatch for device id %d [%zx != %zx].\n",
|
||||
device_id, hsize, tsize);
|
||||
TransTable->TargetsImages[device_id] = 0;
|
||||
TransTable->TargetsTable[device_id] = 0;
|
||||
rc = OFFLOAD_FAIL;
|
||||
DeviceId, Hsize, Tsize);
|
||||
TransTable->TargetsImages[DeviceId] = 0;
|
||||
TransTable->TargetsTable[DeviceId] = 0;
|
||||
Rc = OFFLOAD_FAIL;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -175,8 +175,8 @@ static int InitLibrary(DeviceTy &Device) {
|
|||
}
|
||||
}
|
||||
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
return rc;
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
return Rc;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -185,22 +185,22 @@ static int InitLibrary(DeviceTy &Device) {
|
|||
if (!Device.PendingCtorsDtors.empty()) {
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
// Call all ctors for all libraries registered so far
|
||||
for (auto &lib : Device.PendingCtorsDtors) {
|
||||
if (!lib.second.PendingCtors.empty()) {
|
||||
for (auto &Lib : Device.PendingCtorsDtors) {
|
||||
if (!Lib.second.PendingCtors.empty()) {
|
||||
DP("Has pending ctors... call now\n");
|
||||
for (auto &entry : lib.second.PendingCtors) {
|
||||
void *ctor = entry;
|
||||
int rc =
|
||||
target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr,
|
||||
for (auto &Entry : Lib.second.PendingCtors) {
|
||||
void *Ctor = Entry;
|
||||
int Rc =
|
||||
target(nullptr, Device, Ctor, 0, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, 1, 1, true /*team*/, AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor));
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
// Clear the list to indicate that this device has been used
|
||||
lib.second.PendingCtors.clear();
|
||||
DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first));
|
||||
Lib.second.PendingCtors.clear();
|
||||
DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(Lib.first));
|
||||
}
|
||||
}
|
||||
// All constructors have been issued, wait for them now.
|
||||
|
@ -232,10 +232,10 @@ void handleTargetOutcome(bool Success, ident_t *Loc) {
|
|||
FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "
|
||||
"for debugging options.\n");
|
||||
|
||||
SourceInfo info(Loc);
|
||||
if (info.isAvailible())
|
||||
fprintf(stderr, "%s:%d:%d: ", info.getFilename(), info.getLine(),
|
||||
info.getColumn());
|
||||
SourceInfo Info(Loc);
|
||||
if (Info.isAvailible())
|
||||
fprintf(stderr, "%s:%d:%d: ", Info.getFilename(), Info.getLine(),
|
||||
Info.getColumn());
|
||||
else
|
||||
FAILURE_MESSAGE("Source location information not present. Compile with "
|
||||
"-g or -gline-tables-only.\n");
|
||||
|
@ -308,7 +308,7 @@ bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
|
|||
}
|
||||
|
||||
// Is device ready?
|
||||
if (!device_is_ready(DeviceID)) {
|
||||
if (!deviceIsReady(DeviceID)) {
|
||||
REPORT("Device %" PRId64 " is not ready.\n", DeviceID);
|
||||
handleTargetOutcome(false, Loc);
|
||||
return true;
|
||||
|
@ -324,7 +324,7 @@ bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
|
|||
Device.PendingGlobalsMtx);
|
||||
HasPendingGlobals = Device.HasPendingGlobals;
|
||||
}
|
||||
if (HasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) {
|
||||
if (HasPendingGlobals && initLibrary(Device) != OFFLOAD_SUCCESS) {
|
||||
REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID);
|
||||
handleTargetOutcome(false, Loc);
|
||||
return true;
|
||||
|
@ -333,54 +333,53 @@ bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static int32_t getParentIndex(int64_t type) {
|
||||
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
|
||||
static int32_t getParentIndex(int64_t Type) {
|
||||
return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
|
||||
}
|
||||
|
||||
void *targetAllocExplicit(size_t size, int device_num, int kind,
|
||||
const char *name) {
|
||||
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
|
||||
const char *Name) {
|
||||
TIMESCOPE();
|
||||
DP("Call to %s for device %d requesting %zu bytes\n", name, device_num, size);
|
||||
DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
|
||||
|
||||
if (size <= 0) {
|
||||
DP("Call to %s with non-positive length\n", name);
|
||||
if (Size <= 0) {
|
||||
DP("Call to %s with non-positive length\n", Name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *rc = NULL;
|
||||
void *Rc = NULL;
|
||||
|
||||
if (device_num == omp_get_initial_device()) {
|
||||
rc = malloc(size);
|
||||
DP("%s returns host ptr " DPxMOD "\n", name, DPxPTR(rc));
|
||||
return rc;
|
||||
if (DeviceNum == omp_get_initial_device()) {
|
||||
Rc = malloc(Size);
|
||||
DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc));
|
||||
return Rc;
|
||||
}
|
||||
|
||||
if (!device_is_ready(device_num)) {
|
||||
DP("%s returns NULL ptr\n", name);
|
||||
if (!deviceIsReady(DeviceNum)) {
|
||||
DP("%s returns NULL ptr\n", Name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
DeviceTy &Device = *PM->Devices[device_num];
|
||||
rc = Device.allocData(size, nullptr, kind);
|
||||
DP("%s returns device ptr " DPxMOD "\n", name, DPxPTR(rc));
|
||||
return rc;
|
||||
DeviceTy &Device = *PM->Devices[DeviceNum];
|
||||
Rc = Device.allocData(Size, nullptr, Kind);
|
||||
DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc));
|
||||
return Rc;
|
||||
}
|
||||
|
||||
/// Call the user-defined mapper function followed by the appropriate
|
||||
// targetData* function (targetData{Begin,End,Update}).
|
||||
int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
|
||||
int64_t arg_size, int64_t arg_type,
|
||||
map_var_info_t arg_names, void *arg_mapper,
|
||||
AsyncInfoTy &AsyncInfo,
|
||||
TargetDataFuncPtrTy target_data_function) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper));
|
||||
int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
|
||||
int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
|
||||
void *ArgMapper, AsyncInfoTy &AsyncInfo,
|
||||
TargetDataFuncPtrTy TargetDataFunction) {
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper));
|
||||
|
||||
// The mapper function fills up Components.
|
||||
MapperComponentsTy MapperComponents;
|
||||
MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mapper);
|
||||
(*MapperFuncPtr)((void *)&MapperComponents, arg_base, arg, arg_size, arg_type,
|
||||
arg_names);
|
||||
MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(ArgMapper);
|
||||
(*MapperFuncPtr)((void *)&MapperComponents, ArgBase, Arg, ArgSize, ArgType,
|
||||
ArgNames);
|
||||
|
||||
// Construct new arrays for args_base, args, arg_sizes and arg_types
|
||||
// using the information in MapperComponents and call the corresponding
|
||||
|
@ -400,40 +399,40 @@ int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,
|
|||
MapperArgNames[I] = C.Name;
|
||||
}
|
||||
|
||||
int rc = target_data_function(loc, Device, MapperComponents.Components.size(),
|
||||
MapperArgsBase.data(), MapperArgs.data(),
|
||||
MapperArgSizes.data(), MapperArgTypes.data(),
|
||||
MapperArgNames.data(), /*arg_mappers*/ nullptr,
|
||||
AsyncInfo, /*FromMapper=*/true);
|
||||
int Rc = TargetDataFunction(Loc, Device, MapperComponents.Components.size(),
|
||||
MapperArgsBase.data(), MapperArgs.data(),
|
||||
MapperArgSizes.data(), MapperArgTypes.data(),
|
||||
MapperArgNames.data(), /*arg_mappers*/ nullptr,
|
||||
AsyncInfo, /*FromMapper=*/true);
|
||||
|
||||
return rc;
|
||||
return Rc;
|
||||
}
|
||||
|
||||
/// Internal function to do the mapping and transfer the data to the device
|
||||
int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, map_var_info_t *arg_names,
|
||||
void **arg_mappers, AsyncInfoTy &AsyncInfo,
|
||||
int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
bool FromMapper) {
|
||||
// process each input.
|
||||
for (int32_t i = 0; i < arg_num; ++i) {
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
// Ignore private variables and arrays - there is no mapping for them.
|
||||
if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
|
||||
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
|
||||
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
|
||||
(ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
|
||||
continue;
|
||||
|
||||
if (arg_mappers && arg_mappers[i]) {
|
||||
if (ArgMappers && ArgMappers[I]) {
|
||||
// Instead of executing the regular path of targetDataBegin, call the
|
||||
// targetDataMapper variant which will call targetDataBegin again
|
||||
// with new arguments.
|
||||
DP("Calling targetDataMapper for the %dth argument\n", i);
|
||||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
|
||||
map_var_info_t arg_name = (!arg_names) ? nullptr : arg_names[i];
|
||||
int rc = targetDataMapper(loc, Device, args_base[i], args[i],
|
||||
arg_sizes[i], arg_types[i], arg_name,
|
||||
arg_mappers[i], AsyncInfo, targetDataBegin);
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataBegin);
|
||||
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"
|
||||
" failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
|
@ -443,46 +442,46 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
continue;
|
||||
}
|
||||
|
||||
void *HstPtrBegin = args[i];
|
||||
void *HstPtrBase = args_base[i];
|
||||
int64_t data_size = arg_sizes[i];
|
||||
map_var_info_t HstPtrName = (!arg_names) ? nullptr : arg_names[i];
|
||||
void *HstPtrBegin = Args[I];
|
||||
void *HstPtrBase = ArgsBase[I];
|
||||
int64_t DataSize = ArgSizes[I];
|
||||
map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
|
||||
// Adjust for proper alignment if this is a combined entry (for structs).
|
||||
// Look at the next argument - if that is MEMBER_OF this one, then this one
|
||||
// is a combined entry.
|
||||
int64_t padding = 0;
|
||||
const int next_i = i + 1;
|
||||
if (getParentIndex(arg_types[i]) < 0 && next_i < arg_num &&
|
||||
getParentIndex(arg_types[next_i]) == i) {
|
||||
padding = (int64_t)HstPtrBegin % Alignment;
|
||||
if (padding) {
|
||||
int64_t Padding = 0;
|
||||
const int NextI = I + 1;
|
||||
if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
|
||||
getParentIndex(ArgTypes[NextI]) == I) {
|
||||
Padding = (int64_t)HstPtrBegin % Alignment;
|
||||
if (Padding) {
|
||||
DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
|
||||
"\n",
|
||||
padding, DPxPTR(HstPtrBegin));
|
||||
HstPtrBegin = (char *)HstPtrBegin - padding;
|
||||
data_size += padding;
|
||||
Padding, DPxPTR(HstPtrBegin));
|
||||
HstPtrBegin = (char *)HstPtrBegin - Padding;
|
||||
DataSize += Padding;
|
||||
}
|
||||
}
|
||||
|
||||
// Address of pointer on the host and device, respectively.
|
||||
void *Pointer_HstPtrBegin, *PointerTgtPtrBegin;
|
||||
TargetPointerResultTy Pointer_TPR;
|
||||
void *PointerHstPtrBegin, *PointerTgtPtrBegin;
|
||||
TargetPointerResultTy PointerTpr;
|
||||
bool IsHostPtr = false;
|
||||
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
|
||||
bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
|
||||
// Force the creation of a device side copy of the data when:
|
||||
// a close map modifier was associated with a map that contained a to.
|
||||
bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
|
||||
bool HasPresentModifier = arg_types[i] & OMP_TGT_MAPTYPE_PRESENT;
|
||||
bool HasHoldModifier = arg_types[i] & OMP_TGT_MAPTYPE_OMPX_HOLD;
|
||||
bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE;
|
||||
bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
|
||||
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
|
||||
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
|
||||
// have reached this point via __tgt_target_data_begin and not __tgt_target
|
||||
// then no argument is marked as TARGET_PARAM ("omp target data map" is not
|
||||
// associated with a target region, so there are no target parameters). This
|
||||
// may be considered a hack, we could revise the scheme in the future.
|
||||
bool UpdateRef =
|
||||
!(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && i == 0);
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
|
||||
!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
|
||||
DP("Has a pointer entry: \n");
|
||||
// Base is address of pointer.
|
||||
//
|
||||
|
@ -497,12 +496,12 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
// entry for a global that might not already be allocated by the time the
|
||||
// PTR_AND_OBJ entry is handled below, and so the allocation might fail
|
||||
// when HasPresentModifier.
|
||||
Pointer_TPR = Device.getTargetPointer(
|
||||
PointerTpr = Device.getTargetPointer(
|
||||
HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr,
|
||||
/*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
|
||||
HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo);
|
||||
PointerTgtPtrBegin = Pointer_TPR.TargetPointer;
|
||||
IsHostPtr = Pointer_TPR.Flags.IsHostPointer;
|
||||
PointerTgtPtrBegin = PointerTpr.TargetPointer;
|
||||
IsHostPtr = PointerTpr.Flags.IsHostPointer;
|
||||
if (!PointerTgtPtrBegin) {
|
||||
REPORT("Call to getTargetPointer returned null pointer (%s).\n",
|
||||
HasPresentModifier ? "'present' map type modifier"
|
||||
|
@ -512,27 +511,27 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
|
||||
"\n",
|
||||
sizeof(void *), DPxPTR(PointerTgtPtrBegin),
|
||||
(Pointer_TPR.Flags.IsNewEntry ? "" : " not"));
|
||||
Pointer_HstPtrBegin = HstPtrBase;
|
||||
(PointerTpr.Flags.IsNewEntry ? "" : " not"));
|
||||
PointerHstPtrBegin = HstPtrBase;
|
||||
// modify current entry.
|
||||
HstPtrBase = *(void **)HstPtrBase;
|
||||
// No need to update pointee ref count for the first element of the
|
||||
// subelement that comes from mapper.
|
||||
UpdateRef =
|
||||
(!FromMapper || i != 0); // subsequently update ref count of pointee
|
||||
(!FromMapper || I != 0); // subsequently update ref count of pointee
|
||||
}
|
||||
|
||||
const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO;
|
||||
const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO;
|
||||
const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
auto TPR = Device.getTargetPointer(
|
||||
HstPtrBegin, HstPtrBase, data_size, HstPtrName, HasFlagTo,
|
||||
HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
|
||||
HasPresentModifier, HasHoldModifier, AsyncInfo);
|
||||
HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways,
|
||||
IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier,
|
||||
HasHoldModifier, AsyncInfo);
|
||||
void *TgtPtrBegin = TPR.TargetPointer;
|
||||
IsHostPtr = TPR.Flags.IsHostPointer;
|
||||
// If data_size==0, then the argument could be a zero-length pointer to
|
||||
// NULL, so getOrAlloc() returning NULL is not an error.
|
||||
if (!TgtPtrBegin && (data_size || HasPresentModifier)) {
|
||||
if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
|
||||
REPORT("Call to getTargetPointer returned null pointer (%s).\n",
|
||||
HasPresentModifier ? "'present' map type modifier"
|
||||
: "device failure or illegal mapping");
|
||||
|
@ -540,16 +539,16 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
}
|
||||
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
|
||||
" - is%s new\n",
|
||||
data_size, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not"));
|
||||
DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not"));
|
||||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
|
||||
uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
|
||||
void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
|
||||
DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
|
||||
args_base[i] = TgtPtrBase;
|
||||
ArgsBase[I] = TgtPtrBase;
|
||||
}
|
||||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
|
||||
// Check whether we need to update the pointer on the device
|
||||
bool UpdateDevPtr = false;
|
||||
|
||||
|
@ -557,7 +556,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
|
||||
|
||||
Device.ShadowMtx.lock();
|
||||
auto Entry = Device.ShadowPtrMap.find(Pointer_HstPtrBegin);
|
||||
auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin);
|
||||
// If this pointer is not in the map we need to insert it. If the map
|
||||
// contains a stale entry, we need to update it (e.g. if the pointee was
|
||||
// deallocated and later on is reallocated at another device address). The
|
||||
|
@ -572,14 +571,14 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
if (Entry == Device.ShadowPtrMap.end() ||
|
||||
Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {
|
||||
// create or update shadow pointers for this entry
|
||||
Device.ShadowPtrMap[Pointer_HstPtrBegin] = {
|
||||
Device.ShadowPtrMap[PointerHstPtrBegin] = {
|
||||
HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
|
||||
Pointer_TPR.Entry->setMayContainAttachedPointers();
|
||||
PointerTpr.Entry->setMayContainAttachedPointers();
|
||||
UpdateDevPtr = true;
|
||||
}
|
||||
|
||||
if (UpdateDevPtr) {
|
||||
std::lock_guard<decltype(*Pointer_TPR.Entry)> LG(*Pointer_TPR.Entry);
|
||||
std::lock_guard<decltype(*PointerTpr.Entry)> LG(*PointerTpr.Entry);
|
||||
Device.ShadowMtx.unlock();
|
||||
|
||||
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
|
||||
|
@ -594,7 +593,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
|||
REPORT("Copying data to device failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
if (Pointer_TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
|
||||
if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) !=
|
||||
OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
} else
|
||||
|
@ -675,7 +674,7 @@ static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
|
|||
} // namespace
|
||||
|
||||
/// Internal function to undo the mapping and retrieve the data from the device.
|
||||
int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
||||
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
|
||||
|
@ -697,7 +696,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
|||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
Ret = targetDataMapper(loc, Device, ArgBases[I], Args[I], ArgSizes[I],
|
||||
Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataEnd);
|
||||
|
||||
|
@ -909,10 +908,10 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
|||
return Ret;
|
||||
}
|
||||
|
||||
static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
|
||||
static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
|
||||
void *HstPtrBegin, int64_t ArgSize,
|
||||
int64_t ArgType, AsyncInfoTy &AsyncInfo) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
bool IsLast, IsHostPtr;
|
||||
TargetPointerResultTy TPR = Device.getTgtPtrBegin(
|
||||
HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
|
||||
|
@ -985,13 +984,13 @@ static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase,
|
|||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,
|
||||
static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device,
|
||||
void *ArgsBase,
|
||||
__tgt_target_non_contig *NonContig,
|
||||
uint64_t Size, int64_t ArgType,
|
||||
int CurrentDim, int DimSize, uint64_t Offset,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
TIMESCOPE_WITH_IDENT(loc);
|
||||
TIMESCOPE_WITH_IDENT(Loc);
|
||||
int Ret = OFFLOAD_SUCCESS;
|
||||
if (CurrentDim < DimSize) {
|
||||
for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
|
||||
|
@ -1000,7 +999,7 @@ static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,
|
|||
// we only need to transfer the first element for the last dimension
|
||||
// since we've already got a contiguous piece.
|
||||
if (CurrentDim != DimSize - 1 || I == 0) {
|
||||
Ret = targetDataNonContiguous(loc, Device, ArgsBase, NonContig, Size,
|
||||
Ret = targetDataNonContiguous(Loc, Device, ArgsBase, NonContig, Size,
|
||||
ArgType, CurrentDim + 1, DimSize,
|
||||
Offset + CurOffset, AsyncInfo);
|
||||
// Stop the whole process if any contiguous piece returns anything
|
||||
|
@ -1014,7 +1013,7 @@ static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device,
|
|||
DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64
|
||||
" len %" PRIu64 "\n",
|
||||
DPxPTR(Ptr), Offset, Size);
|
||||
Ret = targetDataContiguous(loc, Device, ArgsBase, Ptr, Size, ArgType,
|
||||
Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType,
|
||||
AsyncInfo);
|
||||
}
|
||||
return Ret;
|
||||
|
@ -1031,7 +1030,7 @@ static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
|
|||
}
|
||||
|
||||
/// Internal function to pass data to/from the target.
|
||||
int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
||||
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) {
|
||||
|
@ -1048,7 +1047,7 @@ int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
|||
DP("Calling targetDataMapper for the %dth argument\n", I);
|
||||
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
int Ret = targetDataMapper(loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataUpdate);
|
||||
|
||||
|
@ -1071,10 +1070,10 @@ int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
|||
NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
|
||||
int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
|
||||
Ret = targetDataNonContiguous(
|
||||
loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
|
||||
Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
|
||||
/*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo);
|
||||
} else {
|
||||
Ret = targetDataContiguous(loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
Ret = targetDataContiguous(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], AsyncInfo);
|
||||
}
|
||||
if (Ret == OFFLOAD_FAIL)
|
||||
|
@ -1321,7 +1320,7 @@ public:
|
|||
/// Process data before launching the kernel, including calling targetDataBegin
|
||||
/// to map and transfer data to target device, transferring (first-)private
|
||||
/// variables.
|
||||
static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
|
||||
static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgBases, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
|
@ -1329,9 +1328,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
|
|||
std::vector<ptrdiff_t> &TgtOffsets,
|
||||
PrivateArgumentManagerTy &PrivateArgumentManager,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", loc);
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
int Ret = targetDataBegin(loc, Device, ArgNum, ArgBases, Args, ArgSizes,
|
||||
int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataBegin failed, abort target.\n");
|
||||
|
@ -1452,17 +1451,17 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
|
|||
|
||||
/// Process data after launching the kernel, including transferring data back to
|
||||
/// host if needed and deallocating target memory of (first-)private variables.
|
||||
static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
|
||||
static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgBases, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
PrivateArgumentManagerTy &PrivateArgumentManager,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", loc);
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc);
|
||||
DeviceTy &Device = *PM->Devices[DeviceId];
|
||||
|
||||
// Move data from device.
|
||||
int Ret = targetDataEnd(loc, Device, ArgNum, ArgBases, Args, ArgSizes,
|
||||
int Ret = targetDataEnd(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT("Call to targetDataEnd failed, abort target.\n");
|
||||
|
@ -1486,7 +1485,7 @@ static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr,
|
|||
/// performs the same action as data_update and data_end above. This function
|
||||
/// returns 0 if it was able to transfer the execution to a target and an
|
||||
/// integer different from zero otherwise.
|
||||
int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
|
||||
int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy &AsyncInfo) {
|
||||
|
@ -1527,7 +1526,7 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
|||
int Ret;
|
||||
if (ArgNum) {
|
||||
// Process data, such as data mapping, before launching the kernel
|
||||
Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
|
||||
Ret = processDataBefore(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
|
||||
ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
|
||||
TgtOffsets, PrivateArgumentManager, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
|
@ -1543,7 +1542,7 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
|||
|
||||
{
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT(
|
||||
IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc);
|
||||
IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc);
|
||||
if (IsTeamConstruct)
|
||||
Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
|
||||
TgtArgs.size(), TeamNum, ThreadLimit,
|
||||
|
@ -1561,7 +1560,7 @@ int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
|||
if (ArgNum) {
|
||||
// Transfer data back and deallocate target memory for (first-)private
|
||||
// variables
|
||||
Ret = processDataAfter(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
|
||||
Ret = processDataAfter(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
|
||||
ArgSizes, ArgTypes, ArgNames, ArgMappers,
|
||||
PrivateArgumentManager, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
|
|
|
@ -20,34 +20,34 @@
|
|||
|
||||
#include <cstdint>
|
||||
|
||||
extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, map_var_info_t *arg_names,
|
||||
void **arg_mappers, AsyncInfoTy &AsyncInfo,
|
||||
extern int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
bool FromMapper = false);
|
||||
|
||||
extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
|
||||
extern int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *arg_names,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
bool FromMapper = false);
|
||||
|
||||
extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num,
|
||||
void **args_base, void **args, int64_t *arg_sizes,
|
||||
int64_t *arg_types, map_var_info_t *arg_names,
|
||||
void **arg_mappers, AsyncInfoTy &AsyncInfo,
|
||||
extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
bool FromMapper = false);
|
||||
|
||||
extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *arg_names,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit,
|
||||
int IsTeamConstruct, AsyncInfoTy &AsyncInfo);
|
||||
|
||||
extern void handleTargetOutcome(bool Success, ident_t *Loc);
|
||||
extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
|
||||
extern void *targetAllocExplicit(size_t size, int device_num, int kind,
|
||||
const char *name);
|
||||
extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
|
||||
const char *Name);
|
||||
|
||||
// This structure stores information of a mapped memory region.
|
||||
struct MapComponentInfoTy {
|
||||
|
@ -156,33 +156,33 @@ printKernelArguments(const ident_t *Loc, const int64_t DeviceId,
|
|||
const int32_t ArgNum, const int64_t *ArgSizes,
|
||||
const int64_t *ArgTypes, const map_var_info_t *ArgNames,
|
||||
const char *RegionType) {
|
||||
SourceInfo info(Loc);
|
||||
SourceInfo Info(Loc);
|
||||
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s at %s:%d:%d with %d arguments:\n",
|
||||
RegionType, info.getFilename(), info.getLine(), info.getColumn(),
|
||||
RegionType, Info.getFilename(), Info.getLine(), Info.getColumn(),
|
||||
ArgNum);
|
||||
|
||||
for (int32_t i = 0; i < ArgNum; ++i) {
|
||||
const map_var_info_t varName = (ArgNames) ? ArgNames[i] : nullptr;
|
||||
const char *type = nullptr;
|
||||
const char *implicit =
|
||||
(ArgTypes[i] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
|
||||
if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO && ArgTypes[i] & OMP_TGT_MAPTYPE_FROM)
|
||||
type = "tofrom";
|
||||
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO)
|
||||
type = "to";
|
||||
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_FROM)
|
||||
type = "from";
|
||||
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_PRIVATE)
|
||||
type = "private";
|
||||
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_LITERAL)
|
||||
type = "firstprivate";
|
||||
else if (ArgSizes[i] != 0)
|
||||
type = "alloc";
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
const map_var_info_t VarName = (ArgNames) ? ArgNames[I] : nullptr;
|
||||
const char *Type = nullptr;
|
||||
const char *Implicit =
|
||||
(ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
|
||||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO && ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
|
||||
Type = "tofrom";
|
||||
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO)
|
||||
Type = "to";
|
||||
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
|
||||
Type = "from";
|
||||
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)
|
||||
Type = "private";
|
||||
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL)
|
||||
Type = "firstprivate";
|
||||
else if (ArgSizes[I] != 0)
|
||||
Type = "alloc";
|
||||
else
|
||||
type = "use_address";
|
||||
Type = "use_address";
|
||||
|
||||
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", type,
|
||||
getNameFromMapping(varName).c_str(), ArgSizes[i], implicit);
|
||||
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", Type,
|
||||
getNameFromMapping(VarName).c_str(), ArgSizes[I], Implicit);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ __attribute__((destructor(101))) void deinit() {
|
|||
#endif
|
||||
}
|
||||
|
||||
void RTLsTy::LoadRTLs() {
|
||||
void RTLsTy::loadRTLs() {
|
||||
// Parse environment variable OMP_TARGET_OFFLOAD (if set)
|
||||
PM->TargetOffloadPolicy =
|
||||
(kmp_target_offload_kind_t)__kmpc_get_target_offload();
|
||||
|
@ -92,9 +92,9 @@ void RTLsTy::LoadRTLs() {
|
|||
// is correct and if they are supporting any devices.
|
||||
for (auto *Name : RTLNames) {
|
||||
DP("Loading library '%s'...\n", Name);
|
||||
void *dynlib_handle = dlopen(Name, RTLD_NOW);
|
||||
void *DynlibHandle = dlopen(Name, RTLD_NOW);
|
||||
|
||||
if (!dynlib_handle) {
|
||||
if (!DynlibHandle) {
|
||||
// Library does not exist or cannot be found.
|
||||
DP("Unable to load library '%s': %s!\n", Name, dlerror());
|
||||
continue;
|
||||
|
@ -110,34 +110,34 @@ void RTLsTy::LoadRTLs() {
|
|||
bool ValidPlugin = true;
|
||||
|
||||
if (!(*((void **)&R.is_valid_binary) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_is_valid_binary")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.number_of_devices) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_number_of_devices")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_number_of_devices")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.init_device) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_init_device")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_device")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.load_binary) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_load_binary")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_load_binary")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.data_alloc) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_alloc")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_alloc")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.data_submit) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_submit")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_submit")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.data_retrieve) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_retrieve")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_retrieve")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.data_delete) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_delete")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_delete")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.run_region) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_run_target_region")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_run_target_region")))
|
||||
ValidPlugin = false;
|
||||
if (!(*((void **)&R.run_team_region) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region")))
|
||||
dlsym(DynlibHandle, "__tgt_rtl_run_target_team_region")))
|
||||
ValidPlugin = false;
|
||||
|
||||
// Invalid plugin
|
||||
|
@ -155,7 +155,7 @@ void RTLsTy::LoadRTLs() {
|
|||
continue;
|
||||
}
|
||||
|
||||
R.LibraryHandler = dynlib_handle;
|
||||
R.LibraryHandler = DynlibHandle;
|
||||
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
R.RTLName = Name;
|
||||
|
@ -166,48 +166,45 @@ void RTLsTy::LoadRTLs() {
|
|||
|
||||
// Optional functions
|
||||
*((void **)&R.deinit_device) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_deinit_device");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_deinit_device");
|
||||
*((void **)&R.init_requires) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_init_requires");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_requires");
|
||||
*((void **)&R.data_submit_async) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_submit_async");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_submit_async");
|
||||
*((void **)&R.data_retrieve_async) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_retrieve_async");
|
||||
*((void **)&R.run_region_async) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_run_target_region_async");
|
||||
*((void **)&R.run_team_region_async) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async");
|
||||
*((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_run_target_team_region_async");
|
||||
*((void **)&R.synchronize) = dlsym(DynlibHandle, "__tgt_rtl_synchronize");
|
||||
*((void **)&R.data_exchange) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_exchange");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_exchange");
|
||||
*((void **)&R.data_exchange_async) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_data_exchange_async");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_data_exchange_async");
|
||||
*((void **)&R.is_data_exchangable) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_is_data_exchangable");
|
||||
*((void **)&R.register_lib) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_register_lib");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_is_data_exchangable");
|
||||
*((void **)&R.register_lib) = dlsym(DynlibHandle, "__tgt_rtl_register_lib");
|
||||
*((void **)&R.unregister_lib) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_unregister_lib");
|
||||
*((void **)&R.supports_empty_images) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_supports_empty_images");
|
||||
*((void **)&R.set_info_flag) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_set_info_flag");
|
||||
*((void **)&R.print_device_info) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
|
||||
*((void **)&R.create_event) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_create_event");
|
||||
*((void **)&R.record_event) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_record_event");
|
||||
*((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event");
|
||||
*((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_print_device_info");
|
||||
*((void **)&R.create_event) = dlsym(DynlibHandle, "__tgt_rtl_create_event");
|
||||
*((void **)&R.record_event) = dlsym(DynlibHandle, "__tgt_rtl_record_event");
|
||||
*((void **)&R.wait_event) = dlsym(DynlibHandle, "__tgt_rtl_wait_event");
|
||||
*((void **)&R.sync_event) = dlsym(DynlibHandle, "__tgt_rtl_sync_event");
|
||||
*((void **)&R.destroy_event) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_destroy_event");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_destroy_event");
|
||||
*((void **)&R.release_async_info) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_release_async_info");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_release_async_info");
|
||||
*((void **)&R.init_async_info) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_init_async_info");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_async_info");
|
||||
*((void **)&R.init_device_info) =
|
||||
dlsym(dynlib_handle, "__tgt_rtl_init_device_info");
|
||||
dlsym(DynlibHandle, "__tgt_rtl_init_device_info");
|
||||
}
|
||||
|
||||
DP("RTLs loaded!\n");
|
||||
|
@ -218,9 +215,9 @@ void RTLsTy::LoadRTLs() {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Functionality for registering libs
|
||||
|
||||
static void RegisterImageIntoTranslationTable(TranslationTable &TT,
|
||||
static void registerImageIntoTranslationTable(TranslationTable &TT,
|
||||
RTLInfoTy &RTL,
|
||||
__tgt_device_image *image) {
|
||||
__tgt_device_image *Image) {
|
||||
|
||||
// same size, as when we increase one, we also increase the other.
|
||||
assert(TT.TargetsTable.size() == TT.TargetsImages.size() &&
|
||||
|
@ -236,11 +233,11 @@ static void RegisterImageIntoTranslationTable(TranslationTable &TT,
|
|||
}
|
||||
|
||||
// Register the image in all devices for this target type.
|
||||
for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) {
|
||||
for (int32_t I = 0; I < RTL.NumberOfDevices; ++I) {
|
||||
// If we are changing the image we are also invalidating the target table.
|
||||
if (TT.TargetsImages[RTL.Idx + i] != image) {
|
||||
TT.TargetsImages[RTL.Idx + i] = image;
|
||||
TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table.
|
||||
if (TT.TargetsImages[RTL.Idx + I] != Image) {
|
||||
TT.TargetsImages[RTL.Idx + I] = Image;
|
||||
TT.TargetsTable[RTL.Idx + I] = 0; // lazy initialization of target table.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -248,29 +245,29 @@ static void RegisterImageIntoTranslationTable(TranslationTable &TT,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Functionality for registering Ctors/Dtors
|
||||
|
||||
static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
|
||||
__tgt_device_image *img,
|
||||
static void registerGlobalCtorsDtorsForImage(__tgt_bin_desc *Desc,
|
||||
__tgt_device_image *Img,
|
||||
RTLInfoTy *RTL) {
|
||||
|
||||
for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) {
|
||||
DeviceTy &Device = *PM->Devices[RTL->Idx + i];
|
||||
for (int32_t I = 0; I < RTL->NumberOfDevices; ++I) {
|
||||
DeviceTy &Device = *PM->Devices[RTL->Idx + I];
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
Device.HasPendingGlobals = true;
|
||||
for (__tgt_offload_entry *entry = img->EntriesBegin;
|
||||
entry != img->EntriesEnd; ++entry) {
|
||||
if (entry->flags & OMP_DECLARE_TARGET_CTOR) {
|
||||
for (__tgt_offload_entry *Entry = Img->EntriesBegin;
|
||||
Entry != Img->EntriesEnd; ++Entry) {
|
||||
if (Entry->flags & OMP_DECLARE_TARGET_CTOR) {
|
||||
DP("Adding ctor " DPxMOD " to the pending list.\n",
|
||||
DPxPTR(entry->addr));
|
||||
Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr);
|
||||
} else if (entry->flags & OMP_DECLARE_TARGET_DTOR) {
|
||||
DPxPTR(Entry->addr));
|
||||
Device.PendingCtorsDtors[Desc].PendingCtors.push_back(Entry->addr);
|
||||
} else if (Entry->flags & OMP_DECLARE_TARGET_DTOR) {
|
||||
// Dtors are pushed in reverse order so they are executed from end
|
||||
// to beginning when unregistering the library!
|
||||
DP("Adding dtor " DPxMOD " to the pending list.\n",
|
||||
DPxPTR(entry->addr));
|
||||
Device.PendingCtorsDtors[desc].PendingDtors.push_front(entry->addr);
|
||||
DPxPTR(Entry->addr));
|
||||
Device.PendingCtorsDtors[Desc].PendingDtors.push_front(Entry->addr);
|
||||
}
|
||||
|
||||
if (entry->flags & OMP_DECLARE_TARGET_LINK) {
|
||||
if (Entry->flags & OMP_DECLARE_TARGET_LINK) {
|
||||
DP("The \"link\" attribute is not yet supported!\n");
|
||||
}
|
||||
}
|
||||
|
@ -278,16 +275,16 @@ static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
|
|||
}
|
||||
}
|
||||
|
||||
void RTLsTy::RegisterRequires(int64_t flags) {
|
||||
void RTLsTy::registerRequires(int64_t Flags) {
|
||||
// TODO: add more elaborate check.
|
||||
// Minimal check: only set requires flags if previous value
|
||||
// is undefined. This ensures that only the first call to this
|
||||
// function will set the requires flags. All subsequent calls
|
||||
// will be checked for compatibility.
|
||||
assert(flags != OMP_REQ_UNDEFINED &&
|
||||
assert(Flags != OMP_REQ_UNDEFINED &&
|
||||
"illegal undefined flag for requires directive!");
|
||||
if (RequiresFlags == OMP_REQ_UNDEFINED) {
|
||||
RequiresFlags = flags;
|
||||
RequiresFlags = Flags;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -297,17 +294,17 @@ void RTLsTy::RegisterRequires(int64_t flags) {
|
|||
// - unified_address
|
||||
// - unified_shared_memory
|
||||
if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) !=
|
||||
(flags & OMP_REQ_REVERSE_OFFLOAD)) {
|
||||
(Flags & OMP_REQ_REVERSE_OFFLOAD)) {
|
||||
FATAL_MESSAGE0(
|
||||
1, "'#pragma omp requires reverse_offload' not used consistently!");
|
||||
}
|
||||
if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) !=
|
||||
(flags & OMP_REQ_UNIFIED_ADDRESS)) {
|
||||
(Flags & OMP_REQ_UNIFIED_ADDRESS)) {
|
||||
FATAL_MESSAGE0(
|
||||
1, "'#pragma omp requires unified_address' not used consistently!");
|
||||
}
|
||||
if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) !=
|
||||
(flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
(Flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
FATAL_MESSAGE0(
|
||||
1,
|
||||
"'#pragma omp requires unified_shared_memory' not used consistently!");
|
||||
|
@ -316,21 +313,21 @@ void RTLsTy::RegisterRequires(int64_t flags) {
|
|||
// TODO: insert any other missing checks
|
||||
|
||||
DP("New requires flags %" PRId64 " compatible with existing %" PRId64 "!\n",
|
||||
flags, RequiresFlags);
|
||||
Flags, RequiresFlags);
|
||||
}
|
||||
|
||||
void RTLsTy::initRTLonce(RTLInfoTy &R) {
|
||||
// If this RTL is not already in use, initialize it.
|
||||
if (!R.isUsed && R.NumberOfDevices != 0) {
|
||||
if (!R.IsUsed && R.NumberOfDevices != 0) {
|
||||
// Initialize the device information for the RTL we are about to use.
|
||||
const size_t Start = PM->Devices.size();
|
||||
PM->Devices.reserve(Start + R.NumberOfDevices);
|
||||
for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) {
|
||||
for (int32_t DeviceId = 0; DeviceId < R.NumberOfDevices; DeviceId++) {
|
||||
PM->Devices.push_back(std::make_unique<DeviceTy>(&R));
|
||||
// global device ID
|
||||
PM->Devices[Start + device_id]->DeviceID = Start + device_id;
|
||||
PM->Devices[Start + DeviceId]->DeviceID = Start + DeviceId;
|
||||
// RTL local device ID
|
||||
PM->Devices[Start + device_id]->RTLDeviceID = device_id;
|
||||
PM->Devices[Start + DeviceId]->RTLDeviceID = DeviceId;
|
||||
}
|
||||
|
||||
// Initialize the index of this RTL and save it in the used RTLs.
|
||||
|
@ -339,7 +336,7 @@ void RTLsTy::initRTLonce(RTLInfoTy &R) {
|
|||
: UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices;
|
||||
assert((size_t)R.Idx == Start &&
|
||||
"RTL index should equal the number of devices used so far.");
|
||||
R.isUsed = true;
|
||||
R.IsUsed = true;
|
||||
UsedRTLs.push_back(&R);
|
||||
|
||||
DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx);
|
||||
|
@ -351,58 +348,58 @@ void RTLsTy::initAllRTLs() {
|
|||
initRTLonce(R);
|
||||
}
|
||||
|
||||
void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
||||
void RTLsTy::registerLib(__tgt_bin_desc *Desc) {
|
||||
PM->RTLsMtx.lock();
|
||||
// Register the images with the RTLs that understand them, if any.
|
||||
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
|
||||
for (int32_t I = 0; I < Desc->NumDeviceImages; ++I) {
|
||||
// Obtain the image.
|
||||
__tgt_device_image *img = &desc->DeviceImages[i];
|
||||
__tgt_device_image *Img = &Desc->DeviceImages[I];
|
||||
|
||||
RTLInfoTy *FoundRTL = nullptr;
|
||||
|
||||
// Scan the RTLs that have associated images until we find one that supports
|
||||
// the current image.
|
||||
for (auto &R : AllRTLs) {
|
||||
if (!R.is_valid_binary(img)) {
|
||||
if (!R.is_valid_binary(Img)) {
|
||||
DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
|
||||
DPxPTR(img->ImageStart), R.RTLName.c_str());
|
||||
DPxPTR(Img->ImageStart), R.RTLName.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
DP("Image " DPxMOD " is compatible with RTL %s!\n",
|
||||
DPxPTR(img->ImageStart), R.RTLName.c_str());
|
||||
DPxPTR(Img->ImageStart), R.RTLName.c_str());
|
||||
|
||||
initRTLonce(R);
|
||||
|
||||
// Initialize (if necessary) translation table for this library.
|
||||
PM->TrlTblMtx.lock();
|
||||
if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) {
|
||||
PM->HostEntriesBeginRegistrationOrder.push_back(desc->HostEntriesBegin);
|
||||
if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
|
||||
PM->HostEntriesBeginRegistrationOrder.push_back(Desc->HostEntriesBegin);
|
||||
TranslationTable &TransTable =
|
||||
(PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin;
|
||||
TransTable.HostTable.EntriesEnd = desc->HostEntriesEnd;
|
||||
(PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
|
||||
TransTable.HostTable.EntriesBegin = Desc->HostEntriesBegin;
|
||||
TransTable.HostTable.EntriesEnd = Desc->HostEntriesEnd;
|
||||
}
|
||||
|
||||
// Retrieve translation table for this library.
|
||||
TranslationTable &TransTable =
|
||||
(PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
(PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
|
||||
|
||||
DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(img->ImageStart),
|
||||
DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart),
|
||||
R.RTLName.c_str());
|
||||
RegisterImageIntoTranslationTable(TransTable, R, img);
|
||||
registerImageIntoTranslationTable(TransTable, R, Img);
|
||||
PM->TrlTblMtx.unlock();
|
||||
FoundRTL = &R;
|
||||
|
||||
// Load ctors/dtors for static objects
|
||||
RegisterGlobalCtorsDtorsForImage(desc, img, FoundRTL);
|
||||
registerGlobalCtorsDtorsForImage(Desc, Img, FoundRTL);
|
||||
|
||||
// if an RTL was found we are done - proceed to register the next image
|
||||
break;
|
||||
}
|
||||
|
||||
if (!FoundRTL) {
|
||||
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart));
|
||||
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart));
|
||||
}
|
||||
}
|
||||
PM->RTLsMtx.unlock();
|
||||
|
@ -410,14 +407,14 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
DP("Done registering entries!\n");
|
||||
}
|
||||
|
||||
void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
||||
void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) {
|
||||
DP("Unloading target library!\n");
|
||||
|
||||
PM->RTLsMtx.lock();
|
||||
// Find which RTL understands each image, if any.
|
||||
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
|
||||
for (int32_t I = 0; I < Desc->NumDeviceImages; ++I) {
|
||||
// Obtain the image.
|
||||
__tgt_device_image *img = &desc->DeviceImages[i];
|
||||
__tgt_device_image *Img = &Desc->DeviceImages[I];
|
||||
|
||||
RTLInfoTy *FoundRTL = NULL;
|
||||
|
||||
|
@ -425,36 +422,36 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
// the current image. We only need to scan RTLs that are already being used.
|
||||
for (auto *R : UsedRTLs) {
|
||||
|
||||
assert(R->isUsed && "Expecting used RTLs.");
|
||||
assert(R->IsUsed && "Expecting used RTLs.");
|
||||
|
||||
if (!R->is_valid_binary(img)) {
|
||||
if (!R->is_valid_binary(Img)) {
|
||||
DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n",
|
||||
DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
continue;
|
||||
}
|
||||
|
||||
DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n",
|
||||
DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
|
||||
FoundRTL = R;
|
||||
|
||||
// Execute dtors for static objects if the device has been used, i.e.
|
||||
// if its PendingCtors list has been emptied.
|
||||
for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) {
|
||||
DeviceTy &Device = *PM->Devices[FoundRTL->Idx + i];
|
||||
for (int32_t I = 0; I < FoundRTL->NumberOfDevices; ++I) {
|
||||
DeviceTy &Device = *PM->Devices[FoundRTL->Idx + I];
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
|
||||
if (Device.PendingCtorsDtors[Desc].PendingCtors.empty()) {
|
||||
AsyncInfoTy AsyncInfo(Device);
|
||||
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
|
||||
int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr,
|
||||
for (auto &Dtor : Device.PendingCtorsDtors[Desc].PendingDtors) {
|
||||
int Rc = target(nullptr, Device, Dtor, 0, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, 1, 1, true /*team*/,
|
||||
AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(Dtor));
|
||||
}
|
||||
}
|
||||
// Remove this library's entry from PendingCtorsDtors
|
||||
Device.PendingCtorsDtors.erase(desc);
|
||||
Device.PendingCtorsDtors.erase(Desc);
|
||||
// All constructors have been issued, wait for them now.
|
||||
if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS)
|
||||
DP("Failed synchronizing destructors kernels.\n");
|
||||
|
@ -463,7 +460,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
}
|
||||
|
||||
DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n",
|
||||
DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler));
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -471,7 +468,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
// if no RTL was found proceed to unregister the next image
|
||||
if (!FoundRTL) {
|
||||
DP("No RTLs in use support the image " DPxMOD "!\n",
|
||||
DPxPTR(img->ImageStart));
|
||||
DPxPTR(Img->ImageStart));
|
||||
}
|
||||
}
|
||||
PM->RTLsMtx.unlock();
|
||||
|
@ -479,22 +476,22 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
|
||||
// Remove entries from PM->HostPtrToTableMap
|
||||
PM->TblMapMtx.lock();
|
||||
for (__tgt_offload_entry *cur = desc->HostEntriesBegin;
|
||||
cur < desc->HostEntriesEnd; ++cur) {
|
||||
PM->HostPtrToTableMap.erase(cur->addr);
|
||||
for (__tgt_offload_entry *Cur = Desc->HostEntriesBegin;
|
||||
Cur < Desc->HostEntriesEnd; ++Cur) {
|
||||
PM->HostPtrToTableMap.erase(Cur->addr);
|
||||
}
|
||||
|
||||
// Remove translation table for this descriptor.
|
||||
auto TransTable =
|
||||
PM->HostEntriesBeginToTransTable.find(desc->HostEntriesBegin);
|
||||
PM->HostEntriesBeginToTransTable.find(Desc->HostEntriesBegin);
|
||||
if (TransTable != PM->HostEntriesBeginToTransTable.end()) {
|
||||
DP("Removing translation table for descriptor " DPxMOD "\n",
|
||||
DPxPTR(desc->HostEntriesBegin));
|
||||
DPxPTR(Desc->HostEntriesBegin));
|
||||
PM->HostEntriesBeginToTransTable.erase(TransTable);
|
||||
} else {
|
||||
DP("Translation table for descriptor " DPxMOD " cannot be found, probably "
|
||||
"it has been already removed.\n",
|
||||
DPxPTR(desc->HostEntriesBegin));
|
||||
DPxPTR(Desc->HostEntriesBegin));
|
||||
}
|
||||
|
||||
PM->TblMapMtx.unlock();
|
||||
|
|
Loading…
Reference in New Issue