forked from OSchip/llvm-project
[Libomptarget][NFC] Move global Libomptarget state to a struct
Presently, there are a number of global variables in libomptarget (devices, RTLs, tables, mutexes, etc.) that are not placed within a struct. This patch places them into a struct ``PluginManager``. All of the functions that act on this data remain free. Differential Revision: https://reviews.llvm.org/D90519
This commit is contained in:
parent
0d4e1729e3
commit
a95b25b29e
|
@ -19,13 +19,13 @@
|
|||
#include <cstdlib>
|
||||
|
||||
EXTERN int omp_get_num_devices(void) {
|
||||
RTLsMtx->lock();
|
||||
size_t Devices_size = Devices.size();
|
||||
RTLsMtx->unlock();
|
||||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
|
||||
DP("Call to omp_get_num_devices returning %zd\n", Devices_size);
|
||||
DP("Call to omp_get_num_devices returning %zd\n", DevicesSize);
|
||||
|
||||
return Devices_size;
|
||||
return DevicesSize;
|
||||
}
|
||||
|
||||
EXTERN int omp_get_initial_device(void) {
|
||||
|
@ -56,7 +56,7 @@ EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
rc = Devices[device_num].allocData(size);
|
||||
rc = PM->Devices[device_num].allocData(size);
|
||||
DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
|
||||
return rc;
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
|||
return;
|
||||
}
|
||||
|
||||
Devices[device_num].deleteData(device_ptr);
|
||||
PM->Devices[device_num].deleteData(device_ptr);
|
||||
DP("omp_target_free deallocated device ptr\n");
|
||||
}
|
||||
|
||||
|
@ -99,16 +99,16 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
|
|||
return true;
|
||||
}
|
||||
|
||||
RTLsMtx->lock();
|
||||
size_t Devices_size = Devices.size();
|
||||
RTLsMtx->unlock();
|
||||
if (Devices_size <= (size_t)device_num) {
|
||||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
if (DevicesSize <= (size_t)device_num) {
|
||||
DP("Call to omp_target_is_present with invalid device ID, returning "
|
||||
"false\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DeviceTy& Device = Devices[device_num];
|
||||
DeviceTy &Device = PM->Devices[device_num];
|
||||
bool IsLast; // not used
|
||||
bool IsHostPtr;
|
||||
void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr);
|
||||
|
@ -117,7 +117,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
|
|||
// getTgtPtrBegin() function which means that there is no device
|
||||
// corresponding point for ptr. This function should return false
|
||||
// in that situation.
|
||||
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
rc = !IsHostPtr;
|
||||
DP("Call to omp_target_is_present returns %d\n", rc);
|
||||
return rc;
|
||||
|
@ -157,16 +157,16 @@ EXTERN int omp_target_memcpy(void *dst, void *src, size_t length,
|
|||
rc = OFFLOAD_FAIL;
|
||||
} else if (src_device == omp_get_initial_device()) {
|
||||
DP("copy from host to device\n");
|
||||
DeviceTy& DstDev = Devices[dst_device];
|
||||
DeviceTy &DstDev = PM->Devices[dst_device];
|
||||
rc = DstDev.submitData(dstAddr, srcAddr, length, nullptr);
|
||||
} else if (dst_device == omp_get_initial_device()) {
|
||||
DP("copy from device to host\n");
|
||||
DeviceTy& SrcDev = Devices[src_device];
|
||||
DeviceTy &SrcDev = PM->Devices[src_device];
|
||||
rc = SrcDev.retrieveData(dstAddr, srcAddr, length, nullptr);
|
||||
} else {
|
||||
DP("copy from device to device\n");
|
||||
DeviceTy &SrcDev = Devices[src_device];
|
||||
DeviceTy &DstDev = Devices[dst_device];
|
||||
DeviceTy &SrcDev = PM->Devices[src_device];
|
||||
DeviceTy &DstDev = PM->Devices[dst_device];
|
||||
// First try to use D2D memcpy which is more efficient. If fails, fall back
|
||||
// to the inefficient way.
|
||||
if (SrcDev.isDataExchangable(DstDev)) {
|
||||
|
@ -263,7 +263,7 @@ EXTERN int omp_target_associate_ptr(void *host_ptr, void *device_ptr,
|
|||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
DeviceTy& Device = Devices[device_num];
|
||||
DeviceTy &Device = PM->Devices[device_num];
|
||||
void *device_addr = (void *)((uint64_t)device_ptr + (uint64_t)device_offset);
|
||||
int rc = Device.associatePtr(host_ptr, device_addr, size);
|
||||
DP("omp_target_associate_ptr returns %d\n", rc);
|
||||
|
@ -290,7 +290,7 @@ EXTERN int omp_target_disassociate_ptr(void *host_ptr, int device_num) {
|
|||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
DeviceTy& Device = Devices[device_num];
|
||||
DeviceTy &Device = PM->Devices[device_num];
|
||||
int rc = Device.disassociatePtr(host_ptr);
|
||||
DP("omp_target_disassociate_ptr returns %d\n", rc);
|
||||
return rc;
|
||||
|
|
|
@ -20,9 +20,6 @@
|
|||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
/// Map between Device ID (i.e. openmp device id) and its DeviceTy.
|
||||
DevicesTy Devices;
|
||||
|
||||
DeviceTy::DeviceTy(const DeviceTy &D)
|
||||
: DeviceID(D.DeviceID), RTL(D.RTL), RTLDeviceID(D.RTLDeviceID),
|
||||
IsInit(D.IsInit), InitFlag(), HasPendingGlobals(D.HasPendingGlobals),
|
||||
|
@ -239,7 +236,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
|
|||
MESSAGE("device mapping required by 'present' map type modifier does not "
|
||||
"exist for host address " DPxMOD " (%" PRId64 " bytes)",
|
||||
DPxPTR(HstPtrBegin), Size);
|
||||
} else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
!HasCloseModifier) {
|
||||
// If unified shared memory is active, implicitly mapped variables that are
|
||||
// not privatized use host address. Any explicitly mapped variables also use
|
||||
|
@ -305,7 +302,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|||
Size, (UpdateRefCount ? " updated" : ""),
|
||||
HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
|
||||
rc = (void *)tp;
|
||||
} else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
// If the value isn't found in the mapping and unified shared memory
|
||||
// is on then it means we have stumbled upon a value which we need to
|
||||
// use directly from the host.
|
||||
|
@ -335,7 +332,8 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
|
|||
|
||||
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
|
||||
bool HasCloseModifier) {
|
||||
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
!HasCloseModifier)
|
||||
return OFFLOAD_SUCCESS;
|
||||
// Check if the pointer is contained in any sub-nodes.
|
||||
int rc;
|
||||
|
@ -370,7 +368,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
|
|||
void DeviceTy::init() {
|
||||
// Make call to init_requires if it exists for this plugin.
|
||||
if (RTL->init_requires)
|
||||
RTL->init_requires(RTLs->RequiresFlags);
|
||||
RTL->init_requires(PM->RTLs.RequiresFlags);
|
||||
int32_t Ret = RTL->init_device(RTLDeviceID);
|
||||
if (Ret != OFFLOAD_SUCCESS)
|
||||
return;
|
||||
|
@ -512,16 +510,16 @@ bool device_is_ready(int device_num) {
|
|||
DP("Checking whether device %d is ready.\n", device_num);
|
||||
// Devices.size() can only change while registering a new
|
||||
// library, so try to acquire the lock of RTLs' mutex.
|
||||
RTLsMtx->lock();
|
||||
size_t Devices_size = Devices.size();
|
||||
RTLsMtx->unlock();
|
||||
if (Devices_size <= (size_t)device_num) {
|
||||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
if (DevicesSize <= (size_t)device_num) {
|
||||
DP("Device ID %d does not have a matching RTL\n", device_num);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get device info
|
||||
DeviceTy &Device = Devices[device_num];
|
||||
DeviceTy &Device = PM->Devices[device_num];
|
||||
|
||||
DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
|
||||
Device.RTLDeviceID, Device.IsInit);
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "rtl.h"
|
||||
|
||||
// Forward declarations.
|
||||
struct RTLInfoTy;
|
||||
struct __tgt_bin_desc;
|
||||
|
@ -29,6 +31,14 @@ struct __tgt_target_table;
|
|||
struct __tgt_async_info;
|
||||
class MemoryManagerTy;
|
||||
|
||||
// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition
|
||||
enum kmp_target_offload_kind {
|
||||
tgt_disabled = 0,
|
||||
tgt_default = 1,
|
||||
tgt_mandatory = 2
|
||||
};
|
||||
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
|
||||
|
||||
/// Map between host data and target data.
|
||||
struct HostDataToTargetTy {
|
||||
uintptr_t HstPtrBase; // host info.
|
||||
|
@ -221,8 +231,31 @@ private:
|
|||
|
||||
/// Map between Device ID (i.e. openmp device id) and its DeviceTy.
|
||||
typedef std::vector<DeviceTy> DevicesTy;
|
||||
extern DevicesTy Devices;
|
||||
|
||||
extern bool device_is_ready(int device_num);
|
||||
|
||||
/// Struct for the data required to handle plugins
|
||||
struct PluginManager {
|
||||
/// RTLs identified on the host
|
||||
RTLsTy RTLs;
|
||||
|
||||
/// Devices associated with RTLs
|
||||
DevicesTy Devices;
|
||||
std::mutex RTLsMtx; ///< For RTLs and Devices
|
||||
|
||||
/// Translation table retrieved from the binary
|
||||
HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
|
||||
std::mutex TrlTblMtx; ///< For Translation Table
|
||||
|
||||
/// Map from ptrs on the host to an entry in the Translation Table
|
||||
HostPtrToTableMapTy HostPtrToTableMap;
|
||||
std::mutex TblMapMtx; ///< For HostPtrToTableMap
|
||||
|
||||
// Store target policy (disabled, mandatory, default)
|
||||
kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
|
||||
std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy
|
||||
};
|
||||
|
||||
extern PluginManager *PM;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,75 +20,73 @@
|
|||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
|
||||
// Store target policy (disabled, mandatory, default)
|
||||
kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default;
|
||||
std::mutex TargetOffloadMtx;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// manage the success or failure of a target construct
|
||||
static void HandleDefaultTargetOffload() {
|
||||
TargetOffloadMtx.lock();
|
||||
if (TargetOffloadPolicy == tgt_default) {
|
||||
PM->TargetOffloadMtx.lock();
|
||||
if (PM->TargetOffloadPolicy == tgt_default) {
|
||||
if (omp_get_num_devices() > 0) {
|
||||
DP("Default TARGET OFFLOAD policy is now mandatory "
|
||||
"(devices were found)\n");
|
||||
TargetOffloadPolicy = tgt_mandatory;
|
||||
PM->TargetOffloadPolicy = tgt_mandatory;
|
||||
} else {
|
||||
DP("Default TARGET OFFLOAD policy is now disabled "
|
||||
"(no devices were found)\n");
|
||||
TargetOffloadPolicy = tgt_disabled;
|
||||
PM->TargetOffloadPolicy = tgt_disabled;
|
||||
}
|
||||
}
|
||||
TargetOffloadMtx.unlock();
|
||||
PM->TargetOffloadMtx.unlock();
|
||||
}
|
||||
|
||||
static int IsOffloadDisabled() {
|
||||
if (TargetOffloadPolicy == tgt_default) HandleDefaultTargetOffload();
|
||||
return TargetOffloadPolicy == tgt_disabled;
|
||||
if (PM->TargetOffloadPolicy == tgt_default)
|
||||
HandleDefaultTargetOffload();
|
||||
return PM->TargetOffloadPolicy == tgt_disabled;
|
||||
}
|
||||
|
||||
static void HandleTargetOutcome(bool success) {
|
||||
switch (TargetOffloadPolicy) {
|
||||
case tgt_disabled:
|
||||
if (success) {
|
||||
FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled");
|
||||
}
|
||||
break;
|
||||
case tgt_default:
|
||||
FATAL_MESSAGE0(1, "default offloading policy must be switched to "
|
||||
"mandatory or disabled");
|
||||
break;
|
||||
case tgt_mandatory:
|
||||
if (!success) {
|
||||
if (getInfoLevel() > 1)
|
||||
for (const auto &Device : Devices)
|
||||
dumpTargetPointerMappings(Device);
|
||||
else
|
||||
FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target"
|
||||
"pointer maps\n");
|
||||
switch (PM->TargetOffloadPolicy) {
|
||||
case tgt_disabled:
|
||||
if (success) {
|
||||
FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled");
|
||||
}
|
||||
break;
|
||||
case tgt_default:
|
||||
FATAL_MESSAGE0(1, "default offloading policy must be switched to "
|
||||
"mandatory or disabled");
|
||||
break;
|
||||
case tgt_mandatory:
|
||||
if (!success) {
|
||||
if (getInfoLevel() > 1)
|
||||
for (const auto &Device : PM->Devices)
|
||||
dumpTargetPointerMappings(Device);
|
||||
else
|
||||
FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target"
|
||||
"pointer maps\n");
|
||||
|
||||
FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory");
|
||||
}
|
||||
break;
|
||||
FATAL_MESSAGE0(
|
||||
1, "failure of target construct while offloading is mandatory");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds requires flags
|
||||
EXTERN void __tgt_register_requires(int64_t flags) {
|
||||
RTLs->RegisterRequires(flags);
|
||||
PM->RTLs.RegisterRequires(flags);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// adds a target shared library to the target execution image
|
||||
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
|
||||
RTLs->RegisterLib(desc);
|
||||
PM->RTLs.RegisterLib(desc);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// unloads a target shared library
|
||||
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
|
||||
RTLs->UnregisterLib(desc);
|
||||
PM->RTLs.UnregisterLib(desc);
|
||||
}
|
||||
|
||||
/// creates host-to-target data mapping, stores it in the
|
||||
|
@ -131,7 +129,7 @@ EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
|
|||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = Devices[device_id];
|
||||
DeviceTy &Device = PM->Devices[device_id];
|
||||
|
||||
#ifdef OMPTARGET_DEBUG
|
||||
for (int i = 0; i < arg_num; ++i) {
|
||||
|
@ -188,16 +186,16 @@ EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
|
|||
device_id = omp_get_default_device();
|
||||
}
|
||||
|
||||
RTLsMtx->lock();
|
||||
size_t Devices_size = Devices.size();
|
||||
RTLsMtx->unlock();
|
||||
if (Devices_size <= (size_t)device_id) {
|
||||
PM->RTLsMtx.lock();
|
||||
size_t DevicesSize = PM->Devices.size();
|
||||
PM->RTLsMtx.unlock();
|
||||
if (DevicesSize <= (size_t)device_id) {
|
||||
DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id);
|
||||
HandleTargetOutcome(false);
|
||||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = Devices[device_id];
|
||||
DeviceTy &Device = PM->Devices[device_id];
|
||||
if (!Device.IsInit) {
|
||||
DP("Uninit device: ignore");
|
||||
HandleTargetOutcome(false);
|
||||
|
@ -262,7 +260,7 @@ EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
|
|||
return;
|
||||
}
|
||||
|
||||
DeviceTy& Device = Devices[device_id];
|
||||
DeviceTy &Device = PM->Devices[device_id];
|
||||
int rc = target_data_update(Device, arg_num, args_base,
|
||||
args, arg_sizes, arg_types, arg_mappers);
|
||||
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
|
||||
|
@ -439,8 +437,8 @@ EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
|
|||
|
||||
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
|
||||
loop_tripcount);
|
||||
TblMapMtx->lock();
|
||||
Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
|
||||
loop_tripcount);
|
||||
TblMapMtx->unlock();
|
||||
PM->TblMapMtx.lock();
|
||||
PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
|
||||
loop_tripcount);
|
||||
PM->TblMapMtx.unlock();
|
||||
}
|
||||
|
|
|
@ -59,11 +59,11 @@ static int InitLibrary(DeviceTy& Device) {
|
|||
int rc = OFFLOAD_SUCCESS;
|
||||
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
TrlTblMtx->lock();
|
||||
for (HostEntriesBeginToTransTableTy::iterator
|
||||
ii = HostEntriesBeginToTransTable->begin();
|
||||
ii != HostEntriesBeginToTransTable->end(); ++ii) {
|
||||
TranslationTable *TransTable = &ii->second;
|
||||
PM->TrlTblMtx.lock();
|
||||
for (HostEntriesBeginToTransTableTy::iterator entry_it =
|
||||
PM->HostEntriesBeginToTransTable.begin();
|
||||
entry_it != PM->HostEntriesBeginToTransTable.end(); ++entry_it) {
|
||||
TranslationTable *TransTable = &entry_it->second;
|
||||
if (TransTable->HostTable.EntriesBegin ==
|
||||
TransTable->HostTable.EntriesEnd) {
|
||||
// No host entry so no need to proceed
|
||||
|
@ -141,7 +141,7 @@ static int InitLibrary(DeviceTy& Device) {
|
|||
}
|
||||
Device.DataMapMtx.unlock();
|
||||
}
|
||||
TrlTblMtx->unlock();
|
||||
PM->TrlTblMtx.unlock();
|
||||
|
||||
if (rc != OFFLOAD_SUCCESS) {
|
||||
Device.PendingGlobalsMtx.unlock();
|
||||
|
@ -188,7 +188,7 @@ int CheckDeviceAndCtors(int64_t device_id) {
|
|||
}
|
||||
|
||||
// Get device info.
|
||||
DeviceTy &Device = Devices[device_id];
|
||||
DeviceTy &Device = PM->Devices[device_id];
|
||||
|
||||
// Check whether global data has been mapped for this device
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
|
@ -368,7 +368,7 @@ int targetDataBegin(DeviceTy &Device, int32_t arg_num, void **args_base,
|
|||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
|
||||
bool copy = false;
|
||||
if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
|
||||
if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
|
||||
HasCloseModifier) {
|
||||
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
|
||||
copy = true;
|
||||
|
@ -537,7 +537,7 @@ int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
|
|||
if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
|
||||
bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
bool CopyMember = false;
|
||||
if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
|
||||
if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
|
||||
HasCloseModifier) {
|
||||
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
|
||||
!(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
|
||||
|
@ -551,7 +551,7 @@ int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
|
|||
}
|
||||
|
||||
if ((DelEntry || Always || CopyMember) &&
|
||||
!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin)) {
|
||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
|
||||
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
|
||||
|
@ -684,7 +684,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin) {
|
||||
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
|
||||
DPxPTR(HstPtrBegin));
|
||||
|
@ -765,18 +765,19 @@ namespace {
|
|||
/// Find the table information in the map or look it up in the translation
|
||||
/// tables.
|
||||
TableMap *getTableMap(void *HostPtr) {
|
||||
std::lock_guard<std::mutex> TblMapLock(*TblMapMtx);
|
||||
HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap->find(HostPtr);
|
||||
std::lock_guard<std::mutex> TblMapLock(PM->TblMapMtx);
|
||||
HostPtrToTableMapTy::iterator TableMapIt =
|
||||
PM->HostPtrToTableMap.find(HostPtr);
|
||||
|
||||
if (TableMapIt != HostPtrToTableMap->end())
|
||||
if (TableMapIt != PM->HostPtrToTableMap.end())
|
||||
return &TableMapIt->second;
|
||||
|
||||
// We don't have a map. So search all the registered libraries.
|
||||
TableMap *TM = nullptr;
|
||||
std::lock_guard<std::mutex> TrlTblLock(*TrlTblMtx);
|
||||
std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
|
||||
for (HostEntriesBeginToTransTableTy::iterator Itr =
|
||||
HostEntriesBeginToTransTable->begin();
|
||||
Itr != HostEntriesBeginToTransTable->end(); ++Itr) {
|
||||
PM->HostEntriesBeginToTransTable.begin();
|
||||
Itr != PM->HostEntriesBeginToTransTable.end(); ++Itr) {
|
||||
// get the translation table (which contains all the good info).
|
||||
TranslationTable *TransTable = &Itr->second;
|
||||
// iterate over all the host table entries to see if we can locate the
|
||||
|
@ -787,7 +788,7 @@ TableMap *getTableMap(void *HostPtr) {
|
|||
continue;
|
||||
// we got a match, now fill the HostPtrToTableMap so that we
|
||||
// may avoid this search next time.
|
||||
TM = &(*HostPtrToTableMap)[HostPtr];
|
||||
TM = &(PM->HostPtrToTableMap)[HostPtr];
|
||||
TM->Table = TransTable;
|
||||
TM->Index = I;
|
||||
return TM;
|
||||
|
@ -802,11 +803,11 @@ TableMap *getTableMap(void *HostPtr) {
|
|||
/// __kmpc_push_target_tripcount in one thread but doing offloading in another
|
||||
/// thread, which might occur when we call task yield.
|
||||
uint64_t getLoopTripCount(int64_t DeviceId) {
|
||||
DeviceTy &Device = Devices[DeviceId];
|
||||
DeviceTy &Device = PM->Devices[DeviceId];
|
||||
uint64_t LoopTripCount = 0;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> TblMapLock(*TblMapMtx);
|
||||
std::lock_guard<std::mutex> TblMapLock(PM->TblMapMtx);
|
||||
auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL));
|
||||
if (I != Device.LoopTripCnt.end()) {
|
||||
LoopTripCount = I->second;
|
||||
|
@ -989,7 +990,7 @@ int processDataBefore(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
|||
std::vector<ptrdiff_t> &TgtOffsets,
|
||||
PrivateArgumentManagerTy &PrivateArgumentManager,
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
DeviceTy &Device = Devices[DeviceId];
|
||||
DeviceTy &Device = PM->Devices[DeviceId];
|
||||
int Ret = targetDataBegin(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes,
|
||||
ArgMappers, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
|
@ -1028,7 +1029,7 @@ int processDataBefore(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
|||
DPxPTR(HstPtrVal));
|
||||
continue;
|
||||
}
|
||||
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin) {
|
||||
DP("Unified memory is active, no need to map lambda captured"
|
||||
"variable (" DPxMOD ")\n",
|
||||
|
@ -1107,7 +1108,7 @@ int processDataAfter(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
|||
int64_t *ArgTypes, void **ArgMappers,
|
||||
PrivateArgumentManagerTy &PrivateArgumentManager,
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
DeviceTy &Device = Devices[DeviceId];
|
||||
DeviceTy &Device = PM->Devices[DeviceId];
|
||||
|
||||
// Move data from device.
|
||||
int Ret = targetDataEnd(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes,
|
||||
|
@ -1137,7 +1138,7 @@ int processDataAfter(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
|||
int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases,
|
||||
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, void **ArgMappers,
|
||||
int32_t TeamNum, int32_t ThreadLimit, int IsTeamConstruct) {
|
||||
DeviceTy &Device = Devices[DeviceId];
|
||||
DeviceTy &Device = PM->Devices[DeviceId];
|
||||
|
||||
TableMap *TM = getTableMap(HostPtr);
|
||||
// No map for this host pointer found!
|
||||
|
@ -1150,7 +1151,7 @@ int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases,
|
|||
// get target table.
|
||||
__tgt_target_table *TargetTable = nullptr;
|
||||
{
|
||||
std::lock_guard<std::mutex> TrlTblLock(*TrlTblMtx);
|
||||
std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
|
||||
assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
|
||||
"Not expecting a device ID outside the table's bounds!");
|
||||
TargetTable = TM->Table->TargetsTable[DeviceId];
|
||||
|
|
|
@ -40,15 +40,6 @@ extern int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
|
|||
|
||||
extern int CheckDeviceAndCtors(int64_t device_id);
|
||||
|
||||
// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition
|
||||
enum kmp_target_offload_kind {
|
||||
tgt_disabled = 0,
|
||||
tgt_default = 1,
|
||||
tgt_mandatory = 2
|
||||
};
|
||||
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
|
||||
extern kmp_target_offload_kind_t TargetOffloadPolicy;
|
||||
|
||||
// This structure stores information of a mapped memory region.
|
||||
struct MapComponentInfoTy {
|
||||
void *Base;
|
||||
|
|
|
@ -31,39 +31,23 @@ static const char *RTLNames[] = {
|
|||
/* AMDGPU target */ "libomptarget.rtl.amdgpu.so",
|
||||
};
|
||||
|
||||
RTLsTy *RTLs;
|
||||
std::mutex *RTLsMtx;
|
||||
|
||||
HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable;
|
||||
std::mutex *TrlTblMtx;
|
||||
|
||||
HostPtrToTableMapTy *HostPtrToTableMap;
|
||||
std::mutex *TblMapMtx;
|
||||
PluginManager *PM;
|
||||
|
||||
__attribute__((constructor(101))) void init() {
|
||||
DP("Init target library!\n");
|
||||
RTLs = new RTLsTy();
|
||||
RTLsMtx = new std::mutex();
|
||||
HostEntriesBeginToTransTable = new HostEntriesBeginToTransTableTy();
|
||||
TrlTblMtx = new std::mutex();
|
||||
HostPtrToTableMap = new HostPtrToTableMapTy();
|
||||
TblMapMtx = new std::mutex();
|
||||
PM = new PluginManager();
|
||||
}
|
||||
|
||||
__attribute__((destructor(101))) void deinit() {
|
||||
DP("Deinit target library!\n");
|
||||
delete RTLs;
|
||||
delete RTLsMtx;
|
||||
delete HostEntriesBeginToTransTable;
|
||||
delete TrlTblMtx;
|
||||
delete HostPtrToTableMap;
|
||||
delete TblMapMtx;
|
||||
delete PM;
|
||||
}
|
||||
|
||||
void RTLsTy::LoadRTLs() {
|
||||
// Parse environment variable OMP_TARGET_OFFLOAD (if set)
|
||||
TargetOffloadPolicy = (kmp_target_offload_kind_t) __kmpc_get_target_offload();
|
||||
if (TargetOffloadPolicy == tgt_disabled) {
|
||||
PM->TargetOffloadPolicy =
|
||||
(kmp_target_offload_kind_t)__kmpc_get_target_offload();
|
||||
if (PM->TargetOffloadPolicy == tgt_disabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -197,7 +181,7 @@ static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
|
|||
__tgt_device_image *img, RTLInfoTy *RTL) {
|
||||
|
||||
for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) {
|
||||
DeviceTy &Device = Devices[RTL->Idx + i];
|
||||
DeviceTy &Device = PM->Devices[RTL->Idx + i];
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
Device.HasPendingGlobals = true;
|
||||
for (__tgt_offload_entry *entry = img->EntriesBegin;
|
||||
|
@ -266,7 +250,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
// Attempt to load all plugins available in the system.
|
||||
std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
|
||||
|
||||
RTLsMtx->lock();
|
||||
PM->RTLsMtx.lock();
|
||||
// Register the images with the RTLs that understand them, if any.
|
||||
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
|
||||
// Obtain the image.
|
||||
|
@ -290,21 +274,21 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
if (!R.isUsed) {
|
||||
// Initialize the device information for the RTL we are about to use.
|
||||
DeviceTy device(&R);
|
||||
size_t start = Devices.size();
|
||||
Devices.resize(start + R.NumberOfDevices, device);
|
||||
size_t Start = PM->Devices.size();
|
||||
PM->Devices.resize(Start + R.NumberOfDevices, device);
|
||||
for (int32_t device_id = 0; device_id < R.NumberOfDevices;
|
||||
device_id++) {
|
||||
// global device ID
|
||||
Devices[start + device_id].DeviceID = start + device_id;
|
||||
PM->Devices[Start + device_id].DeviceID = Start + device_id;
|
||||
// RTL local device ID
|
||||
Devices[start + device_id].RTLDeviceID = device_id;
|
||||
PM->Devices[Start + device_id].RTLDeviceID = device_id;
|
||||
}
|
||||
|
||||
// Initialize the index of this RTL and save it in the used RTLs.
|
||||
R.Idx = (UsedRTLs.empty())
|
||||
? 0
|
||||
: UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices;
|
||||
assert((size_t) R.Idx == start &&
|
||||
assert((size_t) R.Idx == Start &&
|
||||
"RTL index should equal the number of devices used so far.");
|
||||
R.isUsed = true;
|
||||
UsedRTLs.push_back(&R);
|
||||
|
@ -313,22 +297,22 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
}
|
||||
|
||||
// Initialize (if necessary) translation table for this library.
|
||||
TrlTblMtx->lock();
|
||||
if(!HostEntriesBeginToTransTable->count(desc->HostEntriesBegin)){
|
||||
TranslationTable &tt =
|
||||
(*HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
tt.HostTable.EntriesBegin = desc->HostEntriesBegin;
|
||||
tt.HostTable.EntriesEnd = desc->HostEntriesEnd;
|
||||
PM->TrlTblMtx.lock();
|
||||
if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) {
|
||||
TranslationTable &TransTable =
|
||||
(PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin;
|
||||
TransTable.HostTable.EntriesEnd = desc->HostEntriesEnd;
|
||||
}
|
||||
|
||||
// Retrieve translation table for this library.
|
||||
TranslationTable &TransTable =
|
||||
(*HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
(PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
|
||||
|
||||
DP("Registering image " DPxMOD " with RTL %s!\n",
|
||||
DPxPTR(img->ImageStart), R.RTLName.c_str());
|
||||
RegisterImageIntoTranslationTable(TransTable, R, img);
|
||||
TrlTblMtx->unlock();
|
||||
PM->TrlTblMtx.unlock();
|
||||
FoundRTL = &R;
|
||||
|
||||
// Load ctors/dtors for static objects
|
||||
|
@ -342,8 +326,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart));
|
||||
}
|
||||
}
|
||||
RTLsMtx->unlock();
|
||||
|
||||
PM->RTLsMtx.unlock();
|
||||
|
||||
DP("Done registering entries!\n");
|
||||
}
|
||||
|
@ -351,7 +334,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
||||
DP("Unloading target library!\n");
|
||||
|
||||
RTLsMtx->lock();
|
||||
PM->RTLsMtx.lock();
|
||||
// Find which RTL understands each image, if any.
|
||||
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
|
||||
// Obtain the image.
|
||||
|
@ -379,7 +362,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
// Execute dtors for static objects if the device has been used, i.e.
|
||||
// if its PendingCtors list has been emptied.
|
||||
for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) {
|
||||
DeviceTy &Device = Devices[FoundRTL->Idx + i];
|
||||
DeviceTy &Device = PM->Devices[FoundRTL->Idx + i];
|
||||
Device.PendingGlobalsMtx.lock();
|
||||
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
|
||||
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
|
||||
|
@ -407,28 +390,28 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
|
|||
DPxPTR(img->ImageStart));
|
||||
}
|
||||
}
|
||||
RTLsMtx->unlock();
|
||||
PM->RTLsMtx.unlock();
|
||||
DP("Done unregistering images!\n");
|
||||
|
||||
// Remove entries from HostPtrToTableMap
|
||||
TblMapMtx->lock();
|
||||
// Remove entries from PM->HostPtrToTableMap
|
||||
PM->TblMapMtx.lock();
|
||||
for (__tgt_offload_entry *cur = desc->HostEntriesBegin;
|
||||
cur < desc->HostEntriesEnd; ++cur) {
|
||||
HostPtrToTableMap->erase(cur->addr);
|
||||
PM->HostPtrToTableMap.erase(cur->addr);
|
||||
}
|
||||
|
||||
// Remove translation table for this descriptor.
|
||||
auto tt = HostEntriesBeginToTransTable->find(desc->HostEntriesBegin);
|
||||
if (tt != HostEntriesBeginToTransTable->end()) {
|
||||
auto TransTable = PM->HostEntriesBeginToTransTable.find(desc->HostEntriesBegin);
|
||||
if (TransTable != PM->HostEntriesBeginToTransTable.end()) {
|
||||
DP("Removing translation table for descriptor " DPxMOD "\n",
|
||||
DPxPTR(desc->HostEntriesBegin));
|
||||
HostEntriesBeginToTransTable->erase(tt);
|
||||
PM->HostEntriesBeginToTransTable.erase(TransTable);
|
||||
} else {
|
||||
DP("Translation table for descriptor " DPxMOD " cannot be found, probably "
|
||||
"it has been already removed.\n", DPxPTR(desc->HostEntriesBegin));
|
||||
}
|
||||
|
||||
TblMapMtx->unlock();
|
||||
PM->TblMapMtx.unlock();
|
||||
|
||||
// TODO: Remove RTL and the devices it manages if it's not used anymore?
|
||||
// TODO: Write some RTL->unload_image(...) function?
|
||||
|
|
|
@ -158,8 +158,6 @@ public:
|
|||
// Unregister a shared library from all RTLs.
|
||||
void UnregisterLib(__tgt_bin_desc *desc);
|
||||
};
|
||||
extern RTLsTy *RTLs;
|
||||
extern std::mutex *RTLsMtx;
|
||||
|
||||
|
||||
/// Map between the host entry begin and the translation table. Each
|
||||
|
@ -177,8 +175,6 @@ struct TranslationTable {
|
|||
};
|
||||
typedef std::map<__tgt_offload_entry *, TranslationTable>
|
||||
HostEntriesBeginToTransTableTy;
|
||||
extern HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable;
|
||||
extern std::mutex *TrlTblMtx;
|
||||
|
||||
/// Map between the host ptr and a table index
|
||||
struct TableMap {
|
||||
|
@ -189,7 +185,5 @@ struct TableMap {
|
|||
: Table(table), Index(index) {}
|
||||
};
|
||||
typedef std::map<void *, TableMap> HostPtrToTableMapTy;
|
||||
extern HostPtrToTableMapTy *HostPtrToTableMap;
|
||||
extern std::mutex *TblMapMtx;
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue