forked from OSchip/llvm-project
[OpenMP] Add more pass-through functions in DeviceTy
Summary: 1. Add DeviceTy::data_alloc, DeviceTy::data_delete, and DeviceTy::synchronize pass-through functions. Avoid directly accessing Device.RTL. 2. Fix the type of the first argument of synchronize_ty in rtl.h; the device id is int32_t, which is consistent with other functions. Reviewers: tianshilei1992, jdoerfert Reviewed By: tianshilei1992 Subscribers: yaxunl, guansong, sstefan1, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D84487
This commit is contained in:
parent
4c6eebf86a
commit
9323166601
|
@ -57,8 +57,7 @@ EXTERN void *omp_target_alloc(size_t size, int device_num) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
DeviceTy &Device = Devices[device_num];
|
||||
rc = Device.RTL->data_alloc(Device.RTLDeviceID, size, NULL);
|
||||
rc = Devices[device_num].data_alloc(size);
|
||||
DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
|
||||
return rc;
|
||||
}
|
||||
|
@ -83,8 +82,7 @@ EXTERN void omp_target_free(void *device_ptr, int device_num) {
|
|||
return;
|
||||
}
|
||||
|
||||
DeviceTy &Device = Devices[device_num];
|
||||
Device.RTL->data_delete(Device.RTLDeviceID, (void *)device_ptr);
|
||||
Devices[device_num].data_delete(device_ptr);
|
||||
DP("omp_target_free deallocated device ptr\n");
|
||||
}
|
||||
|
||||
|
|
|
@ -217,7 +217,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
|
|||
} else if (Size) {
|
||||
// If it is not contained and Size > 0, we should create a new entry for it.
|
||||
IsNew = true;
|
||||
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
|
||||
uintptr_t tp = (uintptr_t)data_alloc(Size, HstPtrBegin);
|
||||
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
|
||||
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n",
|
||||
DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin),
|
||||
|
@ -299,7 +299,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
|
|||
if (HT.decRefCount() == 0) {
|
||||
DP("Deleting tgt data " DPxMOD " of size %ld\n",
|
||||
DPxPTR(HT.TgtPtrBegin), Size);
|
||||
RTL->data_delete(RTLDeviceID, (void *)HT.TgtPtrBegin);
|
||||
data_delete((void *)HT.TgtPtrBegin);
|
||||
DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
|
||||
", Size=%ld\n", (ForceDelete ? " (forced)" : ""),
|
||||
DPxPTR(HT.HstPtrBegin), DPxPTR(HT.TgtPtrBegin), Size);
|
||||
|
@ -351,6 +351,14 @@ __tgt_target_table *DeviceTy::load_binary(void *Img) {
|
|||
return rc;
|
||||
}
|
||||
|
||||
/// Pass-through to the plugin's data_alloc entry point for this device.
/// Requests \p Size bytes of device memory and returns whatever the RTL
/// returns (per the declaration's documentation: the device address on
/// success, nullptr on failure). \p HstPtr is forwarded unchanged; this
/// wrapper itself performs no host/target pointer association.
void *DeviceTy::data_alloc(int64_t Size, void *HstPtr) {
|
||||
// Callers pass only the size (and optionally a host pointer); the RTL-local
// device id is supplied here from this DeviceTy.
return RTL->data_alloc(RTLDeviceID, Size, HstPtr);
|
||||
}
|
||||
|
||||
/// Pass-through to the plugin's data_delete entry point for this device.
/// Releases the device memory at \p TgtPtrBegin and returns the RTL's status
/// code unchanged (per the declaration's documentation: OFFLOAD_SUCCESS or
/// OFFLOAD_FAIL).
int32_t DeviceTy::data_delete(void *TgtPtrBegin) {
|
||||
// The RTL-local device id is supplied here so callers do not touch RTL
// directly.
return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
|
||||
}
|
||||
|
||||
// Submit data to device
|
||||
int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr) {
|
||||
|
@ -423,6 +431,12 @@ bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Pass-through to the plugin's synchronize entry point for this device.
/// If the RTL provides a synchronize function, it is called with this
/// device's RTL-local id and \p AsyncInfoPtr, and its result is returned.
/// Otherwise the call is a no-op that reports OFFLOAD_SUCCESS.
int32_t DeviceTy::synchronize(__tgt_async_info *AsyncInfoPtr) {
|
||||
// The entry point may be null; presumably it is optional for plugins that
// do not support asynchronous operations (confirm against the plugin API).
if (RTL->synchronize)
|
||||
return RTL->synchronize(RTLDeviceID, AsyncInfoPtr);
|
||||
// No synchronize entry point: nothing to do, so report success.
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
/// Check whether a device has an associated RTL and initialize it if it's not
|
||||
/// already initialized.
|
||||
bool device_is_ready(int device_num) {
|
||||
|
|
|
@ -192,6 +192,18 @@ struct DeviceTy {
|
|||
int32_t initOnce();
|
||||
__tgt_target_table *load_binary(void *Img);
|
||||
|
||||
// device memory allocation/deallocation routines
|
||||
/// Allocates \p Size bytes on the device and returns the address/nullptr when
|
||||
/// succeeds/fails. \p HstPtr is an address of the host data which the
|
||||
/// allocated target data will be associated with. If it is unknown, the
|
||||
/// default value of \p HstPtr is nullptr. Note: this function doesn't do
|
||||
/// pointer association. Actually, all the __tgt_rtl_data_alloc
|
||||
/// implementations ignore \p HstPtr.
|
||||
void *data_alloc(int64_t Size, void *HstPtr = nullptr);
|
||||
/// Deallocates memory which \p TgtPtrBegin points at and returns
|
||||
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
|
||||
int32_t data_delete(void *TgtPtrBegin);
|
||||
|
||||
// Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
|
||||
// synchronous.
|
||||
// Copy data from host to device
|
||||
|
@ -213,6 +225,10 @@ struct DeviceTy {
|
|||
uint64_t LoopTripCount,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
|
||||
/// Synchronize device/queue/event based on \p AsyncInfoPtr and return
|
||||
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
|
||||
int32_t synchronize(__tgt_async_info *AsyncInfoPtr);
|
||||
|
||||
private:
|
||||
// Call to RTL
|
||||
void init(); // To be called only via DeviceTy::initOnce()
|
||||
|
|
|
@ -845,8 +845,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
|||
TgtBaseOffset = 0;
|
||||
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE) {
|
||||
// Allocate memory for (first-)private array
|
||||
TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID,
|
||||
arg_sizes[i], HstPtrBegin);
|
||||
TgtPtrBegin = Device.data_alloc(arg_sizes[i], HstPtrBegin);
|
||||
if (!TgtPtrBegin) {
|
||||
DP ("Data allocation for %sprivate array " DPxMOD " failed, "
|
||||
"abort target.\n",
|
||||
|
@ -929,7 +928,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
|||
|
||||
// Deallocate (first-)private arrays
|
||||
for (auto it : fpArrays) {
|
||||
int rt = Device.RTL->data_delete(Device.RTLDeviceID, it);
|
||||
int rt = Device.data_delete(it);
|
||||
if (rt != OFFLOAD_SUCCESS) {
|
||||
DP("Deallocation of (first-)private arrays failed.\n");
|
||||
return OFFLOAD_FAIL;
|
||||
|
@ -944,8 +943,5 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
|||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
if (Device.RTL->synchronize)
|
||||
return Device.RTL->synchronize(device_id, &AsyncInfo);
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
return Device.synchronize(&AsyncInfo);
|
||||
}
|
||||
|
|
|
@ -52,7 +52,7 @@ struct RTLInfoTy {
|
|||
int32_t, uint64_t,
|
||||
__tgt_async_info *);
|
||||
typedef int64_t(init_requires_ty)(int64_t);
|
||||
typedef int64_t(synchronize_ty)(int64_t, __tgt_async_info *);
|
||||
typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
|
||||
|
||||
int32_t Idx = -1; // RTL index, index is the number of devices
|
||||
// of other RTLs that were registered before,
|
||||
|
|
Loading…
Reference in New Issue