forked from OSchip/llvm-project
[OpenMP] Put old APIs back and add new `_async` series for backward compatibility
Summary: Following the comments at the bi-weekly meeting, this patch puts the old (synchronous) APIs back and adds a new `_async` series of entry points alongside them.
Reviewers: jdoerfert
Reviewed By: jdoerfert
Subscribers: yaxunl, guansong, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D77822
This commit is contained in:
parent
4e87823026
commit
03ff643d2e
|
@ -58,21 +58,24 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
|
||||||
// case an error occurred on the target device.
|
// case an error occurred on the target device.
|
||||||
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
|
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
|
||||||
|
|
||||||
// Pass the data content to the target device using the target address. If
|
// Pass the data content to the target device using the target address. In case
|
||||||
// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
|
// of success, return zero. Otherwise, return an error code.
|
||||||
// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
|
|
||||||
// case, it is synchronous. In case of success, return zero. Otherwise, return
|
|
||||||
// an error code.
|
|
||||||
int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
|
int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
|
||||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
int64_t Size);
|
||||||
|
|
||||||
// Retrieve the data content from the target device using its address. If
|
int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
|
||||||
// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
|
int64_t Size,
|
||||||
// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
// case, it is synchronous. In case of success, return zero. Otherwise, return
|
|
||||||
// an error code.
|
// Retrieve the data content from the target device using its address. In case
|
||||||
|
// of success, return zero. Otherwise, return an error code.
|
||||||
int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
|
int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
|
||||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
int64_t Size);
|
||||||
|
|
||||||
|
// Asynchronous version of __tgt_rtl_data_retrieve
|
||||||
|
int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
|
||||||
|
void *TargetPtr, int64_t Size,
|
||||||
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
|
|
||||||
// De-allocate the data referenced by target ptr on the device. In case of
|
// De-allocate the data referenced by target ptr on the device. In case of
|
||||||
// success, return zero. Otherwise, return an error code.
|
// success, return zero. Otherwise, return an error code.
|
||||||
|
@ -86,8 +89,12 @@ int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
|
||||||
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
|
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
|
||||||
// case of success, return zero. Otherwise, return an error code.
|
// case of success, return zero. Otherwise, return an error code.
|
||||||
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
||||||
ptrdiff_t *Offsets, int32_t NumArgs,
|
ptrdiff_t *Offsets, int32_t NumArgs);
|
||||||
__tgt_async_info *AsyncInfoPtr);
|
|
||||||
|
// Asynchronous version of __tgt_rtl_run_target_region
|
||||||
|
int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
|
||||||
|
ptrdiff_t *Offsets, int32_t NumArgs,
|
||||||
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
|
|
||||||
// Similar to __tgt_rtl_run_target_region, but additionally specify the
|
// Similar to __tgt_rtl_run_target_region, but additionally specify the
|
||||||
// number of teams to be created and a number of threads in each team. If
|
// number of teams to be created and a number of threads in each team. If
|
||||||
|
@ -97,8 +104,13 @@ int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
||||||
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
||||||
ptrdiff_t *Offsets, int32_t NumArgs,
|
ptrdiff_t *Offsets, int32_t NumArgs,
|
||||||
int32_t NumTeams, int32_t ThreadLimit,
|
int32_t NumTeams, int32_t ThreadLimit,
|
||||||
uint64_t loop_tripcount,
|
uint64_t loop_tripcount);
|
||||||
__tgt_async_info *AsyncInfoPtr);
|
|
||||||
|
// Asynchronous version of __tgt_rtl_run_target_team_region
|
||||||
|
int32_t __tgt_rtl_run_target_team_region_async(
|
||||||
|
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
|
||||||
|
int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
|
||||||
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
|
|
||||||
// Device synchronization. In case of success, return zero. Otherwise, return an
|
// Device synchronization. In case of success, return zero. Otherwise, return an
|
||||||
// error code.
|
// error code.
|
||||||
|
|
|
@ -725,40 +725,41 @@ void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
||||||
int64_t size, __tgt_async_info *async_info_ptr) {
|
int64_t size) {
|
||||||
// The function dataSubmit is always asynchronous. Considering some data
|
|
||||||
// transfer must be synchronous, we assume if async_info_ptr is nullptr, the
|
|
||||||
// transfer will be synchronous by creating a temporary async info and then
|
|
||||||
// synchronizing after call dataSubmit; otherwise, it is asynchronous.
|
|
||||||
if (async_info_ptr)
|
|
||||||
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
|
|
||||||
|
|
||||||
__tgt_async_info async_info;
|
__tgt_async_info async_info;
|
||||||
int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &async_info);
|
int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr, size,
|
||||||
|
&async_info);
|
||||||
if (rc != OFFLOAD_SUCCESS)
|
if (rc != OFFLOAD_SUCCESS)
|
||||||
return OFFLOAD_FAIL;
|
return OFFLOAD_FAIL;
|
||||||
|
|
||||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr,
|
||||||
int64_t size,
|
void *hst_ptr, int64_t size,
|
||||||
__tgt_async_info *async_info_ptr) {
|
__tgt_async_info *async_info_ptr) {
|
||||||
// The function dataRetrieve is always asynchronous. Considering some data
|
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||||
// transfer must be synchronous, we assume if async_info_ptr is nullptr, the
|
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
|
||||||
// transfer will be synchronous by creating a temporary async info and then
|
}
|
||||||
// synchronizing after call dataRetrieve; otherwise, it is asynchronous.
|
|
||||||
if (async_info_ptr)
|
|
||||||
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
|
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
||||||
|
int64_t size) {
|
||||||
__tgt_async_info async_info;
|
__tgt_async_info async_info;
|
||||||
int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &async_info);
|
int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr, size,
|
||||||
|
&async_info);
|
||||||
if (rc != OFFLOAD_SUCCESS)
|
if (rc != OFFLOAD_SUCCESS)
|
||||||
return OFFLOAD_FAIL;
|
return OFFLOAD_FAIL;
|
||||||
|
|
||||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
|
||||||
|
void *tgt_ptr, int64_t size,
|
||||||
|
__tgt_async_info *async_info_ptr) {
|
||||||
|
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||||
|
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
||||||
// Set the context we are using.
|
// Set the context we are using.
|
||||||
CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
|
CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
|
||||||
|
@ -782,8 +783,22 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||||
ptrdiff_t *tgt_offsets,
|
ptrdiff_t *tgt_offsets,
|
||||||
int32_t arg_num, int32_t team_num,
|
int32_t arg_num, int32_t team_num,
|
||||||
int32_t thread_limit,
|
int32_t thread_limit,
|
||||||
uint64_t loop_tripcount,
|
uint64_t loop_tripcount) {
|
||||||
__tgt_async_info *async_info) {
|
__tgt_async_info async_info;
|
||||||
|
int32_t rc = __tgt_rtl_run_target_team_region_async(
|
||||||
|
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
|
||||||
|
thread_limit, loop_tripcount, &async_info);
|
||||||
|
if (rc != OFFLOAD_SUCCESS)
|
||||||
|
return OFFLOAD_FAIL;
|
||||||
|
|
||||||
|
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_run_target_team_region_async(
|
||||||
|
int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
|
||||||
|
ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
|
||||||
|
int32_t thread_limit, uint64_t loop_tripcount,
|
||||||
|
__tgt_async_info *async_info) {
|
||||||
// Set the context we are using.
|
// Set the context we are using.
|
||||||
CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
|
CUresult err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
|
||||||
if (err != CUDA_SUCCESS) {
|
if (err != CUDA_SUCCESS) {
|
||||||
|
@ -890,21 +905,34 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
DP("Launch of entry point at " DPxMOD " successful!\n",
|
DP("Launch of entry point at " DPxMOD " successful!\n",
|
||||||
DPxPTR(tgt_entry_ptr));
|
DPxPTR(tgt_entry_ptr));
|
||||||
|
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
||||||
void **tgt_args, ptrdiff_t *tgt_offsets,
|
void **tgt_args, ptrdiff_t *tgt_offsets,
|
||||||
int32_t arg_num,
|
int32_t arg_num) {
|
||||||
__tgt_async_info *async_info) {
|
__tgt_async_info async_info;
|
||||||
|
int32_t rc = __tgt_rtl_run_target_region_async(
|
||||||
|
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &async_info);
|
||||||
|
if (rc != OFFLOAD_SUCCESS)
|
||||||
|
return OFFLOAD_FAIL;
|
||||||
|
|
||||||
|
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
|
||||||
|
void *tgt_entry_ptr, void **tgt_args,
|
||||||
|
ptrdiff_t *tgt_offsets,
|
||||||
|
int32_t arg_num,
|
||||||
|
__tgt_async_info *async_info) {
|
||||||
// use one team and the default number of threads.
|
// use one team and the default number of threads.
|
||||||
const int32_t team_num = 1;
|
const int32_t team_num = 1;
|
||||||
const int32_t thread_limit = 0;
|
const int32_t thread_limit = 0;
|
||||||
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
|
return __tgt_rtl_run_target_team_region_async(
|
||||||
tgt_offsets, arg_num, team_num,
|
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
|
||||||
thread_limit, 0, async_info);
|
thread_limit, 0, async_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *async_info) {
|
int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *async_info) {
|
||||||
|
|
|
@ -7,10 +7,14 @@ VERS1.0 {
|
||||||
__tgt_rtl_load_binary;
|
__tgt_rtl_load_binary;
|
||||||
__tgt_rtl_data_alloc;
|
__tgt_rtl_data_alloc;
|
||||||
__tgt_rtl_data_submit;
|
__tgt_rtl_data_submit;
|
||||||
|
__tgt_rtl_data_submit_async;
|
||||||
__tgt_rtl_data_retrieve;
|
__tgt_rtl_data_retrieve;
|
||||||
|
__tgt_rtl_data_retrieve_async;
|
||||||
__tgt_rtl_data_delete;
|
__tgt_rtl_data_delete;
|
||||||
__tgt_rtl_run_target_team_region;
|
__tgt_rtl_run_target_team_region;
|
||||||
|
__tgt_rtl_run_target_team_region_async;
|
||||||
__tgt_rtl_run_target_region;
|
__tgt_rtl_run_target_region;
|
||||||
|
__tgt_rtl_run_target_region_async;
|
||||||
__tgt_rtl_synchronize;
|
__tgt_rtl_synchronize;
|
||||||
local:
|
local:
|
||||||
*;
|
*;
|
||||||
|
|
|
@ -277,13 +277,13 @@ void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
||||||
int64_t size, __tgt_async_info *) {
|
int64_t size) {
|
||||||
memcpy(tgt_ptr, hst_ptr, size);
|
memcpy(tgt_ptr, hst_ptr, size);
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
||||||
int64_t size, __tgt_async_info *) {
|
int64_t size) {
|
||||||
memcpy(hst_ptr, tgt_ptr, size);
|
memcpy(hst_ptr, tgt_ptr, size);
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -293,11 +293,12 @@ int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
||||||
return OFFLOAD_SUCCESS;
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __tgt_rtl_run_target_team_region(
|
int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||||
int32_t device_id, void *tgt_entry_ptr, void **tgt_args,
|
void **tgt_args,
|
||||||
ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num,
|
ptrdiff_t *tgt_offsets,
|
||||||
int32_t thread_limit, uint64_t loop_tripcount /*not used*/,
|
int32_t arg_num, int32_t team_num,
|
||||||
__tgt_async_info *async_info /*not used*/) {
|
int32_t thread_limit,
|
||||||
|
uint64_t loop_tripcount /*not used*/) {
|
||||||
// ignore team num and thread limit.
|
// ignore team num and thread limit.
|
||||||
|
|
||||||
// Use libffi to launch execution.
|
// Use libffi to launch execution.
|
||||||
|
@ -331,17 +332,10 @@ int32_t __tgt_rtl_run_target_team_region(
|
||||||
|
|
||||||
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
||||||
void **tgt_args, ptrdiff_t *tgt_offsets,
|
void **tgt_args, ptrdiff_t *tgt_offsets,
|
||||||
int32_t arg_num,
|
int32_t arg_num) {
|
||||||
__tgt_async_info *async_info_ptr) {
|
|
||||||
// use one team and one thread.
|
// use one team and one thread.
|
||||||
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
|
return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args,
|
||||||
tgt_offsets, arg_num, 1, 1, 0,
|
tgt_offsets, arg_num, 1, 1, 0);
|
||||||
async_info_ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t __tgt_rtl_synchronize(int32_t device_id,
|
|
||||||
__tgt_async_info *async_info_ptr) {
|
|
||||||
return OFFLOAD_SUCCESS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -334,24 +334,33 @@ __tgt_target_table *DeviceTy::load_binary(void *Img) {
|
||||||
// Submit data to device
|
// Submit data to device
|
||||||
int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
|
int32_t DeviceTy::data_submit(void *TgtPtrBegin, void *HstPtrBegin,
|
||||||
int64_t Size, __tgt_async_info *AsyncInfoPtr) {
|
int64_t Size, __tgt_async_info *AsyncInfoPtr) {
|
||||||
|
if (!AsyncInfoPtr || !RTL->data_submit_async || !RTL->synchronize)
|
||||||
return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
|
return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
|
||||||
AsyncInfoPtr);
|
else
|
||||||
|
return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
|
||||||
|
AsyncInfoPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve data from device
|
// Retrieve data from device
|
||||||
int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
|
int32_t DeviceTy::data_retrieve(void *HstPtrBegin, void *TgtPtrBegin,
|
||||||
int64_t Size, __tgt_async_info *AsyncInfoPtr) {
|
int64_t Size, __tgt_async_info *AsyncInfoPtr) {
|
||||||
return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
|
if (!AsyncInfoPtr || !RTL->data_retrieve_async || !RTL->synchronize)
|
||||||
AsyncInfoPtr);
|
return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
|
||||||
|
else
|
||||||
|
return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
|
||||||
|
AsyncInfoPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run region on device
|
// Run region on device
|
||||||
int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
int32_t DeviceTy::run_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
||||||
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
||||||
__tgt_async_info *AsyncInfo) {
|
__tgt_async_info *AsyncInfoPtr) {
|
||||||
return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
if (!AsyncInfoPtr || !RTL->run_region_async || !RTL->synchronize) // optional async entry point may be null: use the synchronous one
|
||||||
TgtVarsSize, AsyncInfo);
|
return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
||||||
|
TgtVarsSize);
|
||||||
|
else
|
||||||
|
return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||||
|
TgtOffsets, TgtVarsSize, AsyncInfoPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run team region on device.
|
// Run team region on device.
|
||||||
|
@ -359,10 +368,15 @@ int32_t DeviceTy::run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
||||||
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
||||||
int32_t NumTeams, int32_t ThreadLimit,
|
int32_t NumTeams, int32_t ThreadLimit,
|
||||||
uint64_t LoopTripCount,
|
uint64_t LoopTripCount,
|
||||||
__tgt_async_info *AsyncInfo) {
|
__tgt_async_info *AsyncInfoPtr) {
|
||||||
return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
if (!AsyncInfoPtr || !RTL->run_team_region_async || !RTL->synchronize)
|
||||||
TgtVarsSize, NumTeams, ThreadLimit, LoopTripCount,
|
return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||||
AsyncInfo);
|
TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
|
||||||
|
LoopTripCount);
|
||||||
|
else
|
||||||
|
return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
|
||||||
|
TgtOffsets, TgtVarsSize, NumTeams,
|
||||||
|
ThreadLimit, LoopTripCount, AsyncInfoPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether a device has an associated RTL and initialize it if it's not
|
/// Check whether a device has an associated RTL and initialize it if it's not
|
||||||
|
|
|
@ -174,8 +174,8 @@ struct DeviceTy {
|
||||||
int32_t initOnce();
|
int32_t initOnce();
|
||||||
__tgt_target_table *load_binary(void *Img);
|
__tgt_target_table *load_binary(void *Img);
|
||||||
|
|
||||||
// Asynchronous data transfer. When AsyncInfoPtr is nullptr, the transfer will
|
// Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
|
||||||
// be synchronous.
|
// synchronous.
|
||||||
int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
|
int32_t data_submit(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
|
||||||
__tgt_async_info *AsyncInfoPtr);
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
|
int32_t data_retrieve(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size,
|
||||||
|
@ -183,11 +183,12 @@ struct DeviceTy {
|
||||||
|
|
||||||
int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
int32_t run_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
||||||
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
||||||
__tgt_async_info *AsyncInfo);
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
int32_t run_team_region(void *TgtEntryPtr, void **TgtVarsPtr,
|
||||||
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
|
||||||
int32_t NumTeams, int32_t ThreadLimit,
|
int32_t NumTeams, int32_t ThreadLimit,
|
||||||
uint64_t LoopTripCount, __tgt_async_info *AsyncInfo);
|
uint64_t LoopTripCount,
|
||||||
|
__tgt_async_info *AsyncInfoPtr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Call to RTL
|
// Call to RTL
|
||||||
|
|
|
@ -108,18 +108,18 @@ EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceTy& Device = Devices[device_id];
|
DeviceTy &Device = Devices[device_id];
|
||||||
|
|
||||||
#ifdef OMPTARGET_DEBUG
|
#ifdef OMPTARGET_DEBUG
|
||||||
for (int i=0; i<arg_num; ++i) {
|
for (int i = 0; i < arg_num; ++i) {
|
||||||
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
|
||||||
", Type=0x%" PRIx64 "\n", i, DPxPTR(args_base[i]), DPxPTR(args[i]),
|
", Type=0x%" PRIx64 "\n",
|
||||||
arg_sizes[i], arg_types[i]);
|
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int rc = target_data_begin(Device, arg_num, args_base,
|
int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
|
||||||
args, arg_sizes, arg_types);
|
arg_types, nullptr);
|
||||||
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
|
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,8 +171,8 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int rc = target_data_end(Device, arg_num, args_base,
|
int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes,
|
||||||
args, arg_sizes, arg_types);
|
arg_types, nullptr);
|
||||||
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
|
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -816,5 +816,8 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
||||||
return OFFLOAD_FAIL;
|
return OFFLOAD_FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Device.RTL->synchronize(device_id, &AsyncInfo);
|
if (Device.RTL->synchronize)
|
||||||
|
return Device.RTL->synchronize(device_id, &AsyncInfo);
|
||||||
|
|
||||||
|
return OFFLOAD_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,11 +20,11 @@
|
||||||
extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
|
extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
|
||||||
void **args_base, void **args, int64_t *arg_sizes,
|
void **args_base, void **args, int64_t *arg_sizes,
|
||||||
int64_t *arg_types,
|
int64_t *arg_types,
|
||||||
__tgt_async_info *async_info_ptr = nullptr);
|
__tgt_async_info *async_info_ptr);
|
||||||
|
|
||||||
extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
|
extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
|
||||||
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
void **args, int64_t *arg_sizes, int64_t *arg_types,
|
||||||
__tgt_async_info *async_info_ptr = nullptr);
|
__tgt_async_info *async_info_ptr);
|
||||||
|
|
||||||
extern int target_data_update(DeviceTy &Device, int32_t arg_num,
|
extern int target_data_update(DeviceTy &Device, int32_t arg_num,
|
||||||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
|
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
|
||||||
|
|
|
@ -96,43 +96,49 @@ void RTLsTy::LoadRTLs() {
|
||||||
R.RTLName = Name;
|
R.RTLName = Name;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!(*((void**) &R.is_valid_binary) = dlsym(
|
if (!(*((void **)&R.is_valid_binary) =
|
||||||
dynlib_handle, "__tgt_rtl_is_valid_binary")))
|
dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.number_of_devices) = dlsym(
|
if (!(*((void **)&R.number_of_devices) =
|
||||||
dynlib_handle, "__tgt_rtl_number_of_devices")))
|
dlsym(dynlib_handle, "__tgt_rtl_number_of_devices")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.init_device) = dlsym(
|
if (!(*((void **)&R.init_device) =
|
||||||
dynlib_handle, "__tgt_rtl_init_device")))
|
dlsym(dynlib_handle, "__tgt_rtl_init_device")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.load_binary) = dlsym(
|
if (!(*((void **)&R.load_binary) =
|
||||||
dynlib_handle, "__tgt_rtl_load_binary")))
|
dlsym(dynlib_handle, "__tgt_rtl_load_binary")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.data_alloc) = dlsym(
|
if (!(*((void **)&R.data_alloc) =
|
||||||
dynlib_handle, "__tgt_rtl_data_alloc")))
|
dlsym(dynlib_handle, "__tgt_rtl_data_alloc")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.data_submit) = dlsym(
|
if (!(*((void **)&R.data_submit) =
|
||||||
dynlib_handle, "__tgt_rtl_data_submit")))
|
dlsym(dynlib_handle, "__tgt_rtl_data_submit")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.data_retrieve) = dlsym(
|
if (!(*((void **)&R.data_retrieve) =
|
||||||
dynlib_handle, "__tgt_rtl_data_retrieve")))
|
dlsym(dynlib_handle, "__tgt_rtl_data_retrieve")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.data_delete) = dlsym(
|
if (!(*((void **)&R.data_delete) =
|
||||||
dynlib_handle, "__tgt_rtl_data_delete")))
|
dlsym(dynlib_handle, "__tgt_rtl_data_delete")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.run_region) = dlsym(
|
if (!(*((void **)&R.run_region) =
|
||||||
dynlib_handle, "__tgt_rtl_run_target_region")))
|
dlsym(dynlib_handle, "__tgt_rtl_run_target_region")))
|
||||||
continue;
|
continue;
|
||||||
if (!(*((void**) &R.run_team_region) = dlsym(
|
if (!(*((void **)&R.run_team_region) =
|
||||||
dynlib_handle, "__tgt_rtl_run_target_team_region")))
|
dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region")))
|
||||||
continue;
|
|
||||||
if (!(*((void**) &R.synchronize) = dlsym(
|
|
||||||
dynlib_handle, "__tgt_rtl_synchronize")))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Optional functions
|
// Optional functions
|
||||||
*((void**) &R.init_requires) = dlsym(
|
*((void **)&R.init_requires) =
|
||||||
dynlib_handle, "__tgt_rtl_init_requires");
|
dlsym(dynlib_handle, "__tgt_rtl_init_requires");
|
||||||
|
*((void **)&R.data_submit_async) =
|
||||||
|
dlsym(dynlib_handle, "__tgt_rtl_data_submit_async");
|
||||||
|
*((void **)&R.data_retrieve_async) =
|
||||||
|
dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async");
|
||||||
|
*((void **)&R.run_region_async) =
|
||||||
|
dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async");
|
||||||
|
*((void **)&R.run_team_region_async) =
|
||||||
|
dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async");
|
||||||
|
*((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize");
|
||||||
|
|
||||||
// No devices are supported by this RTL?
|
// No devices are supported by this RTL?
|
||||||
if (!(R.NumberOfDevices = R.number_of_devices())) {
|
if (!(R.NumberOfDevices = R.number_of_devices())) {
|
||||||
|
@ -140,8 +146,8 @@ void RTLsTy::LoadRTLs() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
DP("Registering RTL %s supporting %d devices!\n",
|
DP("Registering RTL %s supporting %d devices!\n", R.RTLName.c_str(),
|
||||||
R.RTLName.c_str(), R.NumberOfDevices);
|
R.NumberOfDevices);
|
||||||
|
|
||||||
// The RTL is valid! Will save the information in the RTLs list.
|
// The RTL is valid! Will save the information in the RTLs list.
|
||||||
AllRTLs.push_back(R);
|
AllRTLs.push_back(R);
|
||||||
|
|
|
@ -30,16 +30,23 @@ struct RTLInfoTy {
|
||||||
typedef int32_t(init_device_ty)(int32_t);
|
typedef int32_t(init_device_ty)(int32_t);
|
||||||
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
|
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
|
||||||
typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
|
typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
|
||||||
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t,
|
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
|
||||||
__tgt_async_info *);
|
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
|
||||||
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t,
|
__tgt_async_info *);
|
||||||
__tgt_async_info *);
|
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
|
||||||
|
typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
|
||||||
|
__tgt_async_info *);
|
||||||
typedef int32_t(data_delete_ty)(int32_t, void *);
|
typedef int32_t(data_delete_ty)(int32_t, void *);
|
||||||
typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *, int32_t,
|
typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
|
||||||
__tgt_async_info *);
|
int32_t);
|
||||||
|
typedef int32_t(run_region_async_ty)(int32_t, void *, void **, ptrdiff_t *,
|
||||||
|
int32_t, __tgt_async_info *);
|
||||||
typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
|
typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
|
||||||
int32_t, int32_t, int32_t, uint64_t,
|
int32_t, int32_t, int32_t, uint64_t);
|
||||||
__tgt_async_info *);
|
typedef int32_t(run_team_region_async_ty)(int32_t, void *, void **,
|
||||||
|
ptrdiff_t *, int32_t, int32_t,
|
||||||
|
int32_t, uint64_t,
|
||||||
|
__tgt_async_info *);
|
||||||
typedef int64_t(init_requires_ty)(int64_t);
|
typedef int64_t(init_requires_ty)(int64_t);
|
||||||
typedef int64_t(synchronize_ty)(int64_t, __tgt_async_info *);
|
typedef int64_t(synchronize_ty)(int64_t, __tgt_async_info *);
|
||||||
|
|
||||||
|
@ -62,10 +69,14 @@ struct RTLInfoTy {
|
||||||
load_binary_ty *load_binary = nullptr;
|
load_binary_ty *load_binary = nullptr;
|
||||||
data_alloc_ty *data_alloc = nullptr;
|
data_alloc_ty *data_alloc = nullptr;
|
||||||
data_submit_ty *data_submit = nullptr;
|
data_submit_ty *data_submit = nullptr;
|
||||||
|
data_submit_async_ty *data_submit_async = nullptr;
|
||||||
data_retrieve_ty *data_retrieve = nullptr;
|
data_retrieve_ty *data_retrieve = nullptr;
|
||||||
|
data_retrieve_async_ty *data_retrieve_async = nullptr;
|
||||||
data_delete_ty *data_delete = nullptr;
|
data_delete_ty *data_delete = nullptr;
|
||||||
run_region_ty *run_region = nullptr;
|
run_region_ty *run_region = nullptr;
|
||||||
|
run_region_async_ty *run_region_async = nullptr;
|
||||||
run_team_region_ty *run_team_region = nullptr;
|
run_team_region_ty *run_team_region = nullptr;
|
||||||
|
run_team_region_async_ty *run_team_region_async = nullptr;
|
||||||
init_requires_ty *init_requires = nullptr;
|
init_requires_ty *init_requires = nullptr;
|
||||||
synchronize_ty *synchronize = nullptr;
|
synchronize_ty *synchronize = nullptr;
|
||||||
|
|
||||||
|
@ -94,10 +105,14 @@ struct RTLInfoTy {
|
||||||
load_binary = r.load_binary;
|
load_binary = r.load_binary;
|
||||||
data_alloc = r.data_alloc;
|
data_alloc = r.data_alloc;
|
||||||
data_submit = r.data_submit;
|
data_submit = r.data_submit;
|
||||||
|
data_submit_async = r.data_submit_async;
|
||||||
data_retrieve = r.data_retrieve;
|
data_retrieve = r.data_retrieve;
|
||||||
|
data_retrieve_async = r.data_retrieve_async;
|
||||||
data_delete = r.data_delete;
|
data_delete = r.data_delete;
|
||||||
run_region = r.run_region;
|
run_region = r.run_region;
|
||||||
|
run_region_async = r.run_region_async;
|
||||||
run_team_region = r.run_team_region;
|
run_team_region = r.run_team_region;
|
||||||
|
run_team_region_async = r.run_team_region_async;
|
||||||
init_requires = r.init_requires;
|
init_requires = r.init_requires;
|
||||||
isUsed = r.isUsed;
|
isUsed = r.isUsed;
|
||||||
synchronize = r.synchronize;
|
synchronize = r.synchronize;
|
||||||
|
|
Loading…
Reference in New Issue