[OpenMP][CUDA] Add resource pool for CUevent

Following D111954, this patch adds a resource pool for CUevent so that events are recycled across createEvent/destroyEvent calls instead of being created and destroyed on every request.

Reviewed By: ye-luo

Differential Revision: https://reviews.llvm.org/D116315
Author: Shilei Tian
Date:   2021-12-28 17:42:31 -05:00
Parent: 18ffb5dc25
Commit: 943d1d83dd

1 changed file with 42 additions and 31 deletions
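For context on the pattern this patch reuses: a resource pool keeps released objects alive and hands them back on the next acquire, so the underlying driver calls (here cuEventCreate/cuEventDestroy) only happen when the pool is empty or being torn down. Below is a minimal, self-contained sketch of that idea; the names (ResourcePool, IntAllocator) and the simplified error handling are illustrative and are not the actual libomptarget classes from D111954.

#include <cstdio>
#include <mutex>
#include <vector>

// Illustrative allocator: knows how to create and destroy one resource.
// The plugin specializes its allocator for CUevent with cuEventCreate /
// cuEventDestroy; plain heap integers keep this sketch self-contained.
struct IntAllocator {
  int create(int *&R) {
    R = new int(0);
    return 0; // 0 stands in for OFFLOAD_SUCCESS
  }
  int destroy(int *R) {
    delete R;
    return 0;
  }
};

// Minimal pool: released resources are stashed in a vector and handed
// back on the next acquire, so create/destroy only run when the pool is
// empty or being cleared.
template <typename T, typename AllocTy> class ResourcePool {
  std::mutex Mtx;
  std::vector<T> Resources;
  AllocTy Allocator;

public:
  explicit ResourcePool(AllocTy A) : Allocator(A) {}

  int acquire(T &R) {
    std::lock_guard<std::mutex> LG(Mtx);
    if (!Resources.empty()) {
      R = Resources.back(); // reuse a previously released resource
      Resources.pop_back();
      return 0;
    }
    return Allocator.create(R); // pool is empty: create a fresh one
  }

  void release(T R) {
    std::lock_guard<std::mutex> LG(Mtx);
    Resources.push_back(R); // keep it around instead of destroying it
  }

  void clear() {
    std::lock_guard<std::mutex> LG(Mtx);
    for (T R : Resources)
      Allocator.destroy(R);
    Resources.clear();
  }
};

int main() {
  ResourcePool<int *, IntAllocator> Pool{IntAllocator()};
  int *A = nullptr;
  Pool.acquire(A); // first acquire has to allocate
  Pool.release(A); // goes back into the pool, not freed
  int *B = nullptr;
  Pool.acquire(B); // hands back the same object, no new allocation
  std::printf("reused: %s\n", A == B ? "yes" : "no");
  Pool.release(B);
  Pool.clear(); // destroys whatever is still pooled
  return 0;
}

In the diff below, AllocatorTy<CUevent> plays the role of IntAllocator, and the generic ResourcePoolTy introduced in D111954 plays the role of ResourcePool.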


@@ -114,21 +114,6 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
   return OFFLOAD_SUCCESS;
 }
 
-int createEvent(void **P) {
-  CUevent Event = nullptr;
-
-  CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  *P = Event;
-
-  return OFFLOAD_SUCCESS;
-}
-
 int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
   CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
   CUevent Event = reinterpret_cast<CUevent>(EventPtr);
@@ -157,19 +142,6 @@ int syncEvent(void *EventPtr) {
   return OFFLOAD_SUCCESS;
 }
 
-int destroyEvent(void *EventPtr) {
-  CUevent Event = reinterpret_cast<CUevent>(EventPtr);
-
-  CUresult Err = cuEventDestroy(Event);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
-
 // Structure contains per-device data
 struct DeviceDataTy {
   /// List that contains all the kernels.
@@ -231,6 +203,28 @@ public:
   }
 };
 
+/// Allocator for CUevent.
+template <> class AllocatorTy<CUevent> {
+public:
+  /// See AllocatorTy<T>::create.
+  int create(CUevent &Event) noexcept {
+    if (!checkResult(cuEventCreate(&Event, CU_EVENT_DEFAULT),
+                     "Error returned from cuEventCreate\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+
+  /// See AllocatorTy<T>::destroy.
+  int destroy(CUevent Event) noexcept {
+    if (!checkResult(cuEventDestroy(Event),
+                     "Error returned from cuEventDestroy\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+};
+
 /// A generic pool of resources where \p T is the resource type.
 /// \p T should be copyable as the object is stored in \p std::vector .
 template <typename T> class ResourcePoolTy {
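The generic ResourcePoolTy that this specialization plugs into is not part of this diff (it landed with D111954). Pieced together from how EventPool is used later in the patch (a constructor taking the allocator, plus acquire, release, and clear), the intended lifetime of a pooled event looks roughly like the sketch below; exampleEventLifetime is a hypothetical helper for illustration, not code from the plugin.

// Hedged sketch: uses only the pool operations visible in this diff.
int exampleEventLifetime(ResourcePoolTy<CUevent> &EventPool) {
  CUevent Event = nullptr;
  // Either recycles a previously released event or falls back to
  // AllocatorTy<CUevent>::create (cuEventCreate) when the pool is empty.
  if (EventPool.acquire(Event) != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;

  // ... cuEventRecord / cuStreamWaitEvent on the acquired event ...

  // Hands the event back for reuse; it is not destroyed here.
  EventPool.release(Event);
  return OFFLOAD_SUCCESS;
}
// At plugin teardown, EventPool.clear() invokes
// AllocatorTy<CUevent>::destroy (cuEventDestroy) on every pooled event.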
@@ -341,6 +335,8 @@ class DeviceRTLTy {
   using StreamPoolTy = ResourcePoolTy<CUstream>;
   std::vector<std::unique_ptr<StreamPoolTy>> StreamPool;
 
+  ResourcePoolTy<CUevent> EventPool;
+
   std::vector<DeviceDataTy> DeviceData;
   std::vector<CUmodule> Modules;
@@ -493,7 +489,7 @@ public:
   DeviceRTLTy()
       : NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1),
         EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED),
-        DynamicMemorySize(0) {
+        DynamicMemorySize(0), EventPool(AllocatorTy<CUevent>()) {
 
     DP("Start initializing CUDA\n");
@@ -575,6 +571,8 @@ public:
     for (auto &S : StreamPool)
       S.reset();
 
+    EventPool.clear();
+
     for (DeviceDataTy &D : DeviceData) {
       // Destroy context
       if (D.Context) {
@@ -1395,6 +1393,19 @@ public:
     printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
   }
 
+  int createEvent(void **P) {
+    CUevent Event = nullptr;
+    if (EventPool.acquire(Event) != OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
+    *P = Event;
+    return OFFLOAD_SUCCESS;
+  }
+
+  int destroyEvent(void *EventPtr) {
+    EventPool.release(reinterpret_cast<CUevent>(EventPtr));
+    return OFFLOAD_SUCCESS;
+  }
+
   int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
                 void *EventPtr) const {
     CUstream Stream = getStream(DeviceId, AsyncInfo);
@@ -1620,7 +1631,7 @@ void __tgt_rtl_print_device_info(int32_t device_id) {
 int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
   assert(event && "event is nullptr");
 
-  return createEvent(event);
+  return DeviceRTL.createEvent(event);
 }
 
 int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
@@ -1650,7 +1661,7 @@ int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
 int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
   assert(event_ptr && "event is nullptr");
 
-  return destroyEvent(event_ptr);
+  return DeviceRTL.destroyEvent(event_ptr);
 }
 
 #ifdef __cplusplus