From 943d1d83dd7799c6371a42c224bfe072ddf2fe88 Mon Sep 17 00:00:00 2001
From: Shilei Tian
Date: Tue, 28 Dec 2021 17:42:31 -0500
Subject: [PATCH] [OpenMP][CUDA] Add resource pool for CUevent

Following D111954, this patch adds the resource pool for CUevent.

Reviewed By: ye-luo

Differential Revision: https://reviews.llvm.org/D116315
---
 openmp/libomptarget/plugins/cuda/src/rtl.cpp | 73 +++++++++++---------
 1 file changed, 42 insertions(+), 31 deletions(-)

Reviewer notes (text between the "---" line and the diff is not part of the
commit):
* This copy of the patch had its line breaks collapsed and every "<...>" span
  stripped. The line structure was rebuilt so that each hunk matches its
  "@@ -a,b +c,d @@" counts and the 42/31 totals above. Indentation follows
  the usual 2-space clang-format layout and should be confirmed against
  rtl.cpp before applying.
* Template arguments were restored only where the C++ needs them:
  reinterpret_cast<CUstream>/<CUevent>, AllocatorTy<CUevent>,
  ResourcePoolTy<CUstream>/<CUevent>, std::vector<DeviceDataTy> and
  std::vector<CUmodule>. "template <typename T>" and
  "std::vector<std::unique_ptr<StreamPoolTy>>" are inferred from the "\p T"
  comment and the S.reset() call -- TODO confirm.
* Not restored: the author's e-mail address on the From: line, and any runs
  of spaces inside string literals (e.g. the printf context line in the
  -1395 hunk). git am / git apply may need the original text for those.
* NOTE(review): EventPool is a single pool for the whole DeviceRTLTy while
  StreamPool is a vector of pools, and AllocatorTy<CUevent>::create sets no
  context before calling cuEventCreate. Presumably an event is tied to the
  context that is current when it is created; confirm that a pooled event is
  never recorded on a stream from another context, or consider one pool per
  device.
* NOTE(review): the new destroyEvent always returns OFFLOAD_SUCCESS, so
  unlike the removed free function it no longer reports a cuEventDestroy
  failure to its caller.

diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index 1afee7ce3a02..970a574b2eb3 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -114,21 +114,6 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
   return OFFLOAD_SUCCESS;
 }
 
-int createEvent(void **P) {
-  CUevent Event = nullptr;
-
-  CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  *P = Event;
-
-  return OFFLOAD_SUCCESS;
-}
-
 int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
   CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
   CUevent Event = reinterpret_cast<CUevent>(EventPtr);
@@ -157,19 +142,6 @@ int syncEvent(void *EventPtr) {
   return OFFLOAD_SUCCESS;
 }
 
-int destroyEvent(void *EventPtr) {
-  CUevent Event = reinterpret_cast<CUevent>(EventPtr);
-
-  CUresult Err = cuEventDestroy(Event);
-  if (Err != CUDA_SUCCESS) {
-    DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event));
-    CUDA_ERR_STRING(Err);
-    return OFFLOAD_FAIL;
-  }
-
-  return OFFLOAD_SUCCESS;
-}
-
 // Structure contains per-device data
 struct DeviceDataTy {
   /// List that contains all the kernels.
@@ -231,6 +203,28 @@ public:
   }
 };
 
+/// Allocator for CUevent.
+template <> class AllocatorTy<CUevent> {
+public:
+  /// See AllocatorTy::create.
+  int create(CUevent &Event) noexcept {
+    if (!checkResult(cuEventCreate(&Event, CU_EVENT_DEFAULT),
+                     "Error returned from cuEventCreate\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+
+  /// See AllocatorTy::destroy.
+  int destroy(CUevent Event) noexcept {
+    if (!checkResult(cuEventDestroy(Event),
+                     "Error returned from cuEventDestroy\n"))
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  }
+};
+
 /// A generic pool of resources where \p T is the resource type.
 /// \p T should be copyable as the object is stored in \p std::vector .
 template <typename T> class ResourcePoolTy {
@@ -341,6 +335,8 @@ class DeviceRTLTy {
   using StreamPoolTy = ResourcePoolTy<CUstream>;
   std::vector<std::unique_ptr<StreamPoolTy>> StreamPool;
 
+  ResourcePoolTy<CUevent> EventPool;
+
   std::vector<DeviceDataTy> DeviceData;
   std::vector<CUmodule> Modules;
 
@@ -493,7 +489,7 @@ public:
   DeviceRTLTy()
       : NumberOfDevices(0), EnvNumTeams(-1), EnvTeamLimit(-1),
         EnvTeamThreadLimit(-1), RequiresFlags(OMP_REQ_UNDEFINED),
-        DynamicMemorySize(0) {
+        DynamicMemorySize(0), EventPool(AllocatorTy<CUevent>()) {
 
     DP("Start initializing CUDA\n");
 
@@ -575,6 +571,8 @@ public:
     for (auto &S : StreamPool)
       S.reset();
 
+    EventPool.clear();
+
     for (DeviceDataTy &D : DeviceData) {
       // Destroy context
       if (D.Context) {
@@ -1395,6 +1393,19 @@ public:
     printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
   }
 
+  int createEvent(void **P) {
+    CUevent Event = nullptr;
+    if (EventPool.acquire(Event) != OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
+    *P = Event;
+    return OFFLOAD_SUCCESS;
+  }
+
+  int destroyEvent(void *EventPtr) {
+    EventPool.release(reinterpret_cast<CUevent>(EventPtr));
+    return OFFLOAD_SUCCESS;
+  }
+
   int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
                 void *EventPtr) const {
     CUstream Stream = getStream(DeviceId, AsyncInfo);
@@ -1620,7 +1631,7 @@ void __tgt_rtl_print_device_info(int32_t device_id) {
 
 int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
   assert(event && "event is nullptr");
-  return createEvent(event);
+  return DeviceRTL.createEvent(event);
 }
 
 int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
@@ -1650,7 +1661,7 @@ int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
 
 int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
   assert(event_ptr && "event is nullptr");
-  return destroyEvent(event_ptr);
+  return DeviceRTL.destroyEvent(event_ptr);
 }
 
 #ifdef __cplusplus