diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index 43d05bb50b22..9d9b26ffd049 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -209,6 +209,18 @@ public: return States->MayContainAttachedPointers; } + /// Helper to make sure the entry is locked in a scope. + /// TODO: We should generalize this and use it for all our objects that use + /// lock/unlock methods. + struct LockGuard { + const HostDataToTargetTy &Entry; + + public: + LockGuard(const HostDataToTargetTy &Entry) : Entry(Entry) { Entry.lock(); } + ~LockGuard() { Entry.unlock(); } + }; + +private: void lock() const { States->UpdateMtx.lock(); } void unlock() const { States->UpdateMtx.unlock(); } diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 922441b4c051..4797c830c0c1 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "device.h" -#include "omptarget.h" #include "private.h" #include "rtl.h" @@ -20,8 +19,8 @@ #include #include -int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, - AsyncInfoTy &AsyncInfo) const { +int HostDataToTargetTy::addEventIfNecessary( + DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { // First, check if the user disabled atomic map transfer/malloc/dealloc. if (!PM->UseEventsForAtomicTransfers) return OFFLOAD_SUCCESS; @@ -61,7 +60,7 @@ DeviceTy::~DeviceTy() { } int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { - std::lock_guard LG(DataMapMtx); + DataMapMtx.lock(); // Check if entry exists auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin}); @@ -69,6 +68,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { // Mapping already exists bool isValid = search->HstPtrEnd == (uintptr_t)HstPtrBegin + Size && search->TgtPtrBegin == (uintptr_t)TgtPtrBegin; + DataMapMtx.unlock(); if (isValid) { DP("Attempt to re-associate the same device ptr+offset with the same " "host ptr, nothing to do\n"); @@ -99,11 +99,13 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { newEntry.dynRefCountToStr().c_str(), newEntry.holdRefCountToStr().c_str()); (void)newEntry; + DataMapMtx.unlock(); + return OFFLOAD_SUCCESS; } int DeviceTy::disassociatePtr(void *HstPtrBegin) { - std::lock_guard LG(DataMapMtx); + DataMapMtx.lock(); auto search = HostDataToTargetMap.find(HstPtrBeginTy{(uintptr_t)HstPtrBegin}); if (search != HostDataToTargetMap.end()) { @@ -120,6 +122,7 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) { if (Event) destroyEvent(Event); HostDataToTargetMap.erase(search); + DataMapMtx.unlock(); return OFFLOAD_SUCCESS; } else { REPORT("Trying to disassociate a pointer which was not mapped via " @@ -130,6 +133,7 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) { } // Mapping not found + DataMapMtx.unlock(); return OFFLOAD_FAIL; } @@ -179,11 +183,13 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) { return lr; } -TargetPointerResultTy DeviceTy::getTargetPointer( - void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, - bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) { +TargetPointerResultTy +DeviceTy::getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, + bool UpdateRefCount, bool HasCloseModifier, + bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo) { void *TargetPointer = nullptr; bool IsHostPtr = false; bool IsNew = false; @@ -280,7 +286,7 @@ TargetPointerResultTy DeviceTy::getTargetPointer( if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) { // Lock the entry before releasing the mapping table lock such that another // thread that could issue data movement will get the right result. - std::lock_guard LG(*Entry); + HostDataToTargetTy::LockGuard LG(*Entry); // Release the mapping table lock right after the entry is locked. DataMapMtx.unlock(); @@ -294,7 +300,8 @@ TargetPointerResultTy DeviceTy::getTargetPointer( // pointer points to a corrupted memory region so it doesn't make any // sense to continue to use it. TargetPointer = nullptr; - } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS) + } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != + OFFLOAD_SUCCESS) return {{false /* IsNewEntry */, false /* IsHostPointer */}, {} /* MapTableEntry */, nullptr /* TargetPointer */}; @@ -306,7 +313,7 @@ TargetPointerResultTy DeviceTy::getTargetPointer( // Note: Entry might be nullptr because of zero length array section. if (Entry != HostDataToTargetListTy::iterator() && !IsHostPtr && !HasPresentModifier) { - std::lock_guard LG(*Entry); + HostDataToTargetTy::LockGuard LG(*Entry); void *Event = Entry->getEvent(); if (Event) { int Ret = waitEvent(Event, AsyncInfo); @@ -336,7 +343,7 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool IsNew = false; IsHostPtr = false; IsLast = false; - std::lock_guard LG(DataMapMtx); + DataMapMtx.lock(); LookupResult lr = lookupMapping(HstPtrBegin, Size); if (lr.Flags.IsContained || @@ -387,6 +394,7 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, TargetPointer = HstPtrBegin; } + DataMapMtx.unlock(); return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer}; } @@ -436,6 +444,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, Ret = OFFLOAD_FAIL; } + DataMapMtx.unlock(); return Ret; } @@ -469,8 +478,9 @@ int32_t DeviceTy::initOnce() { // Load binary to device. __tgt_target_table *DeviceTy::load_binary(void *Img) { - std::lock_guardMtx)> LG(RTL->Mtx); + RTL->Mtx.lock(); __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img); + RTL->Mtx.unlock(); return rc; } @@ -632,11 +642,9 @@ bool device_is_ready(int device_num) { DP("Checking whether device %d is ready.\n", device_num); // Devices.size() can only change while registering a new // library, so try to acquire the lock of RTLs' mutex. - size_t DevicesSize; - { - std::lock_guardRTLsMtx)> LG(PM->RTLsMtx); - DevicesSize = PM->Devices.size(); - } + PM->RTLsMtx.lock(); + size_t DevicesSize = PM->Devices.size(); + PM->RTLsMtx.unlock(); if (DevicesSize <= (size_t)device_num) { DP("Device ID %d does not have a matching RTL\n", device_num); return false; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index de9f00ae617d..015e69af9058 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -79,101 +79,96 @@ static int InitLibrary(DeviceTy &Device) { bool supportsEmptyImages = Device.RTL->supports_empty_images && Device.RTL->supports_empty_images() > 0; - std::lock_guard LG( - Device.PendingGlobalsMtx); - { - std::lock_guardTrlTblMtx)> LG(PM->TrlTblMtx); - for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) { - TranslationTable *TransTable = - &PM->HostEntriesBeginToTransTable[HostEntriesBegin]; - if (TransTable->HostTable.EntriesBegin == - TransTable->HostTable.EntriesEnd && - !supportsEmptyImages) { - // No host entry so no need to proceed - continue; - } + Device.PendingGlobalsMtx.lock(); + PM->TrlTblMtx.lock(); + for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) { + TranslationTable *TransTable = + &PM->HostEntriesBeginToTransTable[HostEntriesBegin]; + if (TransTable->HostTable.EntriesBegin == + TransTable->HostTable.EntriesEnd && + !supportsEmptyImages) { + // No host entry so no need to proceed + continue; + } - if (TransTable->TargetsTable[device_id] != 0) { - // Library entries have already been processed - continue; - } + if (TransTable->TargetsTable[device_id] != 0) { + // Library entries have already been processed + continue; + } - // 1) get image. - assert(TransTable->TargetsImages.size() > (size_t)device_id && - "Not expecting a device ID outside the table's bounds!"); - __tgt_device_image *img = TransTable->TargetsImages[device_id]; - if (!img) { - REPORT("No image loaded for device id %d.\n", device_id); - rc = OFFLOAD_FAIL; - break; - } - // 2) load image into the target table. - __tgt_target_table *TargetTable = TransTable->TargetsTable[device_id] = - Device.load_binary(img); - // Unable to get table for this image: invalidate image and fail. - if (!TargetTable) { - REPORT("Unable to generate entries table for device id %d.\n", - device_id); - TransTable->TargetsImages[device_id] = 0; - rc = OFFLOAD_FAIL; - break; - } + // 1) get image. + assert(TransTable->TargetsImages.size() > (size_t)device_id && + "Not expecting a device ID outside the table's bounds!"); + __tgt_device_image *img = TransTable->TargetsImages[device_id]; + if (!img) { + REPORT("No image loaded for device id %d.\n", device_id); + rc = OFFLOAD_FAIL; + break; + } + // 2) load image into the target table. + __tgt_target_table *TargetTable = TransTable->TargetsTable[device_id] = + Device.load_binary(img); + // Unable to get table for this image: invalidate image and fail. + if (!TargetTable) { + REPORT("Unable to generate entries table for device id %d.\n", device_id); + TransTable->TargetsImages[device_id] = 0; + rc = OFFLOAD_FAIL; + break; + } - // Verify whether the two table sizes match. - size_t hsize = - TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin; - size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; + // Verify whether the two table sizes match. + size_t hsize = + TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin; + size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; - // Invalid image for these host entries! - if (hsize != tsize) { - REPORT( - "Host and Target tables mismatch for device id %d [%zx != %zx].\n", - device_id, hsize, tsize); - TransTable->TargetsImages[device_id] = 0; - TransTable->TargetsTable[device_id] = 0; - rc = OFFLOAD_FAIL; - break; - } + // Invalid image for these host entries! + if (hsize != tsize) { + REPORT("Host and Target tables mismatch for device id %d [%zx != %zx].\n", + device_id, hsize, tsize); + TransTable->TargetsImages[device_id] = 0; + TransTable->TargetsTable[device_id] = 0; + rc = OFFLOAD_FAIL; + break; + } - // process global data that needs to be mapped. - std::lock_guard LG(Device.DataMapMtx); + // process global data that needs to be mapped. + Device.DataMapMtx.lock(); + __tgt_target_table *HostTable = &TransTable->HostTable; + for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin, + *CurrHostEntry = HostTable->EntriesBegin, + *EntryDeviceEnd = TargetTable->EntriesEnd; + CurrDeviceEntry != EntryDeviceEnd; + CurrDeviceEntry++, CurrHostEntry++) { + if (CurrDeviceEntry->size != 0) { + // has data. + assert(CurrDeviceEntry->size == CurrHostEntry->size && + "data size mismatch"); - __tgt_target_table *HostTable = &TransTable->HostTable; - for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin, - *CurrHostEntry = HostTable->EntriesBegin, - *EntryDeviceEnd = TargetTable->EntriesEnd; - CurrDeviceEntry != EntryDeviceEnd; - CurrDeviceEntry++, CurrHostEntry++) { - if (CurrDeviceEntry->size != 0) { - // has data. - assert(CurrDeviceEntry->size == CurrHostEntry->size && - "data size mismatch"); - - // Fortran may use multiple weak declarations for the same symbol, - // therefore we must allow for multiple weak symbols to be loaded from - // the fat binary. Treat these mappings as any other "regular" - // mapping. Add entry to map. - if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size)) - continue; - DP("Add mapping from host " DPxMOD " to device " DPxMOD - " with size %zu" - "\n", - DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr), - CurrDeviceEntry->size); - Device.HostDataToTargetMap.emplace( - (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/, - (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, - (uintptr_t)CurrHostEntry->addr + - CurrHostEntry->size /*HstPtrEnd*/, - (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, - false /*UseHoldRefCount*/, nullptr /*Name*/, - true /*IsRefCountINF*/); - } + // Fortran may use multiple weak declarations for the same symbol, + // therefore we must allow for multiple weak symbols to be loaded from + // the fat binary. Treat these mappings as any other "regular" mapping. + // Add entry to map. + if (Device.getTgtPtrBegin(CurrHostEntry->addr, CurrHostEntry->size)) + continue; + DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu" + "\n", + DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr), + CurrDeviceEntry->size); + Device.HostDataToTargetMap.emplace( + (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/, + (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/, + (uintptr_t)CurrHostEntry->addr + CurrHostEntry->size /*HstPtrEnd*/, + (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/, + false /*UseHoldRefCount*/, nullptr /*Name*/, + true /*IsRefCountINF*/); } } + Device.DataMapMtx.unlock(); } + PM->TrlTblMtx.unlock(); if (rc != OFFLOAD_SUCCESS) { + Device.PendingGlobalsMtx.unlock(); return rc; } @@ -193,6 +188,7 @@ static int InitLibrary(DeviceTy &Device) { nullptr, nullptr, nullptr, 1, 1, true /*team*/, AsyncInfo); if (rc != OFFLOAD_SUCCESS) { REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); + Device.PendingGlobalsMtx.unlock(); return OFFLOAD_FAIL; } } @@ -206,6 +202,7 @@ static int InitLibrary(DeviceTy &Device) { return OFFLOAD_FAIL; } Device.HasPendingGlobals = false; + Device.PendingGlobalsMtx.unlock(); return OFFLOAD_SUCCESS; } @@ -249,7 +246,7 @@ void handleTargetOutcome(bool Success, ident_t *Loc) { } static void handleDefaultTargetOffload() { - std::lock_guardTargetOffloadMtx)> LG(PM->TargetOffloadMtx); + PM->TargetOffloadMtx.lock(); if (PM->TargetOffloadPolicy == tgt_default) { if (omp_get_num_devices() > 0) { DP("Default TARGET OFFLOAD policy is now mandatory " @@ -261,6 +258,7 @@ static void handleDefaultTargetOffload() { PM->TargetOffloadPolicy = tgt_disabled; } } + PM->TargetOffloadMtx.unlock(); } static bool isOffloadDisabled() { @@ -316,13 +314,10 @@ bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) { DeviceTy &Device = *PM->Devices[DeviceID]; // Check whether global data has been mapped for this device - bool HasPendingGlobals; - { - std::lock_guard LG( - Device.PendingGlobalsMtx); - HasPendingGlobals = Device.HasPendingGlobals; - } - if (HasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) { + Device.PendingGlobalsMtx.lock(); + bool hasPendingGlobals = Device.HasPendingGlobals; + Device.PendingGlobalsMtx.unlock(); + if (hasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) { REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID); handleTargetOutcome(false, Loc); return true; @@ -577,8 +572,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, } if (UpdateDevPtr) { - std::lock_guard LG( - *Pointer_TPR.MapTableEntry); + HostDataToTargetTy::LockGuard LG(*Pointer_TPR.MapTableEntry); Device.ShadowMtx.unlock(); DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", @@ -641,7 +635,7 @@ static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, uintptr_t LB = (uintptr_t)Begin; uintptr_t UB = LB + Size; // Now we are looking into the shadow map so we need to lock it. - std::lock_guard LG(Device.ShadowMtx); + Device.ShadowMtx.lock(); for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin(); Itr != Device.ShadowPtrMap.end();) { uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first; @@ -658,6 +652,7 @@ static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, if (CB(Itr) == OFFLOAD_FAIL) break; } + Device.ShadowMtx.unlock(); } } // namespace