[LIBOMPTARGET]Fix PR44933: fix crash because of the too early deinitialization of libomptarget.

Summary:
Instead of using global variables with unpredicted time of
deinitialization, use dynamically allocated variables with functions
explicitly marked as global constructor/destructor and priority. This
allows to prevent the crash because of the incorrect order of dynamic
libraries deinitialization.

Reviewers: grokos, hfinkel

Subscribers: caomhin, kkwli0, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D74837
This commit is contained in:
Alexey Bataev 2020-02-19 09:41:50 -05:00
parent c5ce6d8b56
commit 63cef621f9
7 changed files with 127 additions and 73 deletions

View File

@ -21,9 +21,9 @@
#include <cstdlib>
EXTERN int omp_get_num_devices(void) {
RTLsMtx.lock();
RTLsMtx->lock();
size_t Devices_size = Devices.size();
RTLsMtx.unlock();
RTLsMtx->unlock();
DP("Call to omp_get_num_devices returning %zd\n", Devices_size);
@ -102,9 +102,9 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
return true;
}
RTLsMtx.lock();
RTLsMtx->lock();
size_t Devices_size = Devices.size();
RTLsMtx.unlock();
RTLsMtx->unlock();
if (Devices_size <= (size_t)device_num) {
DP("Call to omp_target_is_present with invalid device ID, returning "
"false\n");
@ -120,7 +120,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
// getTgtPtrBegin() function which means that there is no device
// corresponding point for ptr. This function should return false
// in that situation.
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
rc = !IsHostPtr;
DP("Call to omp_target_is_present returns %d\n", rc);
return rc;

View File

@ -189,7 +189,8 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
// maps are respected.
// In addition to the mapping rules above, the close map
// modifier forces the mapping of the variable to the device.
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) {
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
!HasCloseModifier) {
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
IsHostPtr = true;
@ -235,7 +236,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
(UpdateRefCount ? " updated" : ""),
HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str());
rc = (void *)tp;
} else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
} else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
// If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to
// use directly from the host.
@ -265,7 +266,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
bool HasCloseModifier) {
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
return OFFLOAD_SUCCESS;
// Check if the pointer is contained in any sub-nodes.
int rc;
@ -299,7 +300,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
void DeviceTy::init() {
// Make call to init_requires if it exists for this plugin.
if (RTL->init_requires)
RTL->init_requires(RTLs.RequiresFlags);
RTL->init_requires(RTLs->RequiresFlags);
int32_t rc = RTL->init_device(RTLDeviceID);
if (rc == OFFLOAD_SUCCESS) {
IsInit = true;
@ -363,9 +364,9 @@ bool device_is_ready(int device_num) {
DP("Checking whether device %d is ready.\n", device_num);
// Devices.size() can only change while registering a new
// library, so try to acquire the lock of RTLs' mutex.
RTLsMtx.lock();
RTLsMtx->lock();
size_t Devices_size = Devices.size();
RTLsMtx.unlock();
RTLsMtx->unlock();
if (Devices_size <= (size_t)device_num) {
DP("Device ID %d does not have a matching RTL\n", device_num);
return false;

View File

@ -71,19 +71,19 @@ static void HandleTargetOutcome(bool success) {
////////////////////////////////////////////////////////////////////////////////
/// adds requires flags
EXTERN void __tgt_register_requires(int64_t flags) {
RTLs.RegisterRequires(flags);
RTLs->RegisterRequires(flags);
}
////////////////////////////////////////////////////////////////////////////////
/// adds a target shared library to the target execution image
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
RTLs.RegisterLib(desc);
RTLs->RegisterLib(desc);
}
////////////////////////////////////////////////////////////////////////////////
/// unloads a target shared library
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
RTLs.UnregisterLib(desc);
RTLs->UnregisterLib(desc);
}
/// creates host-to-target data mapping, stores it in the
@ -147,9 +147,9 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
device_id = omp_get_default_device();
}
RTLsMtx.lock();
RTLsMtx->lock();
size_t Devices_size = Devices.size();
RTLsMtx.unlock();
RTLsMtx->unlock();
if (Devices_size <= (size_t)device_id) {
DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id);
HandleTargetOutcome(false);
@ -343,8 +343,8 @@ EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
loop_tripcount);
TblMapMtx.lock();
TblMapMtx->lock();
Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
loop_tripcount);
TblMapMtx.unlock();
TblMapMtx->unlock();
}

View File

@ -67,10 +67,10 @@ static int InitLibrary(DeviceTy& Device) {
int rc = OFFLOAD_SUCCESS;
Device.PendingGlobalsMtx.lock();
TrlTblMtx.lock();
TrlTblMtx->lock();
for (HostEntriesBeginToTransTableTy::iterator
ii = HostEntriesBeginToTransTable.begin();
ii != HostEntriesBeginToTransTable.end(); ++ii) {
ii = HostEntriesBeginToTransTable->begin();
ii != HostEntriesBeginToTransTable->end(); ++ii) {
TranslationTable *TransTable = &ii->second;
if (TransTable->HostTable.EntriesBegin ==
TransTable->HostTable.EntriesEnd) {
@ -149,7 +149,7 @@ static int InitLibrary(DeviceTy& Device) {
}
Device.DataMapMtx.unlock();
}
TrlTblMtx.unlock();
TrlTblMtx->unlock();
if (rc != OFFLOAD_SUCCESS) {
Device.PendingGlobalsMtx.unlock();
@ -299,7 +299,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
bool copy = false;
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
HasCloseModifier) {
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
copy = true;
@ -401,7 +401,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
bool CopyMember = false;
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
HasCloseModifier) {
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
@ -416,7 +416,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
}
if ((DelEntry || Always || CopyMember) &&
!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin)) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@ -499,7 +499,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
continue;
}
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
DPxPTR(HstPtrBegin));
@ -590,14 +590,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
// Find the table information in the map or look it up in the translation
// tables.
TableMap *TM = 0;
TblMapMtx.lock();
HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr);
if (TableMapIt == HostPtrToTableMap.end()) {
TblMapMtx->lock();
HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap->find(host_ptr);
if (TableMapIt == HostPtrToTableMap->end()) {
// We don't have a map. So search all the registered libraries.
TrlTblMtx.lock();
TrlTblMtx->lock();
for (HostEntriesBeginToTransTableTy::iterator
ii = HostEntriesBeginToTransTable.begin(),
ie = HostEntriesBeginToTransTable.end();
ii = HostEntriesBeginToTransTable->begin(),
ie = HostEntriesBeginToTransTable->end();
!TM && ii != ie; ++ii) {
// get the translation table (which contains all the good info).
TranslationTable *TransTable = &ii->second;
@ -611,17 +611,17 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
continue;
// we got a match, now fill the HostPtrToTableMap so that we
// may avoid this search next time.
TM = &HostPtrToTableMap[host_ptr];
TM = &(*HostPtrToTableMap)[host_ptr];
TM->Table = TransTable;
TM->Index = i;
break;
}
}
TrlTblMtx.unlock();
TrlTblMtx->unlock();
} else {
TM = &TableMapIt->second;
}
TblMapMtx.unlock();
TblMapMtx->unlock();
// No map for this host pointer found!
if (!TM) {
@ -631,11 +631,11 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
}
// get target table.
TrlTblMtx.lock();
TrlTblMtx->lock();
assert(TM->Table->TargetsTable.size() > (size_t)device_id &&
"Not expecting a device ID outside the table's bounds!");
__tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id];
TrlTblMtx.unlock();
TrlTblMtx->unlock();
assert(TargetTable && "Global data has not been mapped\n");
// Move data to device.
@ -682,7 +682,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
DPxPTR(HstPtrVal));
continue;
}
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("Unified memory is active, no need to map lambda captured"
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));
@ -765,14 +765,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
// Pop loop trip count
uint64_t ltc = 0;
TblMapMtx.lock();
TblMapMtx->lock();
auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL));
if (I != Device.LoopTripCnt.end()) {
ltc = I->second;
Device.LoopTripCnt.erase(I);
DP("loop trip count is %lu.\n", ltc);
}
TblMapMtx.unlock();
TblMapMtx->unlock();
// Launch device execution.
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",

View File

@ -28,14 +28,34 @@ static const char *RTLNames[] = {
/* CUDA target */ "libomptarget.rtl.cuda.so",
/* AArch64 target */ "libomptarget.rtl.aarch64.so"};
RTLsTy RTLs;
std::mutex RTLsMtx;
RTLsTy *RTLs;
std::mutex *RTLsMtx;
HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
std::mutex TrlTblMtx;
HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable;
std::mutex *TrlTblMtx;
HostPtrToTableMapTy HostPtrToTableMap;
std::mutex TblMapMtx;
HostPtrToTableMapTy *HostPtrToTableMap;
std::mutex *TblMapMtx;
__attribute__((constructor(0))) void init() {
DP("Init target library!\n");
RTLs = new RTLsTy();
RTLsMtx = new std::mutex();
HostEntriesBeginToTransTable = new HostEntriesBeginToTransTableTy();
TrlTblMtx = new std::mutex();
HostPtrToTableMap = new HostPtrToTableMapTy();
TblMapMtx = new std::mutex();
}
__attribute__((destructor(0))) void deinit() {
DP("Deinit target library!\n");
delete RTLs;
delete RTLsMtx;
delete HostEntriesBeginToTransTable;
delete TrlTblMtx;
delete HostPtrToTableMap;
delete TblMapMtx;
}
void RTLsTy::LoadRTLs() {
#ifdef OMPTARGET_DEBUG
@ -234,7 +254,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
// Attempt to load all plugins available in the system.
std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
RTLsMtx.lock();
RTLsMtx->lock();
// Register the images with the RTLs that understand them, if any.
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
// Obtain the image.
@ -244,7 +264,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image.
for (auto &R : RTLs.AllRTLs) {
for (auto &R : AllRTLs) {
if (!R.is_valid_binary(img)) {
DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
DPxPTR(img->ImageStart), R.RTLName.c_str());
@ -269,35 +289,34 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
}
// Initialize the index of this RTL and save it in the used RTLs.
R.Idx = (RTLs.UsedRTLs.empty())
R.Idx = (UsedRTLs.empty())
? 0
: RTLs.UsedRTLs.back()->Idx +
RTLs.UsedRTLs.back()->NumberOfDevices;
: UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices;
assert((size_t) R.Idx == start &&
"RTL index should equal the number of devices used so far.");
R.isUsed = true;
RTLs.UsedRTLs.push_back(&R);
UsedRTLs.push_back(&R);
DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx);
}
// Initialize (if necessary) translation table for this library.
TrlTblMtx.lock();
if(!HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)){
TrlTblMtx->lock();
if(!HostEntriesBeginToTransTable->count(desc->HostEntriesBegin)){
TranslationTable &tt =
HostEntriesBeginToTransTable[desc->HostEntriesBegin];
(*HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
tt.HostTable.EntriesBegin = desc->HostEntriesBegin;
tt.HostTable.EntriesEnd = desc->HostEntriesEnd;
}
// Retrieve translation table for this library.
TranslationTable &TransTable =
HostEntriesBeginToTransTable[desc->HostEntriesBegin];
(*HostEntriesBeginToTransTable)[desc->HostEntriesBegin];
DP("Registering image " DPxMOD " with RTL %s!\n",
DPxPTR(img->ImageStart), R.RTLName.c_str());
RegisterImageIntoTranslationTable(TransTable, R, img);
TrlTblMtx.unlock();
TrlTblMtx->unlock();
FoundRTL = &R;
// Load ctors/dtors for static objects
@ -311,7 +330,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart));
}
}
RTLsMtx.unlock();
RTLsMtx->unlock();
DP("Done registering entries!\n");
@ -320,7 +339,7 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
DP("Unloading target library!\n");
RTLsMtx.lock();
RTLsMtx->lock();
// Find which RTL understands each image, if any.
for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
// Obtain the image.
@ -330,7 +349,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
// Scan the RTLs that have associated images until we find one that supports
// the current image. We only need to scan RTLs that are already being used.
for (auto *R : RTLs.UsedRTLs) {
for (auto *R : UsedRTLs) {
assert(R->isUsed && "Expecting used RTLs.");
@ -376,28 +395,28 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
DPxPTR(img->ImageStart));
}
}
RTLsMtx.unlock();
RTLsMtx->unlock();
DP("Done unregistering images!\n");
// Remove entries from HostPtrToTableMap
TblMapMtx.lock();
TblMapMtx->lock();
for (__tgt_offload_entry *cur = desc->HostEntriesBegin;
cur < desc->HostEntriesEnd; ++cur) {
HostPtrToTableMap.erase(cur->addr);
HostPtrToTableMap->erase(cur->addr);
}
// Remove translation table for this descriptor.
auto tt = HostEntriesBeginToTransTable.find(desc->HostEntriesBegin);
if (tt != HostEntriesBeginToTransTable.end()) {
auto tt = HostEntriesBeginToTransTable->find(desc->HostEntriesBegin);
if (tt != HostEntriesBeginToTransTable->end()) {
DP("Removing translation table for descriptor " DPxMOD "\n",
DPxPTR(desc->HostEntriesBegin));
HostEntriesBeginToTransTable.erase(tt);
HostEntriesBeginToTransTable->erase(tt);
} else {
DP("Translation table for descriptor " DPxMOD " cannot be found, probably "
"it has been already removed.\n", DPxPTR(desc->HostEntriesBegin));
}
TblMapMtx.unlock();
TblMapMtx->unlock();
// TODO: Remove RTL and the devices it manages if it's not used anymore?
// TODO: Write some RTL->unload_image(...) function?

View File

@ -134,8 +134,8 @@ public:
// Unregister a shared library from all RTLs.
void UnregisterLib(__tgt_bin_desc *desc);
};
extern RTLsTy RTLs;
extern std::mutex RTLsMtx;
extern RTLsTy *RTLs;
extern std::mutex *RTLsMtx;
/// Map between the host entry begin and the translation table. Each
@ -153,8 +153,8 @@ struct TranslationTable {
};
typedef std::map<__tgt_offload_entry *, TranslationTable>
HostEntriesBeginToTransTableTy;
extern HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
extern std::mutex TrlTblMtx;
extern HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable;
extern std::mutex *TrlTblMtx;
/// Map between the host ptr and a table index
struct TableMap {
@ -165,7 +165,7 @@ struct TableMap {
: Table(table), Index(index) {}
};
typedef std::map<void *, TableMap> HostPtrToTableMapTy;
extern HostPtrToTableMapTy HostPtrToTableMap;
extern std::mutex TblMapMtx;
extern HostPtrToTableMapTy *HostPtrToTableMap;
extern std::mutex *TblMapMtx;
#endif

View File

@ -0,0 +1,34 @@
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-aarch64-unknown-linux-gnu -ldl && %libomptarget-run-aarch64-unknown-linux-gnu %t.so 2>&1 | %fcheck-aarch64-unknown-linux-gnu
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-powerpc64-ibm-linux-gnu -ldl && %libomptarget-run-powerpc64-ibm-linux-gnu %t.so 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-powerpc64le-ibm-linux-gnu -ldl && %libomptarget-run-powerpc64le-ibm-linux-gnu %t.so 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-x86_64-pc-linux-gnu -ldl && %libomptarget-run-x86_64-pc-linux-gnu %t.so 2>&1 | %fcheck-x86_64-pc-linux-gnu
#ifdef SHARED
#include <stdio.h>
int foo() {
#pragma omp target
;
printf("%s\n", "DONE.");
return 0;
}
#else
#include <dlfcn.h>
#include <stdio.h>
int main(int argc, char **argv) {
void *Handle = dlopen(argv[1], RTLD_NOW);
int (*Foo)(void);
if (Handle == NULL) {
printf("dlopen() failed: %s\n", dlerror());
return 1;
}
Foo = (int (*)(void)) dlsym(Handle, "foo");
if (Handle == NULL) {
printf("dlsym() failed: %s\n", dlerror());
return 1;
}
// CHECK: DONE.
// CHECK-NOT: {{abort|fault}}
return Foo();
}
#endif