From 29be7c9c4f5d03be21ac80dc178a730aa10452f9 Mon Sep 17 00:00:00 2001 From: Daniel McIntosh Date: Wed, 8 Dec 2021 13:16:21 -0500 Subject: [PATCH] [libcxxabi] Re-organized inheritance structure to remove CRTP in cxa_guard Currently, the `InitByte...` classes inherit from `GuardObject` so they can access the `base_address`, `init_byte_address` and `thread_id_address`. Then, since `GuardObject` needs to call `acquire`/`release`/`abort_init_byte`, it uses the curiously recurring template pattern (CRTP). This is rather messy. Instead, we'll have `GuardObject` contain an instance of `InitByte`, and pass it the addresses it needs in the constructor. `GuardObject` doesn't need the addresses anyways, so it makes more sense for `InitByte` to keep them instead of `GuardObject`. Then, `GuardObject` can call `acquire`/`release`/`abort` as one of `InitByte`'s member functions. Organizing things this way not only gets rid of the use of the CRTP, but also improves separation of concerns a bit since the `InitByte` classes are no longer indirectly responsible for things because of their inheritance from `GuardObject`. This means we no longer have strange things like calling `InitByteFutex.cxa_guard_acquire`, instead we call `GuardObject.cxa_guard_acquire`. This is the 4th of 5 changes to overhaul cxa_guard. See D108343 for what the final result will be. Depends on D115367 Reviewed By: ldionne, #libc_abi Differential Revision: https://reviews.llvm.org/D115368 --- libcxxabi/src/cxa_guard_impl.h | 295 +++++++++++++---------- libcxxabi/test/guard_test_basic.pass.cpp | 21 +- 2 files changed, 182 insertions(+), 134 deletions(-) diff --git a/libcxxabi/src/cxa_guard_impl.h b/libcxxabi/src/cxa_guard_impl.h index 36477e4da4f0..d8d991be062f 100644 --- a/libcxxabi/src/cxa_guard_impl.h +++ b/libcxxabi/src/cxa_guard_impl.h @@ -23,9 +23,15 @@ * the thread currently performing initialization is stored in the second word. * * Guard Object Layout: - * ------------------------------------------------------------------------- - * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... | - * ------------------------------------------------------------------------- + * --------------------------------------------------------------------------- + * | a+0: guard byte | a+1: init byte | a+2: unused ... | a+4: thread-id ... | + * --------------------------------------------------------------------------- + * + * Note that we don't do what the ABI docs suggest (put a mutex in the guard + * object which we acquire in cxa_guard_acquire and release in + * cxa_guard_release). Instead we use the init byte to imitate that behaviour, + * but without actually holding anything mutex related between aquire and + * release/abort. * * Access Protocol: * For each implementation the guard byte is checked and set before accessing @@ -176,8 +182,6 @@ struct GuardByte { public: /// The guard byte portion of cxa_guard_acquire. Returns true if /// initialization has already been completed. - /// Note: On completion, we haven't 'acquired' ownership of anything mutex - /// related. The name is simply referring to __cxa_guard_acquire. bool acquire() { // if guard_byte is non-zero, we have already completed initialization // (i.e. release has been called) @@ -185,8 +189,6 @@ public: } /// The guard byte portion of cxa_guard_release. - /// Note: On completion, we haven't 'released' ownership of anything mutex - /// related. The name is simply referring to __cxa_guard_release. void release() { guard_byte.store(COMPLETE_BIT, std::_AO_Release); } /// The guard byte portion of cxa_guard_abort. @@ -197,89 +199,70 @@ private: }; //===----------------------------------------------------------------------===// -// GuardBase +// InitByte Implementations //===----------------------------------------------------------------------===// - -enum class AcquireResult { - INIT_IS_DONE, - INIT_IS_PENDING, -}; -constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; -constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; - -template -struct GuardObject { - GuardObject() = delete; - GuardObject(GuardObject const&) = delete; - GuardObject& operator=(GuardObject const&) = delete; - -private: - GuardByte guard_byte; - -public: - /// ARM Constructor - explicit GuardObject(uint32_t* g) - : guard_byte(reinterpret_cast(g)), base_address(g), - init_byte_address(reinterpret_cast(g) + 1), thread_id_address(nullptr) {} - - /// Itanium Constructor - explicit GuardObject(uint64_t* g) - : guard_byte(reinterpret_cast(g)), base_address(g), - init_byte_address(reinterpret_cast(g) + 1), thread_id_address(reinterpret_cast(g) + 1) {} - - /// Implements __cxa_guard_acquire. - AcquireResult cxa_guard_acquire() { - if (guard_byte.acquire()) - return INIT_IS_DONE; - return derived()->acquire_init_byte(); - } - - /// Implements __cxa_guard_release. - void cxa_guard_release() { - // Update guard byte first, so if somebody is woken up by release_init_byte - // and comes all the way back around to __cxa_guard_acquire again, they see - // it as having completed initialization. - guard_byte.release(); - derived()->release_init_byte(); - } - - /// Implements __cxa_guard_abort. - void cxa_guard_abort() { - guard_byte.abort(); - derived()->abort_init_byte(); - } - -public: - /// base_address - the address of the original guard object. - void* const base_address; - /// The address of the byte used by the implementation during initialization. - uint8_t* const init_byte_address; - /// An optional address storing an identifier for the thread performing initialization. - /// It's used to detect recursive initialization. - uint32_t* const thread_id_address; - -private: - Derived* derived() { return static_cast(this); } -}; +// +// Each initialization byte implementation supports the following methods: +// +// InitByte(uint8_t* _init_byte_address, uint32_t* _thread_id_address) +// Construct the InitByte object, initializing our member variables +// +// bool acquire() +// Called before we start the initialization. Check if someone else has already started, and if +// not to signal our intent to start it ourselves. We determine the current status from the init +// byte, which is one of 4 possible values: +// COMPLETE: Initialization was finished by somebody else. Return true. +// PENDING: Somebody has started the initialization already, set the WAITING bit, +// then wait for the init byte to get updated with a new value. +// (PENDING|WAITING): Somebody has started the initialization already, and we're not the +// first one waiting. Wait for the init byte to get updated. +// UNSET: Initialization hasn't successfully completed, and nobody is currently +// performing the initialization. Set the PENDING bit to indicate our +// intention to start the initialization, and return false. +// The return value indicates whether initialization has already been completed. +// +// void release() +// Called after successfully completing the initialization. Update the init byte to reflect +// that, then if anybody else is waiting, wake them up. +// +// void abort() +// Called after an error is thrown during the initialization. Reset the init byte to UNSET to +// indicate that we're no longer performing the initialization, then if anybody is waiting, wake +// them up so they can try performing the initialization. +// //===----------------------------------------------------------------------===// // Single Threaded Implementation //===----------------------------------------------------------------------===// -struct InitByteNoThreads : GuardObject { - using GuardObject::GuardObject; +/// InitByteNoThreads - Doesn't use any inter-thread synchronization when +/// managing reads and writes to the init byte. +struct InitByteNoThreads { + InitByteNoThreads() = delete; + InitByteNoThreads(InitByteNoThreads const&) = delete; + InitByteNoThreads& operator=(InitByteNoThreads const&) = delete; - AcquireResult acquire_init_byte() { + explicit InitByteNoThreads(uint8_t* _init_byte_address, uint32_t*) : init_byte_address(_init_byte_address) {} + + /// The init byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + bool acquire() { if (*init_byte_address == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (*init_byte_address & PENDING_BIT) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization"); *init_byte_address = PENDING_BIT; - return INIT_IS_PENDING; + return false; } - void release_init_byte() { *init_byte_address = COMPLETE_BIT; } - void abort_init_byte() { *init_byte_address = UNSET; } + /// The init byte portion of cxa_guard_release. + void release() { *init_byte_address = COMPLETE_BIT; } + /// The init byte portion of cxa_guard_abort. + void abort() { *init_byte_address = UNSET; } + +private: + /// The address of the byte used during initialization. + uint8_t* const init_byte_address; }; //===----------------------------------------------------------------------===// @@ -319,18 +302,20 @@ struct LibcppMutex {}; struct LibcppCondVar {}; #endif // !defined(_LIBCXXABI_HAS_NO_THREADS) +/// InitByteGlobalMutex - Uses a global mutex and condition variable (common to +/// all static local variables) to manage reads and writes to the init byte. template -struct InitByteGlobalMutex : GuardObject> { +struct InitByteGlobalMutex { - using BaseT = typename InitByteGlobalMutex::GuardObject; - using BaseT::BaseT; - - explicit InitByteGlobalMutex(uint32_t* g) : BaseT(g), has_thread_id_support(false) {} - explicit InitByteGlobalMutex(uint64_t* g) : BaseT(g), has_thread_id_support(GetThreadID != nullptr) {} + explicit InitByteGlobalMutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) + : init_byte_address(_init_byte_address), thread_id_address(_thread_id_address), + has_thread_id_support(_thread_id_address != nullptr && GetThreadID != nullptr) {} public: - AcquireResult acquire_init_byte() { + /// The init byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + bool acquire() { LockGuard g("__cxa_guard_acquire"); // Check for possible recursive initialization. if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) { @@ -345,16 +330,17 @@ public: } if (*init_byte_address == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (has_thread_id_support) *thread_id_address = current_thread_id.get(); *init_byte_address = PENDING_BIT; - return INIT_IS_PENDING; + return false; } - void release_init_byte() { + /// The init byte portion of cxa_guard_release. + void release() { bool has_waiting; { LockGuard g("__cxa_guard_release"); @@ -368,7 +354,8 @@ public: } } - void abort_init_byte() { + /// The init byte portion of cxa_guard_abort. + void abort() { bool has_waiting; { LockGuard g("__cxa_guard_abort"); @@ -385,8 +372,12 @@ public: } private: - using BaseT::init_byte_address; - using BaseT::thread_id_address; + /// The address of the byte used during initialization. + uint8_t* const init_byte_address; + /// An optional address storing an identifier for the thread performing initialization. + /// It's used to detect recursive initialization. + uint32_t* const thread_id_address; + const bool has_thread_id_support; LazyValue current_thread_id; @@ -433,38 +424,32 @@ constexpr void (*PlatformFutexWake)(int*) = nullptr; constexpr bool PlatformSupportsFutex() { return +PlatformFutexWait != nullptr; } -/// InitByteFutex - Manages initialization using atomics and the futex syscall -/// for waiting and waking. +/// InitByteFutex - Uses a futex to manage reads and writes to the init byte. template -struct InitByteFutex : GuardObject> { - using BaseT = typename InitByteFutex::GuardObject; +struct InitByteFutex { - /// ARM Constructor - explicit InitByteFutex(uint32_t* g) - : BaseT(g), init_byte(this->init_byte_address), - has_thread_id_support(this->thread_id_address && GetThreadIDArg != nullptr), - thread_id(this->thread_id_address) {} - - /// Itanium Constructor - explicit InitByteFutex(uint64_t* g) - : BaseT(g), init_byte(this->init_byte_address), - has_thread_id_support(this->thread_id_address && GetThreadIDArg != nullptr), - thread_id(this->thread_id_address) {} + explicit InitByteFutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) + : init_byte(_init_byte_address), + has_thread_id_support(_thread_id_address != nullptr && GetThreadIDArg != nullptr), + thread_id(_thread_id_address), + base_address(reinterpret_cast(/*_init_byte_address & ~0x3*/ _init_byte_address - 1)) {} public: - AcquireResult acquire_init_byte() { + /// The init byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + bool acquire() { while (true) { uint8_t last_val = UNSET; if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel, std::_AO_Acquire)) { if (has_thread_id_support) { thread_id.store(current_thread_id.get(), std::_AO_Relaxed); } - return INIT_IS_PENDING; + return false; } if (last_val == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (last_val & PENDING_BIT) { @@ -481,7 +466,7 @@ public: if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT, std::_AO_Acq_Rel, std::_AO_Release)) { // (1) success, via someone else's work! if (last_val == COMPLETE_BIT) - return INIT_IS_DONE; + return true; // (3) someone else, bailed on doing the work, retry from the start! if (last_val == UNSET) @@ -495,13 +480,15 @@ public: } } - void release_init_byte() { + /// The init byte portion of cxa_guard_release. + void release() { uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } - void abort_init_byte() { + /// The init byte portion of cxa_guard_abort. + void abort() { if (has_thread_id_support) thread_id.store(0, std::_AO_Relaxed); @@ -512,12 +499,11 @@ public: private: /// Use the futex to wait on the current guard variable. Futex expects a - /// 32-bit 4-byte aligned address as the first argument, so we have to use use - /// the base address of the guard variable (not the init byte). - void wait_on_initialization() { - Wait(static_cast(this->base_address), expected_value_for_futex(PENDING_BIT | WAITING_BIT)); - } - void wake_all() { Wake(static_cast(this->base_address)); } + /// 32-bit 4-byte aligned address as the first argument, so we use the 4-byte + /// aligned address that encompasses the init byte (i.e. the address of the + /// raw guard object that was passed to __cxa_guard_acquire/release/abort). + void wait_on_initialization() { Wait(base_address, expected_value_for_futex(PENDING_BIT | WAITING_BIT)); } + void wake_all() { Wake(base_address); } private: AtomicInt init_byte; @@ -527,6 +513,10 @@ private: AtomicInt thread_id; LazyValue current_thread_id; + /// the 4-byte-aligned address that encompasses the init byte (i.e. the + /// address of the raw guard object). + int* const base_address; + /// Create the expected integer value for futex `wait(int* addr, int expected)`. /// We pass the base address as the first argument, So this function creates /// an zero-initialized integer with `b` copied at the correct offset. @@ -539,6 +529,66 @@ private: static_assert(Wait != nullptr && Wake != nullptr, ""); }; +//===----------------------------------------------------------------------===// +// GuardObject +//===----------------------------------------------------------------------===// + +enum class AcquireResult { + INIT_IS_DONE, + INIT_IS_PENDING, +}; +constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; +constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; + +/// Co-ordinates between GuardByte and InitByte. +template +struct GuardObject { + GuardObject() = delete; + GuardObject(GuardObject const&) = delete; + GuardObject& operator=(GuardObject const&) = delete; + +private: + GuardByte guard_byte; + InitByteT init_byte; + +public: + /// ARM Constructor + explicit GuardObject(uint32_t* raw_guard_object) + : guard_byte(reinterpret_cast(raw_guard_object)), + init_byte(reinterpret_cast(raw_guard_object) + 1, nullptr) {} + + /// Itanium Constructor + explicit GuardObject(uint64_t* raw_guard_object) + : guard_byte(reinterpret_cast(raw_guard_object)), + init_byte(reinterpret_cast(raw_guard_object) + 1, reinterpret_cast(raw_guard_object) + 1) { + } + + /// Implements __cxa_guard_acquire. + AcquireResult cxa_guard_acquire() { + // Use short-circuit evaluation to avoid calling init_byte.acquire when + // guard_byte.acquire returns true. (i.e. don't call it when we know from + // the guard byte that initialization has already been completed) + if (guard_byte.acquire() || init_byte.acquire()) + return INIT_IS_DONE; + return INIT_IS_PENDING; + } + + /// Implements __cxa_guard_release. + void cxa_guard_release() { + // Update guard byte first, so if somebody is woken up by init_byte.release + // and comes all the way back around to __cxa_guard_acquire again, they see + // it as having completed initialization. + guard_byte.release(); + init_byte.release(); + } + + /// Implements __cxa_guard_abort. + void cxa_guard_abort() { + guard_byte.abort(); + init_byte.abort(); + } +}; + //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// @@ -555,20 +605,23 @@ enum class Implementation { NoThreads, GlobalMutex, Futex }; template struct SelectImplementation; +/// Manage initialization without performing any inter-thread synchronization. template <> struct SelectImplementation { - using type = InitByteNoThreads; + using type = GuardObject; }; +/// Manage initialization using a global mutex and condition variable. template <> struct SelectImplementation { - using type = InitByteGlobalMutex::instance, - GlobalStatic::instance, PlatformThreadID>; + using type = GuardObject::instance, + GlobalStatic::instance, PlatformThreadID>>; }; +/// Manage initialization using atomics and the futex syscall for waiting and waking. template <> struct SelectImplementation { - using type = InitByteFutex; + using type = GuardObject>; }; // TODO(EricWF): We should prefer the futex implementation when available. But diff --git a/libcxxabi/test/guard_test_basic.pass.cpp b/libcxxabi/test/guard_test_basic.pass.cpp index 588069bb3759..aa243cc8f749 100644 --- a/libcxxabi/test/guard_test_basic.pass.cpp +++ b/libcxxabi/test/guard_test_basic.pass.cpp @@ -119,16 +119,13 @@ int main(int, char**) { { #if defined(_LIBCXXABI_HAS_NO_THREADS) static_assert(CurrentImplementation == Implementation::NoThreads, ""); - static_assert( - std::is_same::value, ""); + static_assert(std::is_same>::value, ""); #else static_assert(CurrentImplementation == Implementation::GlobalMutex, ""); static_assert( - std::is_same< - SelectedImplementation, - InitByteGlobalMutex::instance, - GlobalStatic::instance>>::value, + std::is_same::instance, + GlobalStatic::instance>>>::value, ""); #endif } @@ -142,19 +139,17 @@ int main(int, char**) { } } { - Tests::test(); - Tests::test(); + Tests>::test(); + Tests>::test(); } { using MutexImpl = - InitByteGlobalMutex; + GuardObject>; Tests::test(); Tests::test(); } { - using FutexImpl = - InitByteFutex<&NopFutexWait, &NopFutexWake, &MockGetThreadID>; + using FutexImpl = GuardObject>; Tests::test(); Tests::test(); }