[libcxxabi] Re-organized inheritance structure to remove CRTP in cxa_guard

Currently, the `InitByte...` classes inherit from `GuardObject` so they can
access the `base_address`, `init_byte_address` and `thread_id_address`. Then,
since `GuardObject` needs to call `acquire`/`release`/`abort_init_byte`, it uses
the curiously recurring template pattern (CRTP). This is rather messy.

Instead, we'll have `GuardObject` contain an instance of `InitByte`, and pass it
the addresses it needs in the constructor. `GuardObject` doesn't need the
addresses anyways, so it makes more sense for `InitByte` to keep them instead of
`GuardObject`. Then, `GuardObject` can call `acquire`/`release`/`abort` as one
of `InitByte`'s member functions.

Organizing things this way not only gets rid of the use of the CRTP, but also
improves separation of concerns a bit since the `InitByte` classes are no longer
indirectly responsible for things because of their inheritance from
`GuardObject`. This means we no longer have strange things like calling
`InitByteFutex.cxa_guard_acquire`, instead we call
`GuardObject<InitByteFutex>.cxa_guard_acquire`.

This is the 4th of 5 changes to overhaul cxa_guard.
See D108343 for what the final result will be.

Depends on D115367

Reviewed By: ldionne, #libc_abi

Differential Revision: https://reviews.llvm.org/D115368
This commit is contained in:
Daniel McIntosh 2021-12-08 13:16:21 -05:00
parent 847ea76219
commit 29be7c9c4f
2 changed files with 182 additions and 134 deletions

View File

@ -23,9 +23,15 @@
* the thread currently performing initialization is stored in the second word.
*
* Guard Object Layout:
* -------------------------------------------------------------------------
* |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
* -------------------------------------------------------------------------
* ---------------------------------------------------------------------------
* | a+0: guard byte | a+1: init byte | a+2: unused ... | a+4: thread-id ... |
* ---------------------------------------------------------------------------
*
* Note that we don't do what the ABI docs suggest (put a mutex in the guard
* object which we acquire in cxa_guard_acquire and release in
* cxa_guard_release). Instead we use the init byte to imitate that behaviour,
* but without actually holding anything mutex related between aquire and
* release/abort.
*
* Access Protocol:
* For each implementation the guard byte is checked and set before accessing
@ -176,8 +182,6 @@ struct GuardByte {
public:
/// The guard byte portion of cxa_guard_acquire. Returns true if
/// initialization has already been completed.
/// Note: On completion, we haven't 'acquired' ownership of anything mutex
/// related. The name is simply referring to __cxa_guard_acquire.
bool acquire() {
// if guard_byte is non-zero, we have already completed initialization
// (i.e. release has been called)
@ -185,8 +189,6 @@ public:
}
/// The guard byte portion of cxa_guard_release.
/// Note: On completion, we haven't 'released' ownership of anything mutex
/// related. The name is simply referring to __cxa_guard_release.
void release() { guard_byte.store(COMPLETE_BIT, std::_AO_Release); }
/// The guard byte portion of cxa_guard_abort.
@ -197,89 +199,70 @@ private:
};
//===----------------------------------------------------------------------===//
// GuardBase
// InitByte Implementations
//===----------------------------------------------------------------------===//
enum class AcquireResult {
INIT_IS_DONE,
INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;
template <class Derived>
struct GuardObject {
GuardObject() = delete;
GuardObject(GuardObject const&) = delete;
GuardObject& operator=(GuardObject const&) = delete;
private:
GuardByte guard_byte;
public:
/// ARM Constructor
explicit GuardObject(uint32_t* g)
: guard_byte(reinterpret_cast<uint8_t*>(g)), base_address(g),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1), thread_id_address(nullptr) {}
/// Itanium Constructor
explicit GuardObject(uint64_t* g)
: guard_byte(reinterpret_cast<uint8_t*>(g)), base_address(g),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1), thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}
/// Implements __cxa_guard_acquire.
AcquireResult cxa_guard_acquire() {
if (guard_byte.acquire())
return INIT_IS_DONE;
return derived()->acquire_init_byte();
}
/// Implements __cxa_guard_release.
void cxa_guard_release() {
// Update guard byte first, so if somebody is woken up by release_init_byte
// and comes all the way back around to __cxa_guard_acquire again, they see
// it as having completed initialization.
guard_byte.release();
derived()->release_init_byte();
}
/// Implements __cxa_guard_abort.
void cxa_guard_abort() {
guard_byte.abort();
derived()->abort_init_byte();
}
public:
/// base_address - the address of the original guard object.
void* const base_address;
/// The address of the byte used by the implementation during initialization.
uint8_t* const init_byte_address;
/// An optional address storing an identifier for the thread performing initialization.
/// It's used to detect recursive initialization.
uint32_t* const thread_id_address;
private:
Derived* derived() { return static_cast<Derived*>(this); }
};
//
// Each initialization byte implementation supports the following methods:
//
// InitByte(uint8_t* _init_byte_address, uint32_t* _thread_id_address)
// Construct the InitByte object, initializing our member variables
//
// bool acquire()
// Called before we start the initialization. Check if someone else has already started, and if
// not to signal our intent to start it ourselves. We determine the current status from the init
// byte, which is one of 4 possible values:
// COMPLETE: Initialization was finished by somebody else. Return true.
// PENDING: Somebody has started the initialization already, set the WAITING bit,
// then wait for the init byte to get updated with a new value.
// (PENDING|WAITING): Somebody has started the initialization already, and we're not the
// first one waiting. Wait for the init byte to get updated.
// UNSET: Initialization hasn't successfully completed, and nobody is currently
// performing the initialization. Set the PENDING bit to indicate our
// intention to start the initialization, and return false.
// The return value indicates whether initialization has already been completed.
//
// void release()
// Called after successfully completing the initialization. Update the init byte to reflect
// that, then if anybody else is waiting, wake them up.
//
// void abort()
// Called after an error is thrown during the initialization. Reset the init byte to UNSET to
// indicate that we're no longer performing the initialization, then if anybody is waiting, wake
// them up so they can try performing the initialization.
//
//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//
struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
using GuardObject::GuardObject;
/// InitByteNoThreads - Doesn't use any inter-thread synchronization when
/// managing reads and writes to the init byte.
struct InitByteNoThreads {
InitByteNoThreads() = delete;
InitByteNoThreads(InitByteNoThreads const&) = delete;
InitByteNoThreads& operator=(InitByteNoThreads const&) = delete;
AcquireResult acquire_init_byte() {
explicit InitByteNoThreads(uint8_t* _init_byte_address, uint32_t*) : init_byte_address(_init_byte_address) {}
/// The init byte portion of cxa_guard_acquire. Returns true if
/// initialization has already been completed.
bool acquire() {
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
return true;
if (*init_byte_address & PENDING_BIT)
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
return false;
}
void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
void abort_init_byte() { *init_byte_address = UNSET; }
/// The init byte portion of cxa_guard_release.
void release() { *init_byte_address = COMPLETE_BIT; }
/// The init byte portion of cxa_guard_abort.
void abort() { *init_byte_address = UNSET; }
private:
/// The address of the byte used during initialization.
uint8_t* const init_byte_address;
};
//===----------------------------------------------------------------------===//
@ -319,18 +302,20 @@ struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
/// InitByteGlobalMutex - Uses a global mutex and condition variable (common to
/// all static local variables) to manage reads and writes to the init byte.
template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex : GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond, GetThreadID>> {
struct InitByteGlobalMutex {
using BaseT = typename InitByteGlobalMutex::GuardObject;
using BaseT::BaseT;
explicit InitByteGlobalMutex(uint32_t* g) : BaseT(g), has_thread_id_support(false) {}
explicit InitByteGlobalMutex(uint64_t* g) : BaseT(g), has_thread_id_support(GetThreadID != nullptr) {}
explicit InitByteGlobalMutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address)
: init_byte_address(_init_byte_address), thread_id_address(_thread_id_address),
has_thread_id_support(_thread_id_address != nullptr && GetThreadID != nullptr) {}
public:
AcquireResult acquire_init_byte() {
/// The init byte portion of cxa_guard_acquire. Returns true if
/// initialization has already been completed.
bool acquire() {
LockGuard g("__cxa_guard_acquire");
// Check for possible recursive initialization.
if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
@ -345,16 +330,17 @@ public:
}
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
return true;
if (has_thread_id_support)
*thread_id_address = current_thread_id.get();
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
return false;
}
void release_init_byte() {
/// The init byte portion of cxa_guard_release.
void release() {
bool has_waiting;
{
LockGuard g("__cxa_guard_release");
@ -368,7 +354,8 @@ public:
}
}
void abort_init_byte() {
/// The init byte portion of cxa_guard_abort.
void abort() {
bool has_waiting;
{
LockGuard g("__cxa_guard_abort");
@ -385,8 +372,12 @@ public:
}
private:
using BaseT::init_byte_address;
using BaseT::thread_id_address;
/// The address of the byte used during initialization.
uint8_t* const init_byte_address;
/// An optional address storing an identifier for the thread performing initialization.
/// It's used to detect recursive initialization.
uint32_t* const thread_id_address;
const bool has_thread_id_support;
LazyValue<uint32_t, GetThreadID> current_thread_id;
@ -433,38 +424,32 @@ constexpr void (*PlatformFutexWake)(int*) = nullptr;
constexpr bool PlatformSupportsFutex() { return +PlatformFutexWait != nullptr; }
/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking.
/// InitByteFutex - Uses a futex to manage reads and writes to the init byte.
template <void (*Wait)(int*, int) = PlatformFutexWait, void (*Wake)(int*) = PlatformFutexWake,
uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
using BaseT = typename InitByteFutex::GuardObject;
struct InitByteFutex {
/// ARM Constructor
explicit InitByteFutex(uint32_t* g)
: BaseT(g), init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg != nullptr),
thread_id(this->thread_id_address) {}
/// Itanium Constructor
explicit InitByteFutex(uint64_t* g)
: BaseT(g), init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg != nullptr),
thread_id(this->thread_id_address) {}
explicit InitByteFutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address)
: init_byte(_init_byte_address),
has_thread_id_support(_thread_id_address != nullptr && GetThreadIDArg != nullptr),
thread_id(_thread_id_address),
base_address(reinterpret_cast<int*>(/*_init_byte_address & ~0x3*/ _init_byte_address - 1)) {}
public:
AcquireResult acquire_init_byte() {
/// The init byte portion of cxa_guard_acquire. Returns true if
/// initialization has already been completed.
bool acquire() {
while (true) {
uint8_t last_val = UNSET;
if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel, std::_AO_Acquire)) {
if (has_thread_id_support) {
thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
}
return INIT_IS_PENDING;
return false;
}
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
return true;
if (last_val & PENDING_BIT) {
@ -481,7 +466,7 @@ public:
if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT, std::_AO_Acq_Rel, std::_AO_Release)) {
// (1) success, via someone else's work!
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
return true;
// (3) someone else, bailed on doing the work, retry from the start!
if (last_val == UNSET)
@ -495,13 +480,15 @@ public:
}
}
void release_init_byte() {
/// The init byte portion of cxa_guard_release.
void release() {
uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
if (old & WAITING_BIT)
wake_all();
}
void abort_init_byte() {
/// The init byte portion of cxa_guard_abort.
void abort() {
if (has_thread_id_support)
thread_id.store(0, std::_AO_Relaxed);
@ -512,12 +499,11 @@ public:
private:
/// Use the futex to wait on the current guard variable. Futex expects a
/// 32-bit 4-byte aligned address as the first argument, so we have to use use
/// the base address of the guard variable (not the init byte).
void wait_on_initialization() {
Wait(static_cast<int*>(this->base_address), expected_value_for_futex(PENDING_BIT | WAITING_BIT));
}
void wake_all() { Wake(static_cast<int*>(this->base_address)); }
/// 32-bit 4-byte aligned address as the first argument, so we use the 4-byte
/// aligned address that encompasses the init byte (i.e. the address of the
/// raw guard object that was passed to __cxa_guard_acquire/release/abort).
void wait_on_initialization() { Wait(base_address, expected_value_for_futex(PENDING_BIT | WAITING_BIT)); }
void wake_all() { Wake(base_address); }
private:
AtomicInt<uint8_t> init_byte;
@ -527,6 +513,10 @@ private:
AtomicInt<uint32_t> thread_id;
LazyValue<uint32_t, GetThreadIDArg> current_thread_id;
/// the 4-byte-aligned address that encompasses the init byte (i.e. the
/// address of the raw guard object).
int* const base_address;
/// Create the expected integer value for futex `wait(int* addr, int expected)`.
/// We pass the base address as the first argument, So this function creates
/// an zero-initialized integer with `b` copied at the correct offset.
@ -539,6 +529,66 @@ private:
static_assert(Wait != nullptr && Wake != nullptr, "");
};
//===----------------------------------------------------------------------===//
// GuardObject
//===----------------------------------------------------------------------===//
enum class AcquireResult {
INIT_IS_DONE,
INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;
/// Co-ordinates between GuardByte and InitByte.
template <class InitByteT>
struct GuardObject {
GuardObject() = delete;
GuardObject(GuardObject const&) = delete;
GuardObject& operator=(GuardObject const&) = delete;
private:
GuardByte guard_byte;
InitByteT init_byte;
public:
/// ARM Constructor
explicit GuardObject(uint32_t* raw_guard_object)
: guard_byte(reinterpret_cast<uint8_t*>(raw_guard_object)),
init_byte(reinterpret_cast<uint8_t*>(raw_guard_object) + 1, nullptr) {}
/// Itanium Constructor
explicit GuardObject(uint64_t* raw_guard_object)
: guard_byte(reinterpret_cast<uint8_t*>(raw_guard_object)),
init_byte(reinterpret_cast<uint8_t*>(raw_guard_object) + 1, reinterpret_cast<uint32_t*>(raw_guard_object) + 1) {
}
/// Implements __cxa_guard_acquire.
AcquireResult cxa_guard_acquire() {
// Use short-circuit evaluation to avoid calling init_byte.acquire when
// guard_byte.acquire returns true. (i.e. don't call it when we know from
// the guard byte that initialization has already been completed)
if (guard_byte.acquire() || init_byte.acquire())
return INIT_IS_DONE;
return INIT_IS_PENDING;
}
/// Implements __cxa_guard_release.
void cxa_guard_release() {
// Update guard byte first, so if somebody is woken up by init_byte.release
// and comes all the way back around to __cxa_guard_acquire again, they see
// it as having completed initialization.
guard_byte.release();
init_byte.release();
}
/// Implements __cxa_guard_abort.
void cxa_guard_abort() {
guard_byte.abort();
init_byte.abort();
}
};
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
@ -555,20 +605,23 @@ enum class Implementation { NoThreads, GlobalMutex, Futex };
template <Implementation Impl>
struct SelectImplementation;
/// Manage initialization without performing any inter-thread synchronization.
template <>
struct SelectImplementation<Implementation::NoThreads> {
using type = InitByteNoThreads;
using type = GuardObject<InitByteNoThreads>;
};
/// Manage initialization using a global mutex and condition variable.
template <>
struct SelectImplementation<Implementation::GlobalMutex> {
using type = InitByteGlobalMutex<LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
using type = GuardObject<InitByteGlobalMutex<LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>>;
};
/// Manage initialization using atomics and the futex syscall for waiting and waking.
template <>
struct SelectImplementation<Implementation::Futex> {
using type = InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
using type = GuardObject<InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>>;
};
// TODO(EricWF): We should prefer the futex implementation when available. But

View File

@ -119,16 +119,13 @@ int main(int, char**) {
{
#if defined(_LIBCXXABI_HAS_NO_THREADS)
static_assert(CurrentImplementation == Implementation::NoThreads, "");
static_assert(
std::is_same<SelectedImplementation, InitByteNoThreads>::value, "");
static_assert(std::is_same<SelectedImplementation, GuardObject<InitByteNoThreads>>::value, "");
#else
static_assert(CurrentImplementation == Implementation::GlobalMutex, "");
static_assert(
std::is_same<
SelectedImplementation,
InitByteGlobalMutex<LibcppMutex, LibcppCondVar,
GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance>>::value,
std::is_same<SelectedImplementation,
GuardObject<InitByteGlobalMutex<LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance>>>::value,
"");
#endif
}
@ -142,19 +139,17 @@ int main(int, char**) {
}
}
{
Tests<uint32_t, InitByteNoThreads>::test();
Tests<uint64_t, InitByteNoThreads>::test();
Tests<uint32_t, GuardObject<InitByteNoThreads>>::test();
Tests<uint64_t, GuardObject<InitByteNoThreads>>::test();
}
{
using MutexImpl =
InitByteGlobalMutex<NopMutex, NopCondVar, global_nop_mutex,
global_nop_cond, MockGetThreadID>;
GuardObject<InitByteGlobalMutex<NopMutex, NopCondVar, global_nop_mutex, global_nop_cond, MockGetThreadID>>;
Tests<uint32_t, MutexImpl>::test();
Tests<uint64_t, MutexImpl>::test();
}
{
using FutexImpl =
InitByteFutex<&NopFutexWait, &NopFutexWake, &MockGetThreadID>;
using FutexImpl = GuardObject<InitByteFutex<&NopFutexWait, &NopFutexWake, &MockGetThreadID>>;
Tests<uint32_t, FutexImpl>::test();
Tests<uint64_t, FutexImpl>::test();
}