Rewrite cxa guard implementation.

This patch does three main things:
  (1) It rewrites the cxa guard implementation to make it testable.
  (2) It adds support for recursive-init detection on non-Apple platforms.
  (3) It adds a futex-based implementation.

The futex-based implementation locks and notifies on a per-object basis, unlike the
current implementation, which uses a single global lock for all objects. Once this
patch settles, I'll turn it on by default where supported.
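
For context, the compiler lowers a dynamically initialized function-local static
into calls to this runtime, roughly like the sketch below (illustrative only;
`Widget`, `make_widget`, `storage`, and `guard` are made-up names, and real codegen
differs in detail):

  static uint64_t guard;                      // the guard object these functions manage
  alignas(Widget) static char storage[sizeof(Widget)];
  if (__cxa_guard_acquire(&guard)) {          // non-zero: this thread must run the init
    try {
      ::new (storage) Widget(make_widget());
      __cxa_guard_release(&guard);            // publish completion, wake any waiters
    } catch (...) {
      __cxa_guard_abort(&guard);              // reset the guard so another thread can retry
      throw;
    }
  }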

llvm-svn: 359060
Eric Fiselier 2019-04-24 01:47:30 +00:00
parent 0b098754b7
commit 70ebeabfb8
5 changed files with 1127 additions and 263 deletions

src/cxa_guard.cpp

@@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===//
#include "__cxxabi_config.h"
#include "cxxabi.h"
#include "abort_message.h"
#include <__threading_support>
#include <stdint.h>
#include <string.h>
// Tell the implementation that we're building the actual implementation
// (and not testing it)
#define BUILDING_CXA_GUARD
#include "cxa_guard_impl.h"
/*
This implementation must be careful to not call code external to this file
@@ -24,278 +24,30 @@
to not be a problem.
*/
namespace __cxxabiv1
{
namespace
{
enum InitializationResult {
INIT_COMPLETE,
INIT_NOT_COMPLETE,
};
namespace __cxxabiv1 {
#if defined(_LIBCXXABI_GUARD_ABI_ARM)
// A 32-bit, 4-byte-aligned static data value. The least significant 2 bits must
// be statically initialized to 0.
typedef uint32_t guard_type;
using guard_type = uint32_t;
#else
typedef uint64_t guard_type;
using guard_type = uint64_t;
#endif
#if !defined(_LIBCXXABI_HAS_NO_THREADS) && defined(__APPLE__) && \
!defined(_LIBCXXABI_GUARD_ABI_ARM)
// This is a special-case pthread dependency for Mac. We can't pull this
// out into libcxx's threading API (__threading_support) because not all
// supported Mac environments provide this function (in pthread.h). To
// make it possible to build/use libcxx in those environments, we have to
// keep this pthread dependency local to libcxxabi. If there is some
// convenient way to detect precisely when pthread_mach_thread_np is
// available in a given Mac environment, it might still be possible to
// bury this dependency in __threading_support.
#ifndef _LIBCPP_HAS_THREAD_API_PTHREAD
#error "How do I pthread_mach_thread_np()?"
#endif
#define LIBCXXABI_HAS_DEADLOCK_DETECTION
#define LOCK_ID_FOR_THREAD() pthread_mach_thread_np(std::__libcpp_thread_get_current_id())
typedef uint32_t lock_type;
#else
#define LOCK_ID_FOR_THREAD() true
typedef bool lock_type;
#endif
enum class OnRelease : char { UNLOCK, UNLOCK_AND_BROADCAST };
struct GlobalMutexGuard {
explicit GlobalMutexGuard(const char* calling_func, OnRelease on_release)
: calling_func(calling_func), on_release(on_release) {
#ifndef _LIBCXXABI_HAS_NO_THREADS
if (std::__libcpp_mutex_lock(&guard_mut))
abort_message("%s failed to acquire mutex", calling_func);
#endif
}
~GlobalMutexGuard() {
#ifndef _LIBCXXABI_HAS_NO_THREADS
if (std::__libcpp_mutex_unlock(&guard_mut))
abort_message("%s failed to release mutex", calling_func);
if (on_release == OnRelease::UNLOCK_AND_BROADCAST) {
if (std::__libcpp_condvar_broadcast(&guard_cv))
abort_message("%s failed to broadcast condition variable",
calling_func);
}
#endif
}
void wait_for_signal() {
#ifndef _LIBCXXABI_HAS_NO_THREADS
if (std::__libcpp_condvar_wait(&guard_cv, &guard_mut))
abort_message("%s condition variable wait failed", calling_func);
#endif
}
private:
GlobalMutexGuard(GlobalMutexGuard const&) = delete;
GlobalMutexGuard& operator=(GlobalMutexGuard const&) = delete;
const char* const calling_func;
OnRelease on_release;
#ifndef _LIBCXXABI_HAS_NO_THREADS
static std::__libcpp_mutex_t guard_mut;
static std::__libcpp_condvar_t guard_cv;
#endif
};
#ifndef _LIBCXXABI_HAS_NO_THREADS
std::__libcpp_mutex_t GlobalMutexGuard::guard_mut = _LIBCPP_MUTEX_INITIALIZER;
std::__libcpp_condvar_t GlobalMutexGuard::guard_cv =
_LIBCPP_CONDVAR_INITIALIZER;
#endif
struct GuardObject;
/// GuardValue - An abstraction for accessing the various fields and bits of
/// the guard object.
struct GuardValue {
private:
explicit GuardValue(guard_type v) : value(v) {}
friend struct GuardObject;
public:
/// Functions returning the values used to represent the uninitialized,
/// initialized, and initialization pending states.
static GuardValue ZERO();
static GuardValue INIT_COMPLETE();
static GuardValue INIT_PENDING();
/// Returns true if the guard value represents that the initialization is
/// complete.
bool is_initialization_complete() const;
/// Returns true if the guard value represents that the initialization is
/// currently pending.
bool is_initialization_pending() const;
/// Returns the lock value for the current guard value.
lock_type get_lock_value() const;
private:
// Returns a guard object corresponding to the specified lock value.
static guard_type guard_value_from_lock(lock_type l);
// Returns the lock value represented by the specified guard object.
static lock_type lock_value_from_guard(guard_type g);
private:
guard_type value;
};
/// GuardObject - Manages correctly reading and writing to the guard object.
struct GuardObject {
explicit GuardObject(guard_type *g) : guard(g) {}
// Read the current value of the guard object.
// TODO: Make this read atomic.
GuardValue read() const;
// Write the specified value to the guard object.
// TODO: Make this atomic
void write(GuardValue new_val);
private:
GuardObject(const GuardObject&) = delete;
GuardObject& operator=(const GuardObject&) = delete;
guard_type *guard;
};
} // unnamed namespace
extern "C"
{
_LIBCXXABI_FUNC_VIS int __cxa_guard_acquire(guard_type* raw_guard_object) {
GlobalMutexGuard gmutex("__cxa_guard_acquire", OnRelease::UNLOCK);
GuardObject guard(raw_guard_object);
GuardValue current_value = guard.read();
if (current_value.is_initialization_complete())
return INIT_COMPLETE;
const GuardValue LOCK_ID = GuardValue::INIT_PENDING();
#ifdef LIBCXXABI_HAS_DEADLOCK_DETECTION
if (current_value.is_initialization_pending() &&
current_value.get_lock_value() == LOCK_ID.get_lock_value()) {
abort_message("__cxa_guard_acquire detected deadlock");
}
#endif
while (current_value.is_initialization_pending()) {
gmutex.wait_for_signal();
current_value = guard.read();
}
if (current_value.is_initialization_complete())
return INIT_COMPLETE;
guard.write(LOCK_ID);
return INIT_NOT_COMPLETE;
SelectedImplementation imp(raw_guard_object);
return static_cast<int>(imp.cxa_guard_acquire());
}
_LIBCXXABI_FUNC_VIS void __cxa_guard_release(guard_type *raw_guard_object) {
GlobalMutexGuard gmutex("__cxa_guard_release",
OnRelease::UNLOCK_AND_BROADCAST);
GuardObject guard(raw_guard_object);
guard.write(GuardValue::ZERO());
guard.write(GuardValue::INIT_COMPLETE());
SelectedImplementation imp(raw_guard_object);
imp.cxa_guard_release();
}
_LIBCXXABI_FUNC_VIS void __cxa_guard_abort(guard_type *raw_guard_object) {
GlobalMutexGuard gmutex("__cxa_guard_abort", OnRelease::UNLOCK_AND_BROADCAST);
GuardObject guard(raw_guard_object);
guard.write(GuardValue::ZERO());
SelectedImplementation imp(raw_guard_object);
imp.cxa_guard_abort();
}
} // extern "C"
//===----------------------------------------------------------------------===//
// GuardObject Definitions
//===----------------------------------------------------------------------===//
GuardValue GuardObject::read() const {
// FIXME: Make this atomic
guard_type val = *guard;
return GuardValue(val);
}
void GuardObject::write(GuardValue new_val) {
// FIXME: make this atomic
*guard = new_val.value;
}
//===----------------------------------------------------------------------===//
// GuardValue Definitions
//===----------------------------------------------------------------------===//
GuardValue GuardValue::ZERO() { return GuardValue(0); }
GuardValue GuardValue::INIT_COMPLETE() {
guard_type value = {0};
#if defined(_LIBCXXABI_GUARD_ABI_ARM)
value |= 1;
#else
char* init_bit = (char*)&value;
*init_bit = 1;
#endif
return GuardValue(value);
}
GuardValue GuardValue::INIT_PENDING() {
return GuardValue(guard_value_from_lock(LOCK_ID_FOR_THREAD()));
}
bool GuardValue::is_initialization_complete() const {
#if defined(_LIBCXXABI_GUARD_ABI_ARM)
return value & 1;
#else
const char* init_bit = (const char*)&value;
return *init_bit;
#endif
}
bool GuardValue::is_initialization_pending() const {
return lock_value_from_guard(value) != 0;
}
lock_type GuardValue::get_lock_value() const {
return lock_value_from_guard(value);
}
// Create a guard object with the lock set to the specified value.
guard_type GuardValue::guard_value_from_lock(lock_type l) {
#if defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
#if __LITTLE_ENDIAN__
return static_cast<guard_type>(l) << 32;
#else
return static_cast<guard_type>(l);
#endif
#else // defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
guard_type f = {0};
memcpy(static_cast<char*>(static_cast<void*>(&f)) + 1, &l, sizeof(lock_type));
return f;
#endif // defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
}
lock_type GuardValue::lock_value_from_guard(guard_type g) {
#if defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
#if __LITTLE_ENDIAN__
return static_cast<lock_type>(g >> 32);
#else
return static_cast<lock_type>(g);
#endif
#else // defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
uint8_t guard_bytes[sizeof(guard_type)];
memcpy(&guard_bytes, &g, sizeof(guard_type));
return guard_bytes[1] != 0;
#endif // defined(__APPLE__) && !defined(_LIBCXXABI_GUARD_ABI_ARM)
}
} // __cxxabiv1

src/include/cxa_guard_impl.h

@@ -0,0 +1,550 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
#define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
/* cxa_guard_impl.h - Implements the C++ runtime support for function local
* static guards.
* The layout of the guard object is the same across ARM and Itanium.
*
* The first "guard byte" (which is checked by the compiler) is set only upon
* the completion of __cxa_guard_release.
*
* The second "init byte" does the rest of the bookkeeping. It tracks whether
* initialization is complete or pending, and whether there are waiting threads.
*
* If the guard variable is 64 bits and the platform supplies a 32-bit thread
* identifier, the identifier is used to detect recursive initialization. The
* thread ID of the thread currently performing initialization is stored in the
* second word.
*
* Guard Object Layout:
* -------------------------------------------------------------------------
* |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
* ------------------------------------------------------------------------
*
* Access Protocol:
* For each implementation the guard byte is checked and set before accessing
* the init byte.
*
* Overall Design:
* The code was designed so that each implementation can be tested
* independently of the C++ runtime or platform support.
*
*/
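// Illustrative example (for a 64-bit guard): while the thread with ID `tid` is
// running the initializer and another thread is waiting, the guard object holds
// guard byte == 0, init byte == (PENDING_BIT | WAITING_BIT), and thread-id word
// == tid; after a successful __cxa_guard_release both the guard byte and the
// init byte hold COMPLETE_BIT.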
#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#include <sys/types.h>
#if defined(__has_include)
# if __has_include(<sys/syscall.h>)
# include <sys/syscall.h>
# endif
#endif
#include <stdlib.h>
#include <__threading_support>
// To make testing possible, this header is included from both cxa_guard.cpp
// and a number of tests.
//
// For this reason we place everything in an anonymous namespace -- even though
// we're in a header. We want the actual implementation and the tests to have
// unique definitions of the types in this header (since the tests may depend
// on function local statics).
//
// To enforce this, either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
// defined when including this file. Only `src/cxa_guard.cpp` should define
// the former.
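// For example, a test translation unit uses it the way the tests below do:
//   #define TESTING_CXA_GUARD
//   #include "../src/cxa_guard_impl.h"
// and thereby gets its own private copy of the implementation types.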
#ifdef BUILDING_CXA_GUARD
# include "abort_message.h"
# define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
#elif defined(TESTING_CXA_GUARD)
# define ABORT_WITH_MESSAGE(...) ::abort()
#else
# error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
#endif
namespace __cxxabiv1 {
// Use an anonymous namespace to ensure that the tests and actual implementation
// have unique definitions of these symbols.
namespace {
//===----------------------------------------------------------------------===//
// Misc Utilities
//===----------------------------------------------------------------------===//
template <class T, T(*Init)()>
struct LazyValue {
LazyValue() : is_init(false) {}
T& get() {
if (!is_init) {
value = Init();
is_init = true;
}
return value;
}
private:
T value;
bool is_init = false;
};
//===----------------------------------------------------------------------===//
// PlatformGetThreadID
//===----------------------------------------------------------------------===//
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
return static_cast<uint32_t>(
pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
constexpr bool DoesPlatformSupportThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
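// When a platform implementation is defined above, PlatformThreadID is an
// ordinary function, so `+PlatformThreadID != nullptr` is tautologically true
// (hence the suppressed warning); otherwise it is a constexpr null function
// pointer and DoesPlatformSupportThreadID() returns false.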
//===----------------------------------------------------------------------===//
// GuardBase
//===----------------------------------------------------------------------===//
enum class AcquireResult {
INIT_IS_DONE,
INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);
template <class Derived>
struct GuardObject {
GuardObject() = delete;
GuardObject(GuardObject const&) = delete;
GuardObject& operator=(GuardObject const&) = delete;
explicit GuardObject(uint32_t* g)
: base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
thread_id_address(nullptr) {}
explicit GuardObject(uint64_t* g)
: base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}
public:
/// Implements __cxa_guard_acquire
AcquireResult cxa_guard_acquire() {
AtomicInt<uint8_t> guard_byte(guard_byte_address);
if (guard_byte.load(std::_AO_Acquire) == COMPLETE_BIT)
return INIT_IS_DONE;
return derived()->acquire_init_byte();
}
/// Implements __cxa_guard_release
void cxa_guard_release() {
AtomicInt<uint8_t> guard_byte(guard_byte_address);
// Store complete first, so that when release wakes other folks, they see
// it as having been completed.
guard_byte.store(COMPLETE_BIT, std::_AO_Release);
derived()->release_init_byte();
}
/// Implements __cxa_guard_abort
void cxa_guard_abort() { derived()->abort_init_byte(); }
public:
/// base_address - the address of the original guard object.
void* const base_address;
/// The address of the guard byte at offset 0.
uint8_t* const guard_byte_address;
/// The address of the byte used by the implementation during initialization.
uint8_t* const init_byte_address;
/// An optional address storing an identifier for the thread performing initialization.
/// It's used to detect recursive initialization.
uint32_t* const thread_id_address;
private:
Derived* derived() { return static_cast<Derived*>(this); }
};
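// Each derived implementation supplies acquire_init_byte(), release_init_byte()
// and abort_init_byte(); GuardObject itself only reads and writes the guard byte.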
//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//
struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
using GuardObject::GuardObject;
AcquireResult acquire_init_byte() {
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
if (*init_byte_address & PENDING_BIT)
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
}
void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
void abort_init_byte() { *init_byte_address = UNSET; }
};
//===----------------------------------------------------------------------===//
// Global Mutex Implementation
//===----------------------------------------------------------------------===//
struct LibcppMutex;
struct LibcppCondVar;
#ifndef _LIBCXXABI_HAS_NO_THREADS
struct LibcppMutex {
LibcppMutex() = default;
LibcppMutex(LibcppMutex const&) = delete;
LibcppMutex& operator=(LibcppMutex const&) = delete;
bool lock() { return std::__libcpp_mutex_lock(&mutex); }
bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }
private:
friend struct LibcppCondVar;
std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};
struct LibcppCondVar {
LibcppCondVar() = default;
LibcppCondVar(LibcppCondVar const&) = delete;
LibcppCondVar& operator=(LibcppCondVar const&) = delete;
bool wait(LibcppMutex& mut) {
return std::__libcpp_condvar_wait(&cond, &mut.mutex);
}
bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }
private:
std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
: GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
GetThreadID>> {
using BaseT = typename InitByteGlobalMutex::GuardObject;
using BaseT::BaseT;
explicit InitByteGlobalMutex(uint32_t *g)
: BaseT(g), has_thread_id_support(false) {}
explicit InitByteGlobalMutex(uint64_t *g)
: BaseT(g), has_thread_id_support(DoesPlatformSupportThreadID()) {}
public:
AcquireResult acquire_init_byte() {
LockGuard g("__cxa_guard_acquire");
// Check for possible recursive initialization.
if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
if (*thread_id_address == current_thread_id.get())
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
}
// Wait until the pending bit is not set.
while (*init_byte_address & PENDING_BIT) {
*init_byte_address |= WAITING_BIT;
global_cond.wait(global_mutex);
}
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
if (has_thread_id_support)
*thread_id_address = current_thread_id.get();
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
}
void release_init_byte() {
bool has_waiting;
{
LockGuard g("__cxa_guard_release");
has_waiting = *init_byte_address & WAITING_BIT;
*init_byte_address = COMPLETE_BIT;
}
if (has_waiting) {
if (global_cond.broadcast()) {
ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
}
}
}
void abort_init_byte() {
bool has_waiting;
{
LockGuard g("__cxa_guard_abort");
if (has_thread_id_support)
*thread_id_address = 0;
has_waiting = *init_byte_address & WAITING_BIT;
*init_byte_address = UNSET;
}
if (has_waiting) {
if (global_cond.broadcast()) {
ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
}
}
}
private:
using BaseT::init_byte_address;
using BaseT::thread_id_address;
const bool has_thread_id_support;
LazyValue<uint32_t, GetThreadID> current_thread_id;
private:
struct LockGuard {
LockGuard() = delete;
LockGuard(LockGuard const&) = delete;
LockGuard& operator=(LockGuard const&) = delete;
explicit LockGuard(const char* calling_func)
: calling_func(calling_func) {
if (global_mutex.lock())
ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
}
~LockGuard() {
if (global_mutex.unlock())
ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
}
private:
const char* const calling_func;
};
};
//===----------------------------------------------------------------------===//
// Futex Implementation
//===----------------------------------------------------------------------===//
#if defined(SYS_futex)
void PlatformFutexWait(int* addr, int expect) {
constexpr int WAIT = 0;
syscall(SYS_futex, addr, WAIT, expect, 0);
}
void PlatformFutexWake(int* addr) {
constexpr int WAKE = 1;
syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif
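// Reminder of the futex semantics relied on here: FUTEX_WAIT (0) puts the caller
// to sleep only if *addr still equals `expect` when the kernel checks it, so a
// wake that races with the value change is not lost, and FUTEX_WAKE (1) with a
// count of INT_MAX wakes every thread waiting on the address.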
constexpr bool DoesPlatformSupportFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking.
template <void (*Wait)(int*, int) = PlatformFutexWait,
void (*Wake)(int*) = PlatformFutexWake,
uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
using BaseT = typename InitByteFutex::GuardObject;
/// ARM Constructor
explicit InitByteFutex(uint32_t *g) : BaseT(g),
init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg),
thread_id(this->thread_id_address) {}
/// Itanium Constructor
explicit InitByteFutex(uint64_t *g) : BaseT(g),
init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg),
thread_id(this->thread_id_address) {}
public:
AcquireResult acquire_init_byte() {
while (true) {
uint8_t last_val = UNSET;
if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
std::_AO_Acquire)) {
if (has_thread_id_support) {
thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
}
return INIT_IS_PENDING;
}
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
if (last_val & PENDING_BIT) {
// Check for recursive initialization
if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
}
if ((last_val & WAITING_BIT) == 0) {
// This compare exchange can fail for several reasons
// (1) another thread finished the whole thing before we got here
// (2) another thread set the waiting bit we were trying to set
// (3) another thread had an exception and failed to finish
if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
std::_AO_Acq_Rel, std::_AO_Release)) {
// (1) success, via someone else's work!
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
// (3) someone else bailed on doing the work; retry from the start!
if (last_val == UNSET)
continue;
// (2) the waiting bit got set, so we are happy to keep waiting
}
}
wait_on_initialization();
}
}
}
void release_init_byte() {
uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
if (old & WAITING_BIT)
wake_all();
}
void abort_init_byte() {
if (has_thread_id_support)
thread_id.store(0, std::_AO_Relaxed);
uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
if (old & WAITING_BIT)
wake_all();
}
private:
/// Use the futex to wait on the current guard variable. Futex expects a
/// 32-bit, 4-byte-aligned address as the first argument, so we have to use
/// the base address of the guard variable (not the init byte).
void wait_on_initialization() {
Wait(static_cast<int*>(this->base_address),
expected_value_for_futex(PENDING_BIT | WAITING_BIT));
}
void wake_all() { Wake(static_cast<int*>(this->base_address)); }
private:
AtomicInt<uint8_t> init_byte;
const bool has_thread_id_support;
// Unsafe to use unless has_thread_id_support
AtomicInt<uint32_t> thread_id;
LazyValue<uint32_t, GetThreadIDArg> current_thread_id;
/// Create the expected integer value for futex `wait(int* addr, int expected)`.
/// We pass the base address as the first argument, so this function creates
/// a zero-initialized integer with `b` copied at the correct offset.
static int expected_value_for_futex(uint8_t b) {
int dest_val = 0;
std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
return dest_val;
}
static_assert(Wait != nullptr && Wake != nullptr, "");
};
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
template <class T>
struct GlobalStatic {
static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};
enum class Implementation {
NoThreads,
GlobalLock,
Futex
};
template <Implementation Impl>
struct SelectImplementation;
template <>
struct SelectImplementation<Implementation::NoThreads> {
using type = InitByteNoThreads;
};
template <>
struct SelectImplementation<Implementation::GlobalLock> {
using type = InitByteGlobalMutex<
LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};
template <>
struct SelectImplementation<Implementation::Futex> {
using type =
InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};
// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
Implementation::Futex;
#else
Implementation::GlobalLock;
#endif
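// For example, configuring the build with -D_LIBCXXABI_USE_FUTEX on a platform
// that provides SYS_futex selects the futex implementation above.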
static_assert(CurrentImplementation != Implementation::Futex
|| DoesPlatformSupportFutex(), "Futex selected but not supported");
using SelectedImplementation =
SelectImplementation<CurrentImplementation>::type;
} // end namespace
} // end namespace __cxxabiv1
#endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H

src/include/atomic_support.h

@@ -150,7 +150,7 @@ _ValueType __libcpp_atomic_add(_ValueType* __val, _AddType __a,
template <class _ValueType>
inline _LIBCPP_INLINE_VISIBILITY
_ValueType __libcpp_atomic_exchange(_ValueType* __target,
_ValueType __value, int __order = _AO_Seq)
_ValueType __value, int = _AO_Seq)
{
_ValueType old = *__target;
*__target = __value;
@@ -177,4 +177,34 @@ bool __libcpp_atomic_compare_exchange(_ValueType* __val,
_LIBCPP_END_NAMESPACE_STD
namespace {
template <class IntType>
class AtomicInt {
public:
using MemoryOrder = std::__libcpp_atomic_order;
explicit AtomicInt(IntType *b) : b(b) {}
AtomicInt(AtomicInt const&) = delete;
AtomicInt& operator=(AtomicInt const&) = delete;
IntType load(MemoryOrder ord) {
return std::__libcpp_atomic_load(b, ord);
}
void store(IntType val, MemoryOrder ord) {
std::__libcpp_atomic_store(b, val, ord);
}
IntType exchange(IntType new_val, MemoryOrder ord) {
return std::__libcpp_atomic_exchange(b, new_val, ord);
}
bool compare_exchange(IntType *expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) {
return std::__libcpp_atomic_compare_exchange(b, expected, desired, ord_success, ord_failure);
}
private:
IntType *b;
};
} // end namespace
#endif // ATOMIC_SUPPORT_H

View File

@@ -0,0 +1,154 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// UNSUPPORTED: c++98, c++03
#define TESTING_CXA_GUARD
#include "../src/cxa_guard_impl.h"
using namespace __cxxabiv1;
template <class GuardType, class Impl>
struct Tests {
private:
Tests() : g{}, impl(&g) {}
GuardType g;
Impl impl;
uint8_t first_byte() {
uint8_t first;
std::memcpy(&first, &g, 1);
return first;
}
void reset() { g = {}; }
public:
// Test the post conditions on cxa_guard_acquire, cxa_guard_abort, and
// cxa_guard_release. Specifically, that they leave the first byte with
// the value 0 or 1 as specified by the ARM or Itanium specification.
static void test() {
Tests tests;
tests.test_acquire();
tests.test_abort();
tests.test_release();
}
void test_acquire() {
{
reset();
assert(first_byte() == 0);
assert(impl.cxa_guard_acquire() == INIT_IS_PENDING);
assert(first_byte() == 0);
}
{
reset();
assert(first_byte() == 0);
assert(impl.cxa_guard_acquire() == INIT_IS_PENDING);
impl.cxa_guard_release();
assert(first_byte() == 1);
assert(impl.cxa_guard_acquire() == INIT_IS_DONE);
}
}
void test_release() {
{
reset();
assert(first_byte() == 0);
assert(impl.cxa_guard_acquire() == INIT_IS_PENDING);
assert(first_byte() == 0);
impl.cxa_guard_release();
assert(first_byte() == 1);
}
}
void test_abort() {
{
reset();
assert(first_byte() == 0);
assert(impl.cxa_guard_acquire() == INIT_IS_PENDING);
assert(first_byte() == 0);
impl.cxa_guard_abort();
assert(first_byte() == 0);
assert(impl.cxa_guard_acquire() == INIT_IS_PENDING);
assert(first_byte() == 0);
}
}
};
struct NopMutex {
bool lock() {
assert(!is_locked);
is_locked = true;
return false;
}
bool unlock() {
assert(is_locked);
is_locked = false;
return false;
}
private:
bool is_locked = false;
};
static NopMutex global_nop_mutex = {};
struct NopCondVar {
bool broadcast() { return false; }
bool wait(NopMutex&) { return false; }
};
static NopCondVar global_nop_cond = {};
void NopFutexWait(int*, int) { assert(false); }
void NopFutexWake(int*) { assert(false); }
uint32_t MockGetThreadID() { return 0; }
int main() {
{
#if defined(_LIBCXXABI_HAS_NO_THREADS)
static_assert(CurrentImplementation == Implementation::NoThreads, "");
static_assert(
std::is_same<SelectedImplementation, InitByteNoThreads>::value, "");
#else
static_assert(CurrentImplementation == Implementation::GlobalLock, "");
static_assert(
std::is_same<
SelectedImplementation,
InitByteGlobalMutex<LibcppMutex, LibcppCondVar,
GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance>>::value,
"");
#endif
}
{
#if defined(__APPLE__) || defined(__linux__)
assert(PlatformThreadID);
#endif
if (+PlatformThreadID) {
assert(PlatformThreadID() != 0);
assert(PlatformThreadID() == PlatformThreadID());
}
}
{
Tests<uint32_t, InitByteNoThreads>::test();
Tests<uint64_t, InitByteNoThreads>::test();
}
{
using MutexImpl =
InitByteGlobalMutex<NopMutex, NopCondVar, global_nop_mutex,
global_nop_cond, MockGetThreadID>;
Tests<uint32_t, MutexImpl>::test();
Tests<uint64_t, MutexImpl>::test();
}
{
using FutexImpl =
InitByteFutex<&NopFutexWait, &NopFutexWake, &MockGetThreadID>;
Tests<uint32_t, FutexImpl>::test();
Tests<uint64_t, FutexImpl>::test();
}
}

View File

@@ -0,0 +1,378 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// UNSUPPORTED: c++98, c++03
// UNSUPPORTED: libcxxabi-no-threads, libcxxabi-no-exceptions
#define TESTING_CXA_GUARD
#include "../src/cxa_guard_impl.h"
#include <unordered_map>
#include <thread>
#include <atomic>
#include <array>
#include <cassert>
#include <memory>
#include <vector>
using namespace __cxxabiv1;
enum class InitResult {
COMPLETE,
PERFORMED,
WAITED,
ABORTED
};
constexpr InitResult COMPLETE = InitResult::COMPLETE;
constexpr InitResult PERFORMED = InitResult::PERFORMED;
constexpr InitResult WAITED = InitResult::WAITED;
constexpr InitResult ABORTED = InitResult::ABORTED;
template <class Impl, class GuardType, class Init>
InitResult check_guard(GuardType *g, Init init) {
uint8_t *first_byte = reinterpret_cast<uint8_t*>(g);
if (std::__libcpp_atomic_load(first_byte, std::_AO_Acquire) == 0) {
Impl impl(g);
if (impl.cxa_guard_acquire() == INIT_IS_PENDING) {
#ifndef LIBCXXABI_HAS_NO_EXCEPTIONS
try {
#endif
init();
impl.cxa_guard_release();
return PERFORMED;
#ifndef LIBCXXABI_HAS_NO_EXCEPTIONS
} catch (...) {
impl.cxa_guard_abort();
return ABORTED;
}
#endif
}
return WAITED;
}
return COMPLETE;
}
template <class GuardType, class Impl>
struct FunctionLocalStatic {
FunctionLocalStatic() { reset(); }
FunctionLocalStatic(FunctionLocalStatic const&) = delete;
template <class InitFunc>
InitResult access(InitFunc&& init) {
++waiting_threads;
auto res = check_guard<Impl>(&guard_object, init);
--waiting_threads;
++result_counts[static_cast<int>(res)];
return res;
}
struct Accessor {
explicit Accessor(FunctionLocalStatic& obj) : this_obj(&obj) {}
template <class InitFn>
void operator()(InitFn && fn) const {
this_obj->access(std::forward<InitFn>(fn));
}
private:
FunctionLocalStatic *this_obj;
};
Accessor get_access() {
return Accessor(*this);
}
void reset() {
guard_object = 0;
waiting_threads.store(0);
for (auto& counter : result_counts) {
counter.store(0);
}
}
int get_count(InitResult I) const {
return result_counts[static_cast<int>(I)].load();
}
int num_completed() const {
return get_count(COMPLETE) + get_count(PERFORMED) + get_count(WAITED);
}
int num_waiting() const {
return waiting_threads.load();
}
private:
GuardType guard_object;
std::atomic<int> waiting_threads;
std::array<std::atomic<int>, 4> result_counts;
static_assert(static_cast<int>(ABORTED) == 3, "only 4 result kinds expected");
};
struct ThreadGroup {
ThreadGroup() = default;
ThreadGroup(ThreadGroup const&) = delete;
template <class ...Args>
void Create(Args&& ...args) {
threads.emplace_back(std::forward<Args>(args)...);
}
void JoinAll() {
for (auto& t : threads) {
t.join();
}
}
private:
std::vector<std::thread> threads;
};
struct Barrier {
explicit Barrier(int n) : m_wait_for(n) { reset(); }
Barrier(Barrier const&) = delete;
void wait() {
++m_entered;
while (m_entered.load() < m_wait_for) {
std::this_thread::yield();
}
assert(m_entered.load() == m_wait_for);
++m_exited;
}
int num_waiting() const {
return m_entered.load() - m_exited.load();
}
void reset() {
m_entered.store(0);
m_exited.store(0);
}
private:
const int m_wait_for;
std::atomic<int> m_entered;
std::atomic<int> m_exited;
};
struct Notification {
Notification() { reset(); }
Notification(Notification const&) = delete;
int num_waiting() const {
return m_waiting.load();
}
void wait() {
if (m_cond.load())
return;
++m_waiting;
while (!m_cond.load()) {
std::this_thread::yield();
}
--m_waiting;
}
void notify() {
m_cond.store(true);
}
template <class Cond>
void notify_when(Cond &&c) {
if (m_cond.load())
return;
while (!c()) {
std::this_thread::yield();
}
m_cond.store(true);
}
void reset() {
m_cond.store(0);
m_waiting.store(0);
}
private:
std::atomic<bool> m_cond;
std::atomic<int> m_waiting;
};
template <class GuardType, class Impl>
void test_free_for_all() {
const int num_waiting_threads = 10; // 10 threads race; one performs the init, the rest wait or see it complete.
FunctionLocalStatic<GuardType, Impl> test_obj;
Barrier start_init_barrier(num_waiting_threads);
bool already_init = false;
ThreadGroup threads;
for (int i=0; i < num_waiting_threads; ++i) {
threads.Create([&]() {
start_init_barrier.wait();
test_obj.access([&]() {
assert(!already_init);
already_init = true;
});
});
}
// wait for the other threads to finish initialization.
threads.JoinAll();
assert(test_obj.get_count(PERFORMED) == 1);
assert(test_obj.get_count(COMPLETE) + test_obj.get_count(WAITED) == 9);
}
template <class GuardType, class Impl>
void test_waiting_for_init() {
const int num_waiting_threads = 10; // one initializing thread, 10 waiters.
Notification init_pending;
Notification init_barrier;
FunctionLocalStatic<GuardType, Impl> test_obj;
auto access_fn = test_obj.get_access();
ThreadGroup threads;
threads.Create(access_fn,
[&]() {
init_pending.notify();
init_barrier.wait();
}
);
init_pending.wait();
assert(test_obj.num_waiting() == 1);
for (int i=0; i < num_waiting_threads; ++i) {
threads.Create(access_fn, []() { assert(false); });
}
// unblock the initializing thread
init_barrier.notify_when([&]() {
return test_obj.num_waiting() == num_waiting_threads + 1;
});
// wait for the other threads to finish initialization.
threads.JoinAll();
assert(test_obj.get_count(PERFORMED) == 1);
assert(test_obj.get_count(WAITED) == 10);
assert(test_obj.get_count(COMPLETE) == 0);
}
template <class GuardType, class Impl>
void test_aborted_init() {
const int num_waiting_threads = 10; // one initializing thread, 10 waiters.
Notification init_pending;
Notification init_barrier;
FunctionLocalStatic<GuardType, Impl> test_obj;
auto access_fn = test_obj.get_access();
ThreadGroup threads;
threads.Create(access_fn,
[&]() {
init_pending.notify();
init_barrier.wait();
throw 42;
}
);
init_pending.wait();
assert(test_obj.num_waiting() == 1);
bool already_init = false;
for (int i=0; i < num_waiting_threads; ++i) {
threads.Create(access_fn, [&]() {
assert(!already_init);
already_init = true;
});
}
// unblock the initializing thread
init_barrier.notify_when([&]() {
return test_obj.num_waiting() == num_waiting_threads + 1;
});
// wait for the other threads to finish initialization.
threads.JoinAll();
assert(test_obj.get_count(ABORTED) == 1);
assert(test_obj.get_count(PERFORMED) == 1);
assert(test_obj.get_count(WAITED) == 9);
assert(test_obj.get_count(COMPLETE) == 0);
}
template <class GuardType, class Impl>
void test_completed_init() {
const int num_waiting_threads = 10; // one initializing thread, 10 waiters.
Notification init_barrier;
FunctionLocalStatic<GuardType, Impl> test_obj;
test_obj.access([]() {});
assert(test_obj.num_waiting() == 0);
assert(test_obj.num_completed() == 1);
assert(test_obj.get_count(PERFORMED) == 1);
auto access_fn = test_obj.get_access();
ThreadGroup threads;
for (int i=0; i < num_waiting_threads; ++i) {
threads.Create(access_fn, []() {
assert(false);
});
}
// wait for the other threads to finish initialization.
threads.JoinAll();
assert(test_obj.get_count(ABORTED) == 0);
assert(test_obj.get_count(PERFORMED) == 1);
assert(test_obj.get_count(WAITED) == 0);
assert(test_obj.get_count(COMPLETE) == 10);
}
template <class Impl>
void test_impl() {
{
test_free_for_all<uint32_t, Impl>();
test_free_for_all<uint64_t, Impl>();
}
{
test_waiting_for_init<uint32_t, Impl>();
test_waiting_for_init<uint64_t, Impl>();
}
{
test_aborted_init<uint32_t, Impl>();
test_aborted_init<uint64_t, Impl>();
}
{
test_completed_init<uint32_t, Impl>();
test_completed_init<uint64_t, Impl>();
}
}
int main() {
using MutexImpl = SelectImplementation<Implementation::GlobalLock>::type;
// Attempt to test the Futex based implementation if it's supported on the
// target platform.
using RealFutexImpl = SelectImplementation<Implementation::Futex>::type;
using FutexImpl = typename std::conditional<
DoesPlatformSupportFutex(),
RealFutexImpl,
MutexImpl
>::type;
// Run each test 5 times to help TSAN catch bugs.
const int num_runs = 5;
for (int i=0; i < num_runs; ++i) {
test_impl<MutexImpl>();
if (DoesPlatformSupportFutex())
test_impl<FutexImpl>();
}
}