llvm-project/libcxxabi/src/cxa_guard_impl.h

594 lines
19 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
#define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H
/* cxa_guard_impl.h - Implements the C++ runtime support for function local
* static guards.
* The layout of the guard object is the same across ARM and Itanium.
*
* The first "guard byte" (which is checked by the compiler) is set only upon
* the completion of cxa release.
*
* The second "init byte" does the rest of the bookkeeping. It tracks if
* initialization is complete or pending, and if there are waiting threads.
*
* If the guard variable is 64-bits and the platforms supplies a 32-bit thread
* identifier, it is used to detect recursive initialization. The thread ID of
* the thread currently performing initialization is stored in the second word.
*
* Guard Object Layout:
* -------------------------------------------------------------------------
* |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... |
* ------------------------------------------------------------------------
*
* Access Protocol:
* For each implementation the guard byte is checked and set before accessing
* the init byte.
*
* Overall Design:
* The implementation was designed to allow each implementation to be tested
* independent of the C++ runtime or platform support.
*
*/
#include "__cxxabi_config.h"
#include "include/atomic_support.h"
#include <unistd.h>
#include <sys/types.h>
#if defined(__has_include)
# if __has_include(<sys/syscall.h>)
# include <sys/syscall.h>
# endif
#endif
#include <stdlib.h>
#include <__threading_support>
#ifndef _LIBCXXABI_HAS_NO_THREADS
#if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB)
#pragma comment(lib, "pthread")
#endif
#endif
// To make testing possible, this header is included from both cxa_guard.cpp
// and a number of tests.
//
// For this reason we place everything in an anonymous namespace -- even though
// we're in a header. We want the actual implementation and the tests to have
// unique definitions of the types in this header (since the tests may depend
// on function local statics).
//
// To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be
// defined when including this file. Only `src/cxa_guard.cpp` should define
// the former.
#ifdef BUILDING_CXA_GUARD
# include "abort_message.h"
# define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__)
#elif defined(TESTING_CXA_GUARD)
# define ABORT_WITH_MESSAGE(...) ::abort()
#else
# error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined"
#endif
#if __has_feature(thread_sanitizer)
extern "C" void __tsan_acquire(void*);
extern "C" void __tsan_release(void*);
#else
#define __tsan_acquire(addr) ((void)0)
#define __tsan_release(addr) ((void)0)
#endif
namespace __cxxabiv1 {
// Use an anonymous namespace to ensure that the tests and actual implementation
// have unique definitions of these symbols.
namespace {
//===----------------------------------------------------------------------===//
// Misc Utilities
//===----------------------------------------------------------------------===//
template <class T, T(*Init)()>
struct LazyValue {
LazyValue() : is_init(false) {}
T& get() {
if (!is_init) {
value = Init();
is_init = true;
}
return value;
}
private:
T value;
bool is_init = false;
};
template <class IntType>
class AtomicInt {
public:
using MemoryOrder = std::__libcpp_atomic_order;
explicit AtomicInt(IntType *b) : b(b) {}
AtomicInt(AtomicInt const&) = delete;
AtomicInt& operator=(AtomicInt const&) = delete;
IntType load(MemoryOrder ord) {
return std::__libcpp_atomic_load(b, ord);
}
void store(IntType val, MemoryOrder ord) {
std::__libcpp_atomic_store(b, val, ord);
}
IntType exchange(IntType new_val, MemoryOrder ord) {
return std::__libcpp_atomic_exchange(b, new_val, ord);
}
bool compare_exchange(IntType *expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) {
return std::__libcpp_atomic_compare_exchange(b, expected, desired, ord_success, ord_failure);
}
private:
IntType *b;
};
//===----------------------------------------------------------------------===//
// PlatformGetThreadID
//===----------------------------------------------------------------------===//
#if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
static_assert(sizeof(mach_port_t) == sizeof(uint32_t), "");
return static_cast<uint32_t>(
pthread_mach_thread_np(std::__libcpp_thread_get_current_id()));
}
#elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
uint32_t PlatformThreadID() {
static_assert(sizeof(pid_t) == sizeof(uint32_t), "");
return static_cast<uint32_t>(syscall(SYS_gettid));
}
#else
constexpr uint32_t (*PlatformThreadID)() = nullptr;
#endif
constexpr bool PlatformSupportsThreadID() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
return +PlatformThreadID != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
//===----------------------------------------------------------------------===//
// GuardBase
//===----------------------------------------------------------------------===//
enum class AcquireResult {
INIT_IS_DONE,
INIT_IS_PENDING,
};
constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE;
constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING;
static constexpr uint8_t UNSET = 0;
static constexpr uint8_t COMPLETE_BIT = (1 << 0);
static constexpr uint8_t PENDING_BIT = (1 << 1);
static constexpr uint8_t WAITING_BIT = (1 << 2);
template <class Derived>
struct GuardObject {
GuardObject() = delete;
GuardObject(GuardObject const&) = delete;
GuardObject& operator=(GuardObject const&) = delete;
explicit GuardObject(uint32_t* g)
: base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
thread_id_address(nullptr) {}
explicit GuardObject(uint64_t* g)
: base_address(g), guard_byte_address(reinterpret_cast<uint8_t*>(g)),
init_byte_address(reinterpret_cast<uint8_t*>(g) + 1),
thread_id_address(reinterpret_cast<uint32_t*>(g) + 1) {}
public:
/// Implements __cxa_guard_acquire
AcquireResult cxa_guard_acquire() {
AtomicInt<uint8_t> guard_byte(guard_byte_address);
if (guard_byte.load(std::_AO_Acquire) != UNSET)
return INIT_IS_DONE;
return derived()->acquire_init_byte();
}
/// Implements __cxa_guard_release
void cxa_guard_release() {
AtomicInt<uint8_t> guard_byte(guard_byte_address);
// Store complete first, so that when release wakes other folks, they see
// it as having been completed.
guard_byte.store(COMPLETE_BIT, std::_AO_Release);
derived()->release_init_byte();
}
/// Implements __cxa_guard_abort
void cxa_guard_abort() { derived()->abort_init_byte(); }
public:
/// base_address - the address of the original guard object.
void* const base_address;
/// The address of the guard byte at offset 0.
uint8_t* const guard_byte_address;
/// The address of the byte used by the implementation during initialization.
uint8_t* const init_byte_address;
/// An optional address storing an identifier for the thread performing initialization.
/// It's used to detect recursive initialization.
uint32_t* const thread_id_address;
private:
Derived* derived() { return static_cast<Derived*>(this); }
};
//===----------------------------------------------------------------------===//
// Single Threaded Implementation
//===----------------------------------------------------------------------===//
struct InitByteNoThreads : GuardObject<InitByteNoThreads> {
using GuardObject::GuardObject;
AcquireResult acquire_init_byte() {
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
if (*init_byte_address & PENDING_BIT)
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
}
void release_init_byte() { *init_byte_address = COMPLETE_BIT; }
void abort_init_byte() { *init_byte_address = UNSET; }
};
//===----------------------------------------------------------------------===//
// Global Mutex Implementation
//===----------------------------------------------------------------------===//
struct LibcppMutex;
struct LibcppCondVar;
#ifndef _LIBCXXABI_HAS_NO_THREADS
struct LibcppMutex {
LibcppMutex() = default;
LibcppMutex(LibcppMutex const&) = delete;
LibcppMutex& operator=(LibcppMutex const&) = delete;
bool lock() { return std::__libcpp_mutex_lock(&mutex); }
bool unlock() { return std::__libcpp_mutex_unlock(&mutex); }
private:
friend struct LibcppCondVar;
std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER;
};
struct LibcppCondVar {
LibcppCondVar() = default;
LibcppCondVar(LibcppCondVar const&) = delete;
LibcppCondVar& operator=(LibcppCondVar const&) = delete;
bool wait(LibcppMutex& mut) {
return std::__libcpp_condvar_wait(&cond, &mut.mutex);
}
bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); }
private:
std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER;
};
#else
struct LibcppMutex {};
struct LibcppCondVar {};
#endif // !defined(_LIBCXXABI_HAS_NO_THREADS)
template <class Mutex, class CondVar, Mutex& global_mutex, CondVar& global_cond,
uint32_t (*GetThreadID)() = PlatformThreadID>
struct InitByteGlobalMutex
: GuardObject<InitByteGlobalMutex<Mutex, CondVar, global_mutex, global_cond,
GetThreadID>> {
using BaseT = typename InitByteGlobalMutex::GuardObject;
using BaseT::BaseT;
explicit InitByteGlobalMutex(uint32_t *g)
: BaseT(g), has_thread_id_support(false) {}
explicit InitByteGlobalMutex(uint64_t *g)
: BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {}
public:
AcquireResult acquire_init_byte() {
LockGuard g("__cxa_guard_acquire");
// Check for possible recursive initialization.
if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) {
if (*thread_id_address == current_thread_id.get())
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
}
// Wait until the pending bit is not set.
while (*init_byte_address & PENDING_BIT) {
*init_byte_address |= WAITING_BIT;
global_cond.wait(global_mutex);
}
if (*init_byte_address == COMPLETE_BIT)
return INIT_IS_DONE;
if (has_thread_id_support)
*thread_id_address = current_thread_id.get();
*init_byte_address = PENDING_BIT;
return INIT_IS_PENDING;
}
void release_init_byte() {
bool has_waiting;
{
LockGuard g("__cxa_guard_release");
has_waiting = *init_byte_address & WAITING_BIT;
*init_byte_address = COMPLETE_BIT;
}
if (has_waiting) {
if (global_cond.broadcast()) {
ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release");
}
}
}
void abort_init_byte() {
bool has_waiting;
{
LockGuard g("__cxa_guard_abort");
if (has_thread_id_support)
*thread_id_address = 0;
has_waiting = *init_byte_address & WAITING_BIT;
*init_byte_address = UNSET;
}
if (has_waiting) {
if (global_cond.broadcast()) {
ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort");
}
}
}
private:
using BaseT::init_byte_address;
using BaseT::thread_id_address;
const bool has_thread_id_support;
LazyValue<uint32_t, GetThreadID> current_thread_id;
private:
struct LockGuard {
LockGuard() = delete;
LockGuard(LockGuard const&) = delete;
LockGuard& operator=(LockGuard const&) = delete;
explicit LockGuard(const char* calling_func)
: calling_func(calling_func) {
if (global_mutex.lock())
ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func);
}
~LockGuard() {
if (global_mutex.unlock())
ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func);
}
private:
const char* const calling_func;
};
};
//===----------------------------------------------------------------------===//
// Futex Implementation
//===----------------------------------------------------------------------===//
#if defined(SYS_futex)
void PlatformFutexWait(int* addr, int expect) {
constexpr int WAIT = 0;
syscall(SYS_futex, addr, WAIT, expect, 0);
__tsan_acquire(addr);
}
void PlatformFutexWake(int* addr) {
constexpr int WAKE = 1;
__tsan_release(addr);
syscall(SYS_futex, addr, WAKE, INT_MAX);
}
#else
constexpr void (*PlatformFutexWait)(int*, int) = nullptr;
constexpr void (*PlatformFutexWake)(int*) = nullptr;
#endif
constexpr bool PlatformSupportsFutex() {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-pointer-compare"
#endif
return +PlatformFutexWait != nullptr;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
}
/// InitByteFutex - Manages initialization using atomics and the futex syscall
/// for waiting and waking.
template <void (*Wait)(int*, int) = PlatformFutexWait,
void (*Wake)(int*) = PlatformFutexWake,
uint32_t (*GetThreadIDArg)() = PlatformThreadID>
struct InitByteFutex : GuardObject<InitByteFutex<Wait, Wake, GetThreadIDArg>> {
using BaseT = typename InitByteFutex::GuardObject;
/// ARM Constructor
explicit InitByteFutex(uint32_t *g) : BaseT(g),
init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg),
thread_id(this->thread_id_address) {}
/// Itanium Constructor
explicit InitByteFutex(uint64_t *g) : BaseT(g),
init_byte(this->init_byte_address),
has_thread_id_support(this->thread_id_address && GetThreadIDArg),
thread_id(this->thread_id_address) {}
public:
AcquireResult acquire_init_byte() {
while (true) {
uint8_t last_val = UNSET;
if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel,
std::_AO_Acquire)) {
if (has_thread_id_support) {
thread_id.store(current_thread_id.get(), std::_AO_Relaxed);
}
return INIT_IS_PENDING;
}
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
if (last_val & PENDING_BIT) {
// Check for recursive initialization
if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) {
ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization");
}
if ((last_val & WAITING_BIT) == 0) {
// This compare exchange can fail for several reasons
// (1) another thread finished the whole thing before we got here
// (2) another thread set the waiting bit we were trying to thread
// (3) another thread had an exception and failed to finish
if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT,
std::_AO_Acq_Rel, std::_AO_Release)) {
// (1) success, via someone else's work!
if (last_val == COMPLETE_BIT)
return INIT_IS_DONE;
// (3) someone else, bailed on doing the work, retry from the start!
if (last_val == UNSET)
continue;
// (2) the waiting bit got set, so we are happy to keep waiting
}
}
wait_on_initialization();
}
}
}
void release_init_byte() {
uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel);
if (old & WAITING_BIT)
wake_all();
}
void abort_init_byte() {
if (has_thread_id_support)
thread_id.store(0, std::_AO_Relaxed);
uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel);
if (old & WAITING_BIT)
wake_all();
}
private:
/// Use the futex to wait on the current guard variable. Futex expects a
/// 32-bit 4-byte aligned address as the first argument, so we have to use use
/// the base address of the guard variable (not the init byte).
void wait_on_initialization() {
Wait(static_cast<int*>(this->base_address),
expected_value_for_futex(PENDING_BIT | WAITING_BIT));
}
void wake_all() { Wake(static_cast<int*>(this->base_address)); }
private:
AtomicInt<uint8_t> init_byte;
const bool has_thread_id_support;
// Unsafe to use unless has_thread_id_support
AtomicInt<uint32_t> thread_id;
LazyValue<uint32_t, GetThreadIDArg> current_thread_id;
/// Create the expected integer value for futex `wait(int* addr, int expected)`.
/// We pass the base address as the first argument, So this function creates
/// an zero-initialized integer with `b` copied at the correct offset.
static int expected_value_for_futex(uint8_t b) {
int dest_val = 0;
std::memcpy(reinterpret_cast<char*>(&dest_val) + 1, &b, 1);
return dest_val;
}
static_assert(Wait != nullptr && Wake != nullptr, "");
};
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
template <class T>
struct GlobalStatic {
static T instance;
};
template <class T>
_LIBCPP_SAFE_STATIC T GlobalStatic<T>::instance = {};
enum class Implementation {
NoThreads,
GlobalLock,
Futex
};
template <Implementation Impl>
struct SelectImplementation;
template <>
struct SelectImplementation<Implementation::NoThreads> {
using type = InitByteNoThreads;
};
template <>
struct SelectImplementation<Implementation::GlobalLock> {
using type = InitByteGlobalMutex<
LibcppMutex, LibcppCondVar, GlobalStatic<LibcppMutex>::instance,
GlobalStatic<LibcppCondVar>::instance, PlatformThreadID>;
};
template <>
struct SelectImplementation<Implementation::Futex> {
using type =
InitByteFutex<PlatformFutexWait, PlatformFutexWake, PlatformThreadID>;
};
// TODO(EricWF): We should prefer the futex implementation when available. But
// it should be done in a separate step from adding the implementation.
constexpr Implementation CurrentImplementation =
#if defined(_LIBCXXABI_HAS_NO_THREADS)
Implementation::NoThreads;
#elif defined(_LIBCXXABI_USE_FUTEX)
Implementation::Futex;
#else
Implementation::GlobalLock;
#endif
static_assert(CurrentImplementation != Implementation::Futex
|| PlatformSupportsFutex(), "Futex selected but not supported");
using SelectedImplementation =
SelectImplementation<CurrentImplementation>::type;
} // end namespace
} // end namespace __cxxabiv1
#endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H