2020-02-18 22:58:34 +08:00
|
|
|
// -*- C++ -*-
|
|
|
|
//===--------------------------- barrier ----------------------------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef _LIBCPP_BARRIER
|
|
|
|
#define _LIBCPP_BARRIER
|
|
|
|
|
|
|
|
/*
|
|
|
|
barrier synopsis
|
|
|
|
|
|
|
|
namespace std
|
|
|
|
{
|
|
|
|
|
|
|
|
template<class CompletionFunction = see below>
|
|
|
|
class barrier
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using arrival_token = see below;
|
|
|
|
|
2020-12-06 22:36:18 +08:00
|
|
|
static constexpr ptrdiff_t max() noexcept;
|
|
|
|
|
2020-02-18 22:58:34 +08:00
|
|
|
constexpr explicit barrier(ptrdiff_t phase_count,
|
|
|
|
CompletionFunction f = CompletionFunction());
|
|
|
|
~barrier();
|
|
|
|
|
|
|
|
barrier(const barrier&) = delete;
|
|
|
|
barrier& operator=(const barrier&) = delete;
|
|
|
|
|
|
|
|
[[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
|
|
|
|
void wait(arrival_token&& arrival) const;
|
|
|
|
|
|
|
|
void arrive_and_wait();
|
|
|
|
void arrive_and_drop();
|
|
|
|
|
|
|
|
private:
|
|
|
|
CompletionFunction completion; // exposition only
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <__config>
|
2020-11-05 04:01:25 +08:00
|
|
|
#include <__availability>
|
2020-02-18 22:58:34 +08:00
|
|
|
#include <atomic>
|
|
|
|
#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
|
|
|
|
# include <memory>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
|
|
#pragma GCC system_header
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef _LIBCPP_HAS_NO_THREADS
|
|
|
|
# error <barrier> is not supported on this single threaded system
|
|
|
|
#endif
|
|
|
|
|
2020-12-03 07:55:01 +08:00
|
|
|
_LIBCPP_PUSH_MACROS
|
|
|
|
#include <__undef_macros>
|
|
|
|
|
2020-02-25 07:12:44 +08:00
|
|
|
#if _LIBCPP_STD_VER >= 14
|
2020-02-18 22:58:34 +08:00
|
|
|
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
|
|
|
|
struct __empty_completion
|
|
|
|
{
|
|
|
|
inline _LIBCPP_INLINE_VISIBILITY
|
|
|
|
void operator()() noexcept
|
|
|
|
{
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
The default implementation of __barrier_base is a classic tree barrier.
|
|
|
|
|
|
|
|
It looks different from literature pseudocode for two main reasons:
|
|
|
|
1. Threads that call into std::barrier functions do not provide indices,
|
|
|
|
so a numbering step is added before the actual barrier algorithm,
|
|
|
|
appearing as an N+1 round to the N rounds of the tree barrier.
|
|
|
|
2. A great deal of attention has been paid to avoid cache line thrashing
|
|
|
|
by flattening the tree structure into cache-line sized arrays, that
|
|
|
|
are indexed in an efficient way.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
using __barrier_phase_t = uint8_t;
|
|
|
|
|
|
|
|
class __barrier_algorithm_base;
|
|
|
|
|
2020-02-24 23:08:41 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
|
2020-02-18 22:58:34 +08:00
|
|
|
__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
|
|
|
|
|
2020-02-24 23:08:41 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
|
2020-02-18 22:58:34 +08:00
|
|
|
bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
|
|
|
|
__barrier_phase_t __old_phase);
|
|
|
|
|
2020-02-24 23:08:41 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
|
2020-02-18 22:58:34 +08:00
|
|
|
void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
|
|
|
|
|
|
|
|
template<class _CompletionF>
|
|
|
|
class __barrier_base {
|
|
|
|
|
|
|
|
ptrdiff_t __expected;
|
|
|
|
unique_ptr<__barrier_algorithm_base,
|
2020-02-24 23:09:29 +08:00
|
|
|
void (*)(__barrier_algorithm_base*)> __base;
|
2020-02-18 22:58:34 +08:00
|
|
|
__atomic_base<ptrdiff_t> __expected_adjustment;
|
|
|
|
_CompletionF __completion;
|
|
|
|
__atomic_base<__barrier_phase_t> __phase;
|
|
|
|
|
|
|
|
public:
|
|
|
|
using arrival_token = __barrier_phase_t;
|
|
|
|
|
|
|
|
static constexpr ptrdiff_t max() noexcept {
|
|
|
|
return numeric_limits<ptrdiff_t>::max();
|
|
|
|
}
|
|
|
|
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
__barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
|
|
|
|
: __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
|
|
|
|
&__destroy_barrier_algorithm_base),
|
|
|
|
__expected_adjustment(0), __completion(move(__completion)), __phase(0)
|
|
|
|
{
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
[[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
arrival_token arrive(ptrdiff_t update)
|
|
|
|
{
|
|
|
|
auto const __old_phase = __phase.load(memory_order_relaxed);
|
|
|
|
for(; update; --update)
|
|
|
|
if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
|
|
|
|
__completion();
|
|
|
|
__expected += __expected_adjustment.load(memory_order_relaxed);
|
|
|
|
__expected_adjustment.store(0, memory_order_relaxed);
|
|
|
|
__phase.store(__old_phase + 2, memory_order_release);
|
|
|
|
__phase.notify_all();
|
|
|
|
}
|
|
|
|
return __old_phase;
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void wait(arrival_token&& __old_phase) const
|
|
|
|
{
|
|
|
|
auto const __test_fn = [=]() -> bool {
|
|
|
|
return __phase.load(memory_order_acquire) != __old_phase;
|
|
|
|
};
|
|
|
|
__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void arrive_and_drop()
|
|
|
|
{
|
|
|
|
__expected_adjustment.fetch_sub(1, memory_order_relaxed);
|
|
|
|
(void)arrive(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
The alternative implementation of __barrier_base is a central barrier.
|
|
|
|
|
|
|
|
Two versions of this algorithm are provided:
|
|
|
|
1. A fairly straightforward implementation of the litterature for the
|
|
|
|
general case where the completion function is not empty.
|
|
|
|
2. An optimized implementation that exploits 2's complement arithmetic
|
|
|
|
and well-defined overflow in atomic arithmetic, to handle the phase
|
|
|
|
roll-over for free.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
template<class _CompletionF>
|
|
|
|
class __barrier_base {
|
|
|
|
|
|
|
|
__atomic_base<ptrdiff_t> __expected;
|
|
|
|
__atomic_base<ptrdiff_t> __arrived;
|
|
|
|
_CompletionF __completion;
|
|
|
|
__atomic_base<bool> __phase;
|
|
|
|
public:
|
|
|
|
using arrival_token = bool;
|
|
|
|
|
|
|
|
static constexpr ptrdiff_t max() noexcept {
|
|
|
|
return numeric_limits<ptrdiff_t>::max();
|
|
|
|
}
|
|
|
|
|
|
|
|
_LIBCPP_INLINE_VISIBILITY
|
|
|
|
__barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
|
|
|
|
: __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
|
|
|
|
{
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
[[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
arrival_token arrive(ptrdiff_t update)
|
|
|
|
{
|
|
|
|
auto const __old_phase = __phase.load(memory_order_relaxed);
|
|
|
|
auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
|
|
|
|
auto const new_expected = __expected.load(memory_order_relaxed);
|
|
|
|
if(0 == __result) {
|
|
|
|
__completion();
|
|
|
|
__arrived.store(new_expected, memory_order_relaxed);
|
|
|
|
__phase.store(!__old_phase, memory_order_release);
|
|
|
|
__phase.notify_all();
|
|
|
|
}
|
|
|
|
return __old_phase;
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void wait(arrival_token&& __old_phase) const
|
|
|
|
{
|
|
|
|
__phase.wait(__old_phase, memory_order_acquire);
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void arrive_and_drop()
|
|
|
|
{
|
|
|
|
__expected.fetch_sub(1, memory_order_relaxed);
|
|
|
|
(void)arrive(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
template<>
|
|
|
|
class __barrier_base<__empty_completion> {
|
|
|
|
|
|
|
|
static constexpr uint64_t __expected_unit = 1ull;
|
|
|
|
static constexpr uint64_t __arrived_unit = 1ull << 32;
|
|
|
|
static constexpr uint64_t __expected_mask = __arrived_unit - 1;
|
|
|
|
static constexpr uint64_t __phase_bit = 1ull << 63;
|
|
|
|
static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
|
|
|
|
|
|
|
|
__atomic_base<uint64_t> __phase_arrived_expected;
|
|
|
|
|
|
|
|
static _LIBCPP_INLINE_VISIBILITY
|
|
|
|
constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
|
|
|
|
{
|
|
|
|
return ((uint64_t(1u << 31) - __count) << 32)
|
|
|
|
| (uint64_t(1u << 31) - __count);
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
using arrival_token = uint64_t;
|
|
|
|
|
|
|
|
static constexpr ptrdiff_t max() noexcept {
|
|
|
|
return ptrdiff_t(1u << 31) - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
_LIBCPP_INLINE_VISIBILITY
|
|
|
|
explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
|
|
|
|
: __phase_arrived_expected(__init(__count))
|
|
|
|
{
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
[[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
arrival_token arrive(ptrdiff_t update)
|
|
|
|
{
|
|
|
|
auto const __inc = __arrived_unit * update;
|
|
|
|
auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
|
|
|
|
if((__old ^ (__old + __inc)) & __phase_bit) {
|
|
|
|
__phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
|
|
|
|
__phase_arrived_expected.notify_all();
|
|
|
|
}
|
|
|
|
return __old & __phase_bit;
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void wait(arrival_token&& __phase) const
|
|
|
|
{
|
|
|
|
auto const __test_fn = [=]() -> bool {
|
|
|
|
uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
|
|
|
|
return ((__current & __phase_bit) != __phase);
|
|
|
|
};
|
|
|
|
__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void arrive_and_drop()
|
|
|
|
{
|
|
|
|
__phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
|
|
|
|
(void)arrive(1);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif //_LIBCPP_HAS_NO_TREE_BARRIER
|
|
|
|
|
|
|
|
template<class _CompletionF = __empty_completion>
|
|
|
|
class barrier {
|
|
|
|
|
|
|
|
__barrier_base<_CompletionF> __b;
|
|
|
|
public:
|
|
|
|
using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
|
|
|
|
|
|
|
|
static constexpr ptrdiff_t max() noexcept {
|
|
|
|
return __barrier_base<_CompletionF>::max();
|
|
|
|
}
|
|
|
|
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
|
[libc++] Consistently replace `std::` qualification with `_VSTD::` or nothing. NFCI.
I used a lot of `git grep` to find places where `std::` was being used
outside of comments and assert-messages. There were three outcomes:
- Qualified function calls, e.g. `std::move` becomes `_VSTD::move`.
This is the most common case.
- Typenames that don't need qualification, e.g. `std::allocator` becomes `allocator`.
Leaving these as `_VSTD::allocator` would also be fine, but I decided
that removing the qualification is more consistent with existing practice.
- Names that specifically need un-versioned `std::` qualification,
or that I wasn't sure about. For example, I didn't touch any code in
<atomic>, <math.h>, <new>, or any ext/ or experimental/ headers;
and I didn't touch any instances of `std::type_info`.
In some deduction guides, we were accidentally using `class Alloc = typename std::allocator<T>`,
despite `std::allocator<T>`'s type-ness not being template-dependent.
Because `std::allocator` is a qualified name, this did parse as we intended;
but what we meant was simply `class Alloc = allocator<T>`.
Differential Revision: https://reviews.llvm.org/D92250
2020-11-28 00:02:06 +08:00
|
|
|
: __b(__count, _VSTD::move(__completion)) {
|
2020-02-18 22:58:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
barrier(barrier const&) = delete;
|
|
|
|
barrier& operator=(barrier const&) = delete;
|
|
|
|
|
2020-02-24 23:09:29 +08:00
|
|
|
[[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
arrival_token arrive(ptrdiff_t update = 1)
|
|
|
|
{
|
|
|
|
return __b.arrive(update);
|
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void wait(arrival_token&& __phase) const
|
|
|
|
{
|
[libc++] Consistently replace `std::` qualification with `_VSTD::` or nothing. NFCI.
I used a lot of `git grep` to find places where `std::` was being used
outside of comments and assert-messages. There were three outcomes:
- Qualified function calls, e.g. `std::move` becomes `_VSTD::move`.
This is the most common case.
- Typenames that don't need qualification, e.g. `std::allocator` becomes `allocator`.
Leaving these as `_VSTD::allocator` would also be fine, but I decided
that removing the qualification is more consistent with existing practice.
- Names that specifically need un-versioned `std::` qualification,
or that I wasn't sure about. For example, I didn't touch any code in
<atomic>, <math.h>, <new>, or any ext/ or experimental/ headers;
and I didn't touch any instances of `std::type_info`.
In some deduction guides, we were accidentally using `class Alloc = typename std::allocator<T>`,
despite `std::allocator<T>`'s type-ness not being template-dependent.
Because `std::allocator` is a qualified name, this did parse as we intended;
but what we meant was simply `class Alloc = allocator<T>`.
Differential Revision: https://reviews.llvm.org/D92250
2020-11-28 00:02:06 +08:00
|
|
|
__b.wait(_VSTD::move(__phase));
|
2020-02-18 22:58:34 +08:00
|
|
|
}
|
2021-04-18 05:03:20 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void arrive_and_wait()
|
|
|
|
{
|
|
|
|
wait(arrive());
|
2021-04-18 05:03:20 +08:00
|
|
|
}
|
2020-02-24 23:09:29 +08:00
|
|
|
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
|
2020-02-18 22:58:34 +08:00
|
|
|
void arrive_and_drop()
|
|
|
|
{
|
|
|
|
__b.arrive_and_drop();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
|
2020-02-25 07:12:44 +08:00
|
|
|
#endif // _LIBCPP_STD_VER >= 14
|
|
|
|
|
2020-12-03 07:55:01 +08:00
|
|
|
_LIBCPP_POP_MACROS
|
|
|
|
|
2020-02-18 22:58:34 +08:00
|
|
|
#endif //_LIBCPP_BARRIER
|