aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/include/std/barrier
diff options
context:
space:
mode:
Diffstat (limited to 'libstdc++-v3/include/std/barrier')
-rw-r--r--libstdc++-v3/include/std/barrier199
1 files changed, 117 insertions, 82 deletions
diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
index 6c3cfd4..56270c9 100644
--- a/libstdc++-v3/include/std/barrier
+++ b/libstdc++-v3/include/std/barrier
@@ -81,105 +81,142 @@ It looks different from literature pseudocode for two main reasons:
enum class __barrier_phase_t : unsigned char { };
- template<typename _CompletionF>
- class __tree_barrier
+ struct __tree_barrier_base
+ {
+ static constexpr ptrdiff_t
+ max() noexcept
+ { return __PTRDIFF_MAX__ - 1; }
+
+ protected:
+ using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
+ using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
+ static constexpr auto __phase_alignment =
+ __atomic_phase_ref_t::required_alignment;
+
+ using __tickets_t = std::array<__barrier_phase_t, 64>;
+ struct alignas(64) /* naturally-align the heap state */ __state_t
{
- using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
- using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
- static constexpr auto __phase_alignment =
- __atomic_phase_ref_t::required_alignment;
+ alignas(__phase_alignment) __tickets_t __tickets;
+ };
- using __tickets_t = std::array<__barrier_phase_t, 64>;
- struct alignas(64) /* naturally-align the heap state */ __state_t
- {
- alignas(__phase_alignment) __tickets_t __tickets;
- };
+ ptrdiff_t _M_expected;
+ __atomic_base<__state_t*> _M_state{nullptr};
+ __atomic_base<ptrdiff_t> _M_expected_adjustment{0};
+ alignas(__phase_alignment) __barrier_phase_t _M_phase{};
- ptrdiff_t _M_expected;
- unique_ptr<__state_t[]> _M_state;
- __atomic_base<ptrdiff_t> _M_expected_adjustment;
- _CompletionF _M_completion;
+ explicit constexpr
+ __tree_barrier_base(ptrdiff_t __expected)
+ : _M_expected(__expected)
+ {
+ __glibcxx_assert(__expected >= 0 && __expected <= max());
- alignas(__phase_alignment) __barrier_phase_t _M_phase;
+ if (!std::is_constant_evaluated())
+ _M_state.store(_M_alloc_state().release(), memory_order_release);
+ }
- bool
- _M_arrive(__barrier_phase_t __old_phase, size_t __current)
- {
- const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
- const auto __half_step =
- static_cast<__barrier_phase_t>(__old_phase_val + 1);
- const auto __full_step =
- static_cast<__barrier_phase_t>(__old_phase_val + 2);
+ unique_ptr<__state_t[]>
+ _M_alloc_state()
+ {
+ size_t const __count = (_M_expected + 1) >> 1;
+ return std::make_unique<__state_t[]>(__count);
+ }
- size_t __current_expected = _M_expected;
- __current %= ((_M_expected + 1) >> 1);
+ bool
+ _M_arrive(__barrier_phase_t __old_phase, size_t __current)
+ {
+ const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
+ const auto __half_step =
+ static_cast<__barrier_phase_t>(__old_phase_val + 1);
+ const auto __full_step =
+ static_cast<__barrier_phase_t>(__old_phase_val + 2);
+
+ size_t __current_expected = _M_expected;
+ __current %= ((_M_expected + 1) >> 1);
+
+ __state_t* const __state = _M_state.load(memory_order_relaxed);
+
+ for (int __round = 0; ; ++__round)
+ {
+ if (__current_expected <= 1)
+ return true;
+ size_t const __end_node = ((__current_expected + 1) >> 1),
+ __last_node = __end_node - 1;
+ for ( ; ; ++__current)
+ {
+ if (__current == __end_node)
+ __current = 0;
+ auto __expect = __old_phase;
+ __atomic_phase_ref_t __phase(__state[__current]
+ .__tickets[__round]);
+ if (__current == __last_node && (__current_expected & 1))
+ {
+ if (__phase.compare_exchange_strong(__expect, __full_step,
+ memory_order_acq_rel))
+ break; // I'm 1 in 1, go to next __round
+ }
+ else if (__phase.compare_exchange_strong(__expect, __half_step,
+ memory_order_acq_rel))
+ {
+ return false; // I'm 1 in 2, done with arrival
+ }
+ else if (__expect == __half_step)
+ {
+ if (__phase.compare_exchange_strong(__expect, __full_step,
+ memory_order_acq_rel))
+ break; // I'm 2 in 2, go to next __round
+ }
+ }
+ __current_expected = __last_node + 1;
+ __current >>= 1;
+ }
+ }
+ };
- for (int __round = 0; ; ++__round)
- {
- if (__current_expected <= 1)
- return true;
- size_t const __end_node = ((__current_expected + 1) >> 1),
- __last_node = __end_node - 1;
- for ( ; ; ++__current)
- {
- if (__current == __end_node)
- __current = 0;
- auto __expect = __old_phase;
- __atomic_phase_ref_t __phase(_M_state[__current]
- .__tickets[__round]);
- if (__current == __last_node && (__current_expected & 1))
- {
- if (__phase.compare_exchange_strong(__expect, __full_step,
- memory_order_acq_rel))
- break; // I'm 1 in 1, go to next __round
- }
- else if (__phase.compare_exchange_strong(__expect, __half_step,
- memory_order_acq_rel))
- {
- return false; // I'm 1 in 2, done with arrival
- }
- else if (__expect == __half_step)
- {
- if (__phase.compare_exchange_strong(__expect, __full_step,
- memory_order_acq_rel))
- break; // I'm 2 in 2, go to next __round
- }
- }
- __current_expected = __last_node + 1;
- __current >>= 1;
- }
- }
+ template<typename _CompletionF>
+ class __tree_barrier : public __tree_barrier_base
+ {
+ [[no_unique_address]] _CompletionF _M_completion;
+
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 3898. Possibly unintended preconditions for completion functions
+ void _M_invoke_completion() noexcept { _M_completion(); }
public:
using arrival_token = __barrier_phase_t;
- static constexpr ptrdiff_t
- max() noexcept
- { return __PTRDIFF_MAX__; }
-
+ constexpr
__tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
- : _M_expected(__expected), _M_expected_adjustment(0),
- _M_completion(move(__completion)),
- _M_phase(static_cast<__barrier_phase_t>(0))
- {
- size_t const __count = (_M_expected + 1) >> 1;
-
- _M_state = std::make_unique<__state_t[]>(__count);
- }
+ : __tree_barrier_base(__expected), _M_completion(std::move(__completion))
+ { }
[[nodiscard]] arrival_token
arrive(ptrdiff_t __update)
{
+ __glibcxx_assert(__update > 0);
+ // FIXME: Check that update is less than or equal to the expected count
+ // for the current barrier phase.
+
std::hash<std::thread::id> __hasher;
size_t __current = __hasher(std::this_thread::get_id());
__atomic_phase_ref_t __phase(_M_phase);
const auto __old_phase = __phase.load(memory_order_relaxed);
const auto __cur = static_cast<unsigned char>(__old_phase);
- for(; __update; --__update)
+
+ if (__cur == 0 && !_M_state.load(memory_order_relaxed)) [[unlikely]]
+ {
+ auto __p = _M_alloc_state();
+ __state_t* __val = nullptr;
+ if (_M_state.compare_exchange_strong(__val, __p.get(),
+ memory_order_seq_cst,
+ memory_order_acquire))
+ __p.release();
+ }
+
+ for (; __update; --__update)
{
- if(_M_arrive(__old_phase, __current))
+ if (_M_arrive(__old_phase, __current))
{
- _M_completion();
+ _M_invoke_completion();
_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
_M_expected_adjustment.store(0, memory_order_relaxed);
auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
@@ -194,11 +231,7 @@ It looks different from literature pseudocode for two main reasons:
wait(arrival_token&& __old_phase) const
{
__atomic_phase_const_ref_t __phase(_M_phase);
- auto const __test_fn = [=]
- {
- return __phase.load(memory_order_acquire) != __old_phase;
- };
- std::__atomic_wait_address(&_M_phase, __test_fn);
+ __phase.wait(__old_phase, memory_order_acquire);
}
void
@@ -212,6 +245,8 @@ It looks different from literature pseudocode for two main reasons:
template<typename _CompletionF = __empty_completion>
class barrier
{
+ static_assert(is_invocable_v<_CompletionF&>);
+
// Note, we may introduce a "central" barrier algorithm at some point
// for more space constrained targets
using __algorithm_t = __tree_barrier<_CompletionF>;
@@ -236,7 +271,7 @@ It looks different from literature pseudocode for two main reasons:
max() noexcept
{ return __algorithm_t::max(); }
- explicit
+ constexpr explicit
barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
: _M_b(__count, std::move(__completion))
{ }