diff options
Diffstat (limited to 'libstdc++-v3/include/std/barrier')
-rw-r--r-- | libstdc++-v3/include/std/barrier | 199 |
1 files changed, 117 insertions, 82 deletions
diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier index 6c3cfd4..56270c9 100644 --- a/libstdc++-v3/include/std/barrier +++ b/libstdc++-v3/include/std/barrier @@ -81,105 +81,142 @@ It looks different from literature pseudocode for two main reasons: enum class __barrier_phase_t : unsigned char { }; - template<typename _CompletionF> - class __tree_barrier + struct __tree_barrier_base + { + static constexpr ptrdiff_t + max() noexcept + { return __PTRDIFF_MAX__ - 1; } + + protected: + using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>; + using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>; + static constexpr auto __phase_alignment = + __atomic_phase_ref_t::required_alignment; + + using __tickets_t = std::array<__barrier_phase_t, 64>; + struct alignas(64) /* naturally-align the heap state */ __state_t { - using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>; - using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>; - static constexpr auto __phase_alignment = - __atomic_phase_ref_t::required_alignment; + alignas(__phase_alignment) __tickets_t __tickets; + }; - using __tickets_t = std::array<__barrier_phase_t, 64>; - struct alignas(64) /* naturally-align the heap state */ __state_t - { - alignas(__phase_alignment) __tickets_t __tickets; - }; + ptrdiff_t _M_expected; + __atomic_base<__state_t*> _M_state{nullptr}; + __atomic_base<ptrdiff_t> _M_expected_adjustment{0}; + alignas(__phase_alignment) __barrier_phase_t _M_phase{}; - ptrdiff_t _M_expected; - unique_ptr<__state_t[]> _M_state; - __atomic_base<ptrdiff_t> _M_expected_adjustment; - _CompletionF _M_completion; + explicit constexpr + __tree_barrier_base(ptrdiff_t __expected) + : _M_expected(__expected) + { + __glibcxx_assert(__expected >= 0 && __expected <= max()); - alignas(__phase_alignment) __barrier_phase_t _M_phase; + if (!std::is_constant_evaluated()) + _M_state.store(_M_alloc_state().release(), memory_order_release); + } - bool - _M_arrive(__barrier_phase_t __old_phase, size_t __current) - { - const auto __old_phase_val = static_cast<unsigned char>(__old_phase); - const auto __half_step = - static_cast<__barrier_phase_t>(__old_phase_val + 1); - const auto __full_step = - static_cast<__barrier_phase_t>(__old_phase_val + 2); + unique_ptr<__state_t[]> + _M_alloc_state() + { + size_t const __count = (_M_expected + 1) >> 1; + return std::make_unique<__state_t[]>(__count); + } - size_t __current_expected = _M_expected; - __current %= ((_M_expected + 1) >> 1); + bool + _M_arrive(__barrier_phase_t __old_phase, size_t __current) + { + const auto __old_phase_val = static_cast<unsigned char>(__old_phase); + const auto __half_step = + static_cast<__barrier_phase_t>(__old_phase_val + 1); + const auto __full_step = + static_cast<__barrier_phase_t>(__old_phase_val + 2); + + size_t __current_expected = _M_expected; + __current %= ((_M_expected + 1) >> 1); + + __state_t* const __state = _M_state.load(memory_order_relaxed); + + for (int __round = 0; ; ++__round) + { + if (__current_expected <= 1) + return true; + size_t const __end_node = ((__current_expected + 1) >> 1), + __last_node = __end_node - 1; + for ( ; ; ++__current) + { + if (__current == __end_node) + __current = 0; + auto __expect = __old_phase; + __atomic_phase_ref_t __phase(__state[__current] + .__tickets[__round]); + if (__current == __last_node && (__current_expected & 1)) + { + if (__phase.compare_exchange_strong(__expect, __full_step, + memory_order_acq_rel)) + break; // I'm 1 in 1, go to next __round + } + else if (__phase.compare_exchange_strong(__expect, __half_step, + memory_order_acq_rel)) + { + return false; // I'm 1 in 2, done with arrival + } + else if (__expect == __half_step) + { + if (__phase.compare_exchange_strong(__expect, __full_step, + memory_order_acq_rel)) + break; // I'm 2 in 2, go to next __round + } + } + __current_expected = __last_node + 1; + __current >>= 1; + } + } + }; - for (int __round = 0; ; ++__round) - { - if (__current_expected <= 1) - return true; - size_t const __end_node = ((__current_expected + 1) >> 1), - __last_node = __end_node - 1; - for ( ; ; ++__current) - { - if (__current == __end_node) - __current = 0; - auto __expect = __old_phase; - __atomic_phase_ref_t __phase(_M_state[__current] - .__tickets[__round]); - if (__current == __last_node && (__current_expected & 1)) - { - if (__phase.compare_exchange_strong(__expect, __full_step, - memory_order_acq_rel)) - break; // I'm 1 in 1, go to next __round - } - else if (__phase.compare_exchange_strong(__expect, __half_step, - memory_order_acq_rel)) - { - return false; // I'm 1 in 2, done with arrival - } - else if (__expect == __half_step) - { - if (__phase.compare_exchange_strong(__expect, __full_step, - memory_order_acq_rel)) - break; // I'm 2 in 2, go to next __round - } - } - __current_expected = __last_node + 1; - __current >>= 1; - } - } + template<typename _CompletionF> + class __tree_barrier : public __tree_barrier_base + { + [[no_unique_address]] _CompletionF _M_completion; + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3898. Possibly unintended preconditions for completion functions + void _M_invoke_completion() noexcept { _M_completion(); } public: using arrival_token = __barrier_phase_t; - static constexpr ptrdiff_t - max() noexcept - { return __PTRDIFF_MAX__; } - + constexpr __tree_barrier(ptrdiff_t __expected, _CompletionF __completion) - : _M_expected(__expected), _M_expected_adjustment(0), - _M_completion(move(__completion)), - _M_phase(static_cast<__barrier_phase_t>(0)) - { - size_t const __count = (_M_expected + 1) >> 1; - - _M_state = std::make_unique<__state_t[]>(__count); - } + : __tree_barrier_base(__expected), _M_completion(std::move(__completion)) + { } [[nodiscard]] arrival_token arrive(ptrdiff_t __update) { + __glibcxx_assert(__update > 0); + // FIXME: Check that update is less than or equal to the expected count + // for the current barrier phase. + std::hash<std::thread::id> __hasher; size_t __current = __hasher(std::this_thread::get_id()); __atomic_phase_ref_t __phase(_M_phase); const auto __old_phase = __phase.load(memory_order_relaxed); const auto __cur = static_cast<unsigned char>(__old_phase); - for(; __update; --__update) + + if (__cur == 0 && !_M_state.load(memory_order_relaxed)) [[unlikely]] + { + auto __p = _M_alloc_state(); + __state_t* __val = nullptr; + if (_M_state.compare_exchange_strong(__val, __p.get(), + memory_order_seq_cst, + memory_order_acquire)) + __p.release(); + } + + for (; __update; --__update) { - if(_M_arrive(__old_phase, __current)) + if (_M_arrive(__old_phase, __current)) { - _M_completion(); + _M_invoke_completion(); _M_expected += _M_expected_adjustment.load(memory_order_relaxed); _M_expected_adjustment.store(0, memory_order_relaxed); auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2); @@ -194,11 +231,7 @@ It looks different from literature pseudocode for two main reasons: wait(arrival_token&& __old_phase) const { __atomic_phase_const_ref_t __phase(_M_phase); - auto const __test_fn = [=] - { - return __phase.load(memory_order_acquire) != __old_phase; - }; - std::__atomic_wait_address(&_M_phase, __test_fn); + __phase.wait(__old_phase, memory_order_acquire); } void @@ -212,6 +245,8 @@ It looks different from literature pseudocode for two main reasons: template<typename _CompletionF = __empty_completion> class barrier { + static_assert(is_invocable_v<_CompletionF&>); + // Note, we may introduce a "central" barrier algorithm at some point // for more space constrained targets using __algorithm_t = __tree_barrier<_CompletionF>; @@ -236,7 +271,7 @@ It looks different from literature pseudocode for two main reasons: max() noexcept { return __algorithm_t::max(); } - explicit + constexpr explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) : _M_b(__count, std::move(__completion)) { } |