author     Jan Kokemüller <jan.kokemueller@gmail.com>   2024-02-19 15:28:51 +0100
committer  GitHub <noreply@github.com>                  2024-02-19 14:28:51 +0000
commit     95ebf2be0e6600465a4d0f4e7d81c7ded0559fba
tree       22e2d5df0118ba9dbdd931ebad1a39c1cf80fe2c /libcxx
parent     3f0404aae7ed2f7138526e1bcd100a60dfe08227
[libc++] Refactor the predicate taking variant of `__cxx_atomic_wait` (#80596)
This is a follow-up PR to <https://github.com/llvm/llvm-project/pull/79265>. It aims to be a gentle refactoring of the `__cxx_atomic_wait` function that takes a predicate.

The key idea here is that this function's signature is changed to look like this (`std::function` used just for clarity):

```c++
__cxx_atomic_wait_fn(Atp*, std::function<bool(Tp &)> poll, memory_order __order);
```

...where `Tp` is the `value_type` corresponding to the atomic variable type `Atp`. The function's semantics are similar to `atomic`'s `.wait()`, but instead of having a hardcoded predicate (is the loaded value unequal to `old`?), the predicate is specified explicitly. The `poll` function may change its argument, and it is very important that if it returns `false`, it leaves its current understanding of the atomic's value in the argument.

Internally, `__cxx_atomic_wait_fn` dispatches to two waiting mechanisms, depending on the type of the atomic variable:

1. If the atomic variable can be waited on directly (for example, Linux's futex mechanism only supports waiting on 32-bit variables), the value of the atomic variable (which `poll` made its decision on) is given to the underlying system wait function (e.g. futex).
2. If the atomic variable can not be waited on directly, there is a global pool of atomics that are used for this task. The ["eventcount" pattern](https://gist.github.com/mratsim/04a29bdd98d6295acda4d0677c4d0041) is employed to make this possible.

The eventcount pattern needs a "monitor" variable which is read before the condition is checked another time. libcxx has the `__libcpp_atomic_monitor` function for this. However, this function only has to be called in case 2, i.e. when the eventcount is actually used. In case 1, the futex is used directly, so the monitor must be the value of the atomic variable that the `poll` function made its decision on to continue blocking.

Previously, `__libcpp_atomic_monitor` was _also_ used in case 1. This was the source of the ABA-style bug that PR #79265 fixed. However, the solution in PR #79265 has some disadvantages:

- It exposes internals such as `cxx_contention_t`, or the fact that `__libcpp_thread_poll_with_backoff` needs two functions, to higher level constructs such as `semaphore`.
- It doesn't prevent consumers from calling `__cxx_atomic_wait` in an error-prone way, i.e. by providing a predicate that doesn't take an argument. This makes ABA-style issues more likely to appear.

Now, `__cxx_atomic_wait_fn` takes just _one_ function, which is then transformed into the `poll` and `backoff` callables needed by `__libcpp_thread_poll_with_backoff`.

Aside from the `__cxx_atomic_wait` changes, the only other change is that the initial atomic load in `semaphore`'s `try_acquire` is weakened to `memory_order_relaxed` and the CAS inside the loop is changed from `strong` to `weak`. Both weakenings should be fine, since the CAS is called in a loop, and the "acquire" semantics of `try_acquire` come from the CAS, not from the initial load.
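To make the predicate contract concrete, here is a minimal, self-contained sketch of the idea using C++20 `std::atomic` wait/notify in place of the libc++ internals. The names `wait_unless` and `try_acquire_impl` are illustrative stand-ins, not the functions added by this patch, and the sketch assumes the direct-wait case (case 1), where the value the predicate decided on is also the value handed to the platform wait.

```c++
// Minimal model of the predicate contract. `wait_unless` and `try_acquire_impl`
// are illustrative stand-ins, not the libc++ internals; C++20 std::atomic
// wait/notify plays the role of the platform wait (case 1, direct wait).
#include <atomic>
#include <cstdio>
#include <thread>

// Block until `poll` returns true. `poll` receives the most recently observed
// value by reference; whenever it returns false it must leave its latest
// observation there, so the subsequent wait compares against exactly the value
// the decision was made on (no ABA-style gap).
template <class T, class Poll>
void wait_unless(std::atomic<T>& a, Poll poll, std::memory_order order) {
  T current = a.load(order);
  while (!poll(current)) {
    a.wait(current, order);  // wakes as soon as the value differs from `current`
    current = a.load(order);
  }
}

int main() {
  std::atomic<int> tokens(0);

  // Semaphore-flavoured predicate: a weak CAS in a loop carries the acquire
  // semantics, and a failed CAS refreshes `old` with the freshly observed
  // value, satisfying the contract described above.
  auto try_acquire_impl = [&](int& old) {
    while (true) {
      if (old == 0)
        return false;
      if (tokens.compare_exchange_weak(old, old - 1, std::memory_order_acquire,
                                       std::memory_order_relaxed))
        return true;
    }
  };

  std::thread consumer([&] {
    wait_unless(tokens, try_acquire_impl, std::memory_order_relaxed);
    std::printf("acquired a token\n");
  });

  tokens.fetch_add(1, std::memory_order_release);  // release a token
  tokens.notify_one();
  consumer.join();
  return 0;
}
```

The shape of `try_acquire_impl` mirrors the semaphore change in this patch: the initial load can be relaxed, the acquire semantics come from the (now weak) CAS, and a failed CAS leaves the latest observed value in `old` for the waiter to block on.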
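For case 2, the linked gist describes the eventcount pattern in general terms. The following is a minimal sketch of that pattern, again using C++20 `std::atomic` wait/notify rather than libc++'s contention table; the `eventcount` type and its 64-bit generation counter are illustrative assumptions standing in for `__libcpp_atomic_monitor`/`__libcpp_atomic_wait`.

```c++
// Minimal sketch of the "eventcount" pattern: a waiter reads a monitor
// (generation counter) *before* re-checking its condition, and sleeps only if
// the generation has not moved since. Illustrative only, not the libc++ code.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

struct eventcount {
  std::atomic<std::uint64_t> generation{0};

  // Step 1 of a wait: read the monitor before re-checking the condition.
  std::uint64_t monitor() const { return generation.load(std::memory_order_acquire); }

  // Step 2: if the condition was still false, block until the generation
  // moves past the previously read monitor value.
  void wait(std::uint64_t old_generation) {
    generation.wait(old_generation, std::memory_order_acquire);
  }

  // Notifiers bump the generation after publishing their state change, then
  // wake everyone; a waiter that read the monitor earlier either re-checks the
  // condition and sees the new state, or sees a changed generation and wakes.
  void notify_all() {
    generation.fetch_add(1, std::memory_order_release);
    generation.notify_all();
  }
};

int main() {
  eventcount ec;
  std::atomic<bool> ready{false};  // stands in for an atomic the platform cannot wait on directly

  std::thread waiter([&] {
    while (true) {
      std::uint64_t m = ec.monitor();             // read the monitor first
      if (ready.load(std::memory_order_acquire))  // then re-check the condition
        break;
      ec.wait(m);  // sleep only if the generation is still `m`
    }
    std::printf("condition observed\n");
  });

  ready.store(true, std::memory_order_release);  // publish the state change...
  ec.notify_all();                               // ...then bump and notify
  waiter.join();
  return 0;
}
```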
Diffstat (limited to 'libcxx')
-rw-r--r--  libcxx/include/__atomic/atomic_sync.h  | 101
-rw-r--r--  libcxx/include/latch                   |  11
-rw-r--r--  libcxx/include/semaphore               |  18
-rw-r--r--  libcxx/src/atomic.cpp                  |   1
4 files changed, 80 insertions, 51 deletions
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index e1994dd..d07b358 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -27,6 +27,19 @@
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Atp, class _Poll>
+struct __libcpp_atomic_wait_poll_impl {
+ _Atp* __a_;
+ _Poll __poll_;
+ memory_order __order_;
+
+ _LIBCPP_AVAILABILITY_SYNC
+ _LIBCPP_HIDE_FROM_ABI bool operator()() const {
+ auto __current_val = std::__cxx_atomic_load(__a_, __order_);
+ return __poll_(__current_val);
+ }
+};
+
#ifndef _LIBCPP_HAS_NO_THREADS
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*);
@@ -43,15 +56,40 @@ __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*);
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t);
-template <class _Atp, class _BackoffTest>
+template <class _Atp, class _Poll>
struct __libcpp_atomic_wait_backoff_impl {
_Atp* __a_;
- _BackoffTest __backoff_test_;
+ _Poll __poll_;
+ memory_order __order_;
+
+ _LIBCPP_AVAILABILITY_SYNC
+ _LIBCPP_HIDE_FROM_ABI bool
+ __poll_or_get_monitor(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor) const {
+ // In case the atomic can be waited on directly, the monitor value is just
+ // the value of the atomic.
+ // `__poll_` takes the current value of the atomic as an in-out argument
+ // to potentially modify it. After it returns, `__monitor` has a value
+ // which can be safely waited on by `std::__libcpp_atomic_wait` without any
+ // ABA style issues.
+ __monitor = std::__cxx_atomic_load(__a_, __order_);
+ return __poll_(__monitor);
+ }
+
+ _LIBCPP_AVAILABILITY_SYNC
+ _LIBCPP_HIDE_FROM_ABI bool __poll_or_get_monitor(void const volatile*, __cxx_contention_t& __monitor) const {
+ // In case we must wait on an atomic from the pool, the monitor comes from
+ // `std::__libcpp_atomic_monitor`.
+ // Only then we may read from `__a_`. This is the "event count" pattern.
+ __monitor = std::__libcpp_atomic_monitor(__a_);
+ auto __current_val = std::__cxx_atomic_load(__a_, __order_);
+ return __poll_(__current_val);
+ }
+
_LIBCPP_AVAILABILITY_SYNC
_LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
if (__elapsed > chrono::microseconds(64)) {
- auto __monitor = std::__libcpp_atomic_monitor(__a_);
- if (__backoff_test_(__monitor))
+ __cxx_contention_t __monitor;
+ if (__poll_or_get_monitor(__a_, __monitor))
return true;
std::__libcpp_atomic_wait(__a_, __monitor);
} else if (__elapsed > chrono::microseconds(4))
@@ -62,26 +100,20 @@ struct __libcpp_atomic_wait_backoff_impl {
}
};
-template <class _Atp, class _Poll, class _BackoffTest>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
-__cxx_atomic_wait(_Atp* __a, _Poll&& __poll, _BackoffTest&& __backoff_test) {
- __libcpp_atomic_wait_backoff_impl<_Atp, __decay_t<_BackoffTest> > __backoff_fn = {__a, __backoff_test};
- return std::__libcpp_thread_poll_with_backoff(__poll, __backoff_fn);
-}
-
-template <class _Poll>
-struct __libcpp_atomic_wait_poll_as_backoff_test {
- _Poll __poll_;
-
- _LIBCPP_AVAILABILITY_SYNC
- _LIBCPP_HIDE_FROM_ABI bool operator()(__cxx_contention_t&) const { return __poll_(); }
-};
-
+// The semantics of this function are similar to `atomic`'s
+// `.wait(T old, std::memory_order order)`, but instead of having a hardcoded
+// predicate (is the loaded value unequal to `old`?), the predicate function is
+// specified as an argument. The loaded value is given as an in-out argument to
+// the predicate. If the predicate function returns `true`,
+// `__cxx_atomic_wait_unless` will return. If the predicate function returns
+// `false`, it must set the argument to its current understanding of the atomic
+// value. The predicate function must not return `false` spuriously.
template <class _Atp, class _Poll>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_wait(_Atp* __a, _Poll&& __poll) {
- __libcpp_atomic_wait_backoff_impl<_Atp, __libcpp_atomic_wait_poll_as_backoff_test<_Poll&> > __backoff_fn = {
- __a, {__poll}};
- return std::__libcpp_thread_poll_with_backoff(__poll, __backoff_fn);
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+__cxx_atomic_wait_unless(_Atp* __a, _Poll&& __poll, memory_order __order) {
+ __libcpp_atomic_wait_poll_impl<_Atp, __decay_t<_Poll> > __poll_fn = {__a, __poll, __order};
+ __libcpp_atomic_wait_backoff_impl<_Atp, __decay_t<_Poll> > __backoff_fn = {__a, __poll, __order};
+ (void)std::__libcpp_thread_poll_with_backoff(__poll_fn, __backoff_fn);
}
#else // _LIBCPP_HAS_NO_THREADS
@@ -90,9 +122,10 @@ template <class _Tp>
_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp> const volatile*) {}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp> const volatile*) {}
-template <class _Atp, class _Fn>
-_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_wait(_Atp*, _Fn&& __test_fn) {
- return std::__libcpp_thread_poll_with_backoff(__test_fn, __spinning_backoff_policy());
+template <class _Atp, class _Poll>
+_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_wait_unless(_Atp* __a, _Poll&& __poll, memory_order __order) {
+ __libcpp_atomic_wait_poll_impl<_Atp, __decay_t<_Poll> > __poll_fn = {__a, __poll, __order};
+ (void)std::__libcpp_thread_poll_with_backoff(__poll_fn, __spinning_backoff_policy());
}
#endif // _LIBCPP_HAS_NO_THREADS
@@ -102,21 +135,19 @@ _LIBCPP_HIDE_FROM_ABI bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp c
return std::memcmp(std::addressof(__lhs), std::addressof(__rhs), sizeof(_Tp)) == 0;
}
-template <class _Atp, class _Tp>
-struct __cxx_atomic_wait_test_fn_impl {
- _Atp* __a;
+template <class _Tp>
+struct __atomic_compare_unequal_to {
_Tp __val;
- memory_order __order;
- _LIBCPP_HIDE_FROM_ABI bool operator()() const {
- return !std::__cxx_nonatomic_compare_equal(std::__cxx_atomic_load(__a, __order), __val);
+ _LIBCPP_HIDE_FROM_ABI bool operator()(_Tp& __current_val) const {
+ return !std::__cxx_nonatomic_compare_equal(__current_val, __val);
}
};
template <class _Atp, class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
__cxx_atomic_wait(_Atp* __a, _Tp const __val, memory_order __order) {
- __cxx_atomic_wait_test_fn_impl<_Atp, _Tp> __test_fn = {__a, __val, __order};
- return std::__cxx_atomic_wait(__a, __test_fn);
+ __atomic_compare_unequal_to<_Tp> __poll_fn = {__val};
+ std::__cxx_atomic_wait_unless(__a, __poll_fn, __order);
}
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/latch b/libcxx/include/latch
index ad7b355..dd389d2 100644
--- a/libcxx/include/latch
+++ b/libcxx/include/latch
@@ -97,9 +97,13 @@ public:
if (__old == __update)
__a_.notify_all();
}
- inline _LIBCPP_HIDE_FROM_ABI bool try_wait() const noexcept { return 0 == __a_.load(memory_order_acquire); }
+ inline _LIBCPP_HIDE_FROM_ABI bool try_wait() const noexcept {
+ auto __value = __a_.load(memory_order_acquire);
+ return try_wait_impl(__value);
+ }
inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait() const {
- __cxx_atomic_wait(&__a_.__a_, [this]() -> bool { return try_wait(); });
+ __cxx_atomic_wait_unless(
+ &__a_.__a_, [this](ptrdiff_t& __value) -> bool { return try_wait_impl(__value); }, memory_order_acquire);
}
inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait(ptrdiff_t __update = 1) {
_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update >= 0, "latch::arrive_and_wait called with a negative value");
@@ -108,6 +112,9 @@ public:
count_down(__update);
wait();
}
+
+private:
+ inline _LIBCPP_HIDE_FROM_ABI bool try_wait_impl(ptrdiff_t& __value) const noexcept { return __value == 0; }
};
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore
index 5235d72..448b5fb 100644
--- a/libcxx/include/semaphore
+++ b/libcxx/include/semaphore
@@ -54,7 +54,6 @@ using binary_semaphore = counting_semaphore<1>;
#include <__assert> // all public C++ headers provide the assertion handler
#include <__atomic/atomic_base.h>
#include <__atomic/atomic_sync.h>
-#include <__atomic/contention_t.h>
#include <__atomic/memory_order.h>
#include <__availability>
#include <__chrono/time_point.h>
@@ -100,17 +99,8 @@ public:
}
}
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void acquire() {
- auto const __poll_fn = [this]() -> bool {
- auto __old = __a_.load(memory_order_relaxed);
- return (__old != 0) && __a_.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed);
- };
- auto const __backoff_test = [this](__cxx_contention_t& __monitor) -> bool {
- ptrdiff_t __old = __monitor;
- bool __r = __try_acquire_impl(__old);
- __monitor = static_cast<__cxx_contention_t>(__old);
- return __r;
- };
- __cxx_atomic_wait(&__a_.__a_, __poll_fn, __backoff_test);
+ __cxx_atomic_wait_unless(
+ &__a_.__a_, [this](ptrdiff_t& __old) { return __try_acquire_impl(__old); }, memory_order_relaxed);
}
template <class _Rep, class _Period>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
@@ -121,7 +111,7 @@ public:
return std::__libcpp_thread_poll_with_backoff(__poll_fn, __libcpp_timed_backoff_policy(), __rel_time);
}
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool try_acquire() {
- auto __old = __a_.load(memory_order_acquire);
+ auto __old = __a_.load(memory_order_relaxed);
return __try_acquire_impl(__old);
}
@@ -130,7 +120,7 @@ private:
while (true) {
if (__old == 0)
return false;
- if (__a_.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed))
+ if (__a_.compare_exchange_weak(__old, __old - 1, memory_order_acquire, memory_order_relaxed))
return true;
}
}
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 2f0389a..2b67685 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -178,6 +178,7 @@ _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t
_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) {
__libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, false);
}
+// This function is never used, but still exported for ABI compatibility.
_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) {
return __libcpp_contention_monitor_for_wait(&__libcpp_contention_state(__location)->__contention_state, __location);