aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/devel/atomics.rst26
-rw-r--r--include/qemu/atomic.h17
2 files changed, 37 insertions, 6 deletions
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index 7957310..633df65 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -27,7 +27,8 @@ provides macros that fall in three camps:
- weak atomic access and manual memory barriers: ``qatomic_read()``,
``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
- sequentially consistent atomic access: everything else.
@@ -472,7 +473,7 @@ and memory barriers, and the equivalents in QEMU:
sequential consistency.
- in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
- the total ordering enforced by sequentially-consistent operations.
+ the ordering enforced by read-modify-write operations.
This is because QEMU uses the C11 memory model. The following example
is correct in Linux but not in QEMU:
@@ -488,9 +489,24 @@ and memory barriers, and the equivalents in QEMU:
because the read of ``y`` can be moved (by either the processor or the
compiler) before the write of ``x``.
- Fixing this requires an ``smp_mb()`` memory barrier between the write
- of ``x`` and the read of ``y``. In the common case where only one thread
- writes ``x``, it is also possible to write it like this:
+ Fixing this requires a full memory barrier between the write of ``x`` and
+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and
+ ``smp_mb__after_rmw()``; they act both as an optimization,
+ avoiding the memory barrier on processors where it is unnecessary,
+ and as a clarification of this corner case of the C11 memory model:
+
+ +--------------------------------+
+ | QEMU (correct) |
+ +================================+
+ | :: |
+ | |
+ | a = qatomic_fetch_add(&x, 2);|
+ | smp_mb__after_rmw(); |
+ | b = qatomic_read(&y); |
+ +--------------------------------+
+
+ In the common case where only one thread writes ``x``, it is also possible
+ to write it like this:
+--------------------------------+
| QEMU (correct) |
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 874134f..f85834e 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -245,6 +245,20 @@
#define smp_wmb() smp_mb_release()
#define smp_rmb() smp_mb_acquire()
+/*
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
+ * kernel read-modify-write atomics. Provide a macro to obtain
+ * the same semantics.
+ */
+#if !defined(QEMU_SANITIZE_THREAD) && \
+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define smp_mb__before_rmw() signal_barrier()
+# define smp_mb__after_rmw() signal_barrier()
+#else
+# define smp_mb__before_rmw() smp_mb()
+# define smp_mb__after_rmw() smp_mb()
+#endif
+
/* qatomic_mb_read/set semantics map Java volatile variables. They are
* less expensive on some platforms (notably POWER) than fully
* sequentially consistent operations.
@@ -259,7 +273,8 @@
#if !defined(QEMU_SANITIZE_THREAD) && \
(defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
/* This is more efficient than a store plus a fence. */
-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i))
+# define qatomic_mb_set(ptr, i) \
+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
#else
# define qatomic_mb_set(ptr, i) \
({ qatomic_store_release(ptr, i); smp_mb(); })