-rw-r--r--   docs/devel/atomics.rst | 26
-rw-r--r--   include/qemu/atomic.h  | 17
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index 7957310..633df65 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -27,7 +27,8 @@ provides macros that fall in three camps:
 
 - weak atomic access and manual memory barriers: ``qatomic_read()``,
   ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
-  ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+  ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
+  ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
 
 - sequentially consistent atomic access: everything else.
 
@@ -472,7 +473,7 @@ and memory barriers, and the equivalents in QEMU:
   sequential consistency.
 
 - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
-  the total ordering enforced by sequentially-consistent operations.
+  the ordering enforced by read-modify-write operations.
   This is because QEMU uses the C11 memory model.  The following example
   is correct in Linux but not in QEMU:
 
@@ -488,9 +489,24 @@ and memory barriers, and the equivalents in QEMU:
   because the read of ``y`` can be moved (by either the processor or the
   compiler) before the write of ``x``.
 
-  Fixing this requires an ``smp_mb()`` memory barrier between the write
-  of ``x`` and the read of ``y``.  In the common case where only one thread
-  writes ``x``, it is also possible to write it like this:
+  Fixing this requires a full memory barrier between the write of ``x`` and
+  the read of ``y``.  QEMU provides ``smp_mb__before_rmw()`` and
+  ``smp_mb__after_rmw()``; they act both as an optimization,
+  avoiding the memory barrier on processors where it is unnecessary,
+  and as a clarification of this corner case of the C11 memory model:
+
+      +--------------------------------+
+      | QEMU (correct)                 |
+      +================================+
+      | ::                             |
+      |                                |
+      |   a = qatomic_fetch_add(&x, 2);|
+      |   smp_mb__after_rmw();         |
+      |   b = qatomic_read(&y);        |
+      +--------------------------------+
+
+  In the common case where only one thread writes ``x``, it is also possible
+  to write it like this:
 
       +--------------------------------+
       | QEMU (correct)                 |
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 874134f..f85834e 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -245,6 +245,20 @@
 #define smp_wmb()   smp_mb_release()
 #define smp_rmb()   smp_mb_acquire()
 
+/*
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
+ * kernel read-modify-write atomics.  Provide a macro to obtain
+ * the same semantics.
+ */
+#if !defined(QEMU_SANITIZE_THREAD) && \
+    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define smp_mb__before_rmw() signal_barrier()
+# define smp_mb__after_rmw() signal_barrier()
+#else
+# define smp_mb__before_rmw() smp_mb()
+# define smp_mb__after_rmw() smp_mb()
+#endif
+
 /* qatomic_mb_read/set semantics map Java volatile variables. They are
  * less expensive on some platforms (notably POWER) than fully
  * sequentially consistent operations.
@@ -259,7 +273,8 @@
 #if !defined(QEMU_SANITIZE_THREAD) && \
     (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
 /* This is more efficient than a store plus a fence. */
-# define qatomic_mb_set(ptr, i)   ((void)qatomic_xchg(ptr, i))
+# define qatomic_mb_set(ptr, i) \
+    ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
 #else
 # define qatomic_mb_set(ptr, i) \
     ({ qatomic_store_release(ptr, i); smp_mb(); })
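
The documentation hunk above describes a store-buffering (Dekker-style) pattern.
As a point of reference only, and not part of the patch, here is a minimal sketch
of how both sides of that pattern might look once smp_mb__after_rmw() exists.
It assumes the QEMU tree's "qemu/osdep.h" and "qemu/atomic.h" headers; the
variables x, y, r1, r2 and the thread functions are hypothetical names.

/*
 * Illustrative sketch only (not from this patch).  Two threads run the
 * store-buffering pattern from docs/devel/atomics.rst: each writes one
 * flag and then reads the other thread's flag.
 */
#include "qemu/osdep.h"
#include "qemu/atomic.h"

static int x, y;      /* shared flags, one written by each thread */
static int r1, r2;    /* value each thread observed in the other's flag */

static void thread1_side(void)
{
    (void)qatomic_fetch_add(&x, 2);   /* read-modify-write store to x */
    smp_mb__after_rmw();              /* orders the RMW before the load below;
                                       * per the patch this is only a compiler
                                       * barrier on x86/s390x, where the RMW
                                       * already acts as a full barrier */
    r1 = qatomic_read(&y);
}

static void thread2_side(void)
{
    qatomic_set(&y, 1);               /* plain atomic store to y */
    smp_mb();                         /* a plain store still needs a full
                                       * barrier before the following load */
    r2 = qatomic_read(&x);
}

With a full barrier on both sides, the outcome r1 == 0 && r2 == 0 is forbidden:
at least one thread must observe the other's write, which is exactly what the
unfenced qatomic_fetch_add()/qatomic_read() pair in the "incorrect" example
cannot guarantee.  Presumably smp_mb__before_rmw() plays the symmetric role
when an earlier access has to be ordered before a read-modify-write operation.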