path: root/nptl/pthread_rwlock_tryrdlock.c
author     Torvald Riegel <triegel@redhat.com>    2014-05-22 16:00:12 +0200
committer  Torvald Riegel <triegel@redhat.com>    2017-01-10 11:50:17 +0100
commit     cc25c8b4c1196a8c29e9a45b1e096b99a87b7f8c
tree       52c5358896bc1ce9ad2081f179e72a1edac5fa6f /nptl/pthread_rwlock_tryrdlock.c
parent     fbb31e20bc41957c5f3d6550f0178590cf473043
New pthread rwlock that is more scalable.
This replaces the pthread rwlock with a new implementation that uses a
more scalable algorithm (primarily through not using a critical section
anymore to make state changes).  The fast path for rdlock acquisition
and release is now basically a single atomic read-modify-write or CAS
and a few branches.  See nptl/pthread_rwlock_common.c for details.

	* nptl/DESIGN-rwlock.txt: Remove.
	* nptl/lowlevelrwlock.sym: Remove.
	* nptl/Makefile: Add new tests.
	* nptl/pthread_rwlock_common.c: New file.  Contains the new rwlock.
	* nptl/pthreadP.h (PTHREAD_RWLOCK_PREFER_READER_P): Remove.
	(PTHREAD_RWLOCK_WRPHASE, PTHREAD_RWLOCK_WRLOCKED,
	PTHREAD_RWLOCK_RWAITING, PTHREAD_RWLOCK_READER_SHIFT,
	PTHREAD_RWLOCK_READER_OVERFLOW, PTHREAD_RWLOCK_WRHANDOVER,
	PTHREAD_RWLOCK_FUTEX_USED): New.
	* nptl/pthread_rwlock_init.c (__pthread_rwlock_init): Adapt to new
	implementation.
	* nptl/pthread_rwlock_rdlock.c (__pthread_rwlock_rdlock_slow): Remove.
	(__pthread_rwlock_rdlock): Adapt.
	* nptl/pthread_rwlock_timedrdlock.c (pthread_rwlock_timedrdlock): Adapt.
	* nptl/pthread_rwlock_timedwrlock.c (pthread_rwlock_timedwrlock): Adapt.
	* nptl/pthread_rwlock_trywrlock.c (pthread_rwlock_trywrlock): Adapt.
	* nptl/pthread_rwlock_tryrdlock.c (pthread_rwlock_tryrdlock): Adapt.
	* nptl/pthread_rwlock_unlock.c (pthread_rwlock_unlock): Adapt.
	* nptl/pthread_rwlock_wrlock.c (__pthread_rwlock_wrlock_slow): Remove.
	(__pthread_rwlock_wrlock): Adapt.
	* nptl/tst-rwlock10.c: Adapt.
	* nptl/tst-rwlock11.c: Adapt.
	* nptl/tst-rwlock17.c: New file.
	* nptl/tst-rwlock18.c: New file.
	* nptl/tst-rwlock19.c: New file.
	* nptl/tst-rwlock2b.c: New file.
	* nptl/tst-rwlock8.c: Adapt.
	* nptl/tst-rwlock9.c: Adapt.
	* sysdeps/aarch64/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/arm/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/hppa/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/ia64/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/m68k/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/microblaze/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/mips/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/nios2/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/s390/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/sh/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/sparc/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/tile/nptl/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h
	(pthread_rwlock_t): Adapt.
	* sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
	(pthread_rwlock_t): Adapt.
	* sysdeps/x86/bits/pthreadtypes.h (pthread_rwlock_t): Adapt.
	* nptl/nptl-printers.py (): Adapt.
	* nptl/nptl_lock_constants.pysym: Adapt.
	* nptl/test-rwlock-printers.py: Adapt.
	* nptl/test-rwlockattr-printers.c: Adapt.
	* nptl/test-rwlockattr-printers.py: Adapt.
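[Editor's note: the fast path described above can be sketched in a few
lines of standalone C11.  This is a minimal illustration, not the
committed code: the constant values and the helper name below are
assumptions (the real constants are defined in nptl/pthreadP.h, and the
full logic, including writer preference and overflow checks, is in the
diff below).]

#include <stdatomic.h>

/* Illustrative bit layout; the values are assumptions mirroring the
   constants named in the commit message, not necessarily those in
   nptl/pthreadP.h.  */
#define WRPHASE      1u   /* Lock is in a write phase.  */
#define WRLOCKED     2u   /* A primary writer has registered.  */
#define READER_SHIFT 3    /* Reader count occupies the higher bits.  */

/* Hypothetical sketch of the CAS-based rdlock fast path: register as
   one more reader with a single compare-and-swap, with no critical
   section protecting the state change.  */
static int
tryrdlock_sketch (_Atomic unsigned int *readers)
{
  unsigned int r = atomic_load_explicit (readers, memory_order_relaxed);
  /* Acquisition is possible as long as no writer holds the lock.  */
  while ((r & WRPHASE) == 0 || (r & WRLOCKED) == 0)
    {
      /* Add one reader; leave the write phase if we found one without
         a registered writer.  */
      unsigned int rnew = (r + (1u << READER_SHIFT)) & ~WRPHASE;
      if (atomic_compare_exchange_weak_explicit (readers, &r, rnew,
                                                 memory_order_acquire,
                                                 memory_order_relaxed))
        return 0;
      /* The failed CAS reloaded r; retry with the fresh value.  */
    }
  return 1;  /* A writer holds the lock; the real code returns EBUSY.  */
}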
Diffstat (limited to 'nptl/pthread_rwlock_tryrdlock.c')
-rw-r--r--  nptl/pthread_rwlock_tryrdlock.c | 105
1 file changed, 72 insertions(+), 33 deletions(-)
diff --git a/nptl/pthread_rwlock_tryrdlock.c b/nptl/pthread_rwlock_tryrdlock.c
index cdd73d2..6c3014c 100644
--- a/nptl/pthread_rwlock_tryrdlock.c
+++ b/nptl/pthread_rwlock_tryrdlock.c
@@ -18,56 +18,95 @@
#include <errno.h>
#include "pthreadP.h"
-#include <lowlevellock.h>
-#include <futex-internal.h>
-#include <elide.h>
+#include <atomic.h>
#include <stdbool.h>
+#include "pthread_rwlock_common.c"
+/* See pthread_rwlock_common.c for an overview. */
int
__pthread_rwlock_tryrdlock (pthread_rwlock_t *rwlock)
{
- int result = EBUSY;
- bool wake = false;
- int futex_shared =
- rwlock->__data.__shared == LLL_PRIVATE ? FUTEX_PRIVATE : FUTEX_SHARED;
-
- if (ELIDE_TRYLOCK (rwlock->__data.__rwelision,
- rwlock->__data.__lock == 0
- && rwlock->__data.__nr_readers == 0
- && rwlock->__data.__writer, 0))
- return 0;
-
- lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
-
- if (rwlock->__data.__writer == 0
- && (rwlock->__data.__nr_writers_queued == 0
- || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
+ /* For tryrdlock, we could speculate that we will succeed and go ahead and
+ register as a reader. However, if we misspeculate, we have to do the
+ same steps as a timed-out rdlock, which will increase contention.
+ Therefore, there is a trade-off between being able to use a combinable
+ read-modify-write operation and a CAS loop as used below; we pick the
+ latter because it simplifies the code, and should perform better when
+ tryrdlock is used in cases where writers are infrequent.
+ Because POSIX does not require a failed trylock to "synchronize memory",
+ relaxed MO is sufficient here and on the failure path of the CAS
+ below. */
+ unsigned int r = atomic_load_relaxed (&rwlock->__data.__readers);
+ unsigned int rnew;
+ do
{
- if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
+ if ((r & PTHREAD_RWLOCK_WRPHASE) == 0)
{
- --rwlock->__data.__nr_readers;
- result = EAGAIN;
+ /* If we are in a read phase, try to acquire unless there is a
+ primary writer and we prefer writers and there will be no
+ recursive read locks. */
+ if (((r & PTHREAD_RWLOCK_WRLOCKED) != 0)
+ && (rwlock->__data.__flags
+ == PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP))
+ return EBUSY;
+ rnew = r + (1 << PTHREAD_RWLOCK_READER_SHIFT);
}
else
{
- result = 0;
- /* See pthread_rwlock_rdlock. */
- if (rwlock->__data.__nr_readers == 1
- && rwlock->__data.__nr_readers_queued > 0
- && rwlock->__data.__nr_writers_queued > 0)
+ /* If there is a writer that has acquired the lock and we are in
+ a write phase, fail. */
+ if ((r & PTHREAD_RWLOCK_WRLOCKED) != 0)
+ return EBUSY;
+ else
{
- ++rwlock->__data.__readers_wakeup;
- wake = true;
+ /* If we do not care about potentially waiting writers, just
+ try to acquire. */
+ rnew = (r + (1 << PTHREAD_RWLOCK_READER_SHIFT))
+ ^ PTHREAD_RWLOCK_WRPHASE;
}
}
+ /* If we could have caused an overflow or take effect during an
+ overflow, we just can / need to return EAGAIN. There is no need to
+ have actually modified the number of readers because we could have
+ done that and cleaned up immediately. */
+ if (rnew >= PTHREAD_RWLOCK_READER_OVERFLOW)
+ return EAGAIN;
+ }
+ /* If the CAS fails, we retry; this prevents that tryrdlock fails spuriously
+ (i.e., fails to acquire the lock although there is no writer), which is
+ fine for C++14 but not currently allowed by POSIX.
+ However, because tryrdlock must not appear to block, we should avoid
+ starving this CAS loop due to constant changes to __readers:
+ While normal rdlock readers that won't be able to acquire will just block
+ (and we expect timeouts on timedrdlock to be longer than one retry of the
+ CAS loop), we can have concurrently failing tryrdlock calls due to
+ readers or writers that acquire and release in the meantime. Using
+ randomized exponential back-off to make a live-lock unlikely should be
+ sufficient.
+ TODO Back-off.
+ Acquire MO so we synchronize with prior writers. */
+ while (!atomic_compare_exchange_weak_acquire (&rwlock->__data.__readers,
+ &r, rnew));
+
+ if ((r & PTHREAD_RWLOCK_WRPHASE) != 0)
+ {
+ /* Same as in __pthread_rwlock_rdlock_full:
+ We started the read phase, so we are also responsible for
+ updating the write-phase futex. Relaxed MO is sufficient.
+ Note that there can be no other reader that we have to wake
+ because all other readers will see the read phase started by us
+ (or they will try to start it themselves); if a writer started
+ the read phase, we cannot have started it. Furthermore, we
+ cannot discard a PTHREAD_RWLOCK_FUTEX_USED flag because we will
+ overwrite the value set by the most recent writer (or the readers
+ before it in case of explicit hand-over) and we know that there
+ are no waiting readers. */
+ atomic_store_relaxed (&rwlock->__data.__wrphase_futex, 0);
}
- lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared);
+ return 0;
- if (wake)
- futex_wake (&rwlock->__data.__readers_wakeup, INT_MAX, futex_shared);
- return result;
}
strong_alias (__pthread_rwlock_tryrdlock, pthread_rwlock_tryrdlock)
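[Editor's note: the comment above the CAS loop leaves randomized
exponential back-off as a TODO.  The helper below is a purely
hypothetical sketch of what such back-off could look like; the committed
code retries immediately.]

#include <time.h>

/* Sleep for a random number of nanoseconds within a growing window
   between CAS retries, so concurrently failing tryrdlock calls spread
   out instead of live-locking.  SEED must start nonzero.  */
static void
cas_backoff (unsigned int *window, unsigned int *seed)
{
  /* xorshift32: cheap per-thread pseudo-randomness.  */
  *seed ^= *seed << 13;
  *seed ^= *seed >> 17;
  *seed ^= *seed << 5;
  struct timespec ts = { 0, 1 + (*seed % *window) };
  nanosleep (&ts, NULL);
  /* Double the window, capped, so repeated failures back off further.  */
  if (*window < (1u << 16))
    *window <<= 1;
}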
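[Editor's note: callers of the patched function see the standard POSIX
contract regardless of the new implementation: 0 on success, EBUSY when
a writer holds the lock (or, under the writer-preferring nonrecursive
policy, is waiting for it), and EAGAIN if the reader count would
overflow.  A minimal usage sketch:]

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

/* Attempt a non-blocking read lock and fall back to other work on
   contention, instead of blocking as rdlock would.  */
static void
try_read (void)
{
  int err = pthread_rwlock_tryrdlock (&lock);
  if (err == 0)
    {
      /* ... read the shared data ... */
      pthread_rwlock_unlock (&lock);
    }
  else if (err == EBUSY)
    puts ("writer active; deferring read");
  else if (err == EAGAIN)
    puts ("reader count would overflow");  /* Extremely rare.  */
}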