aboutsummaryrefslogtreecommitdiff
path: root/nptl
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-11-02 18:33:07 -0700
committerH.J. Lu <hjl.tools@gmail.com>2021-11-12 10:32:09 -0800
commitd672a98a1af106bd68deb15576710cd61363f7a6 (patch)
treecb697ddc3004aa47fefea927dea0c7e2f4d5ae79 /nptl
parent49302b8fdf9103b6fc0a398678668a22fa19574c (diff)
downloadglibc-d672a98a1af106bd68deb15576710cd61363f7a6.zip
glibc-d672a98a1af106bd68deb15576710cd61363f7a6.tar.gz
glibc-d672a98a1af106bd68deb15576710cd61363f7a6.tar.bz2
Add LLL_MUTEX_READ_LOCK [BZ #28537]
CAS instruction is expensive. From the x86 CPU's point of view, getting a cache line for writing is more expensive than reading. See Appendix A.2 Spinlock in: https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/xeon-lock-scaling-analysis-paper.pdf The full compare and swap will grab the cache line exclusive and cause excessive cache line bouncing. Add LLL_MUTEX_READ_LOCK to do an atomic load and skip CAS in spinlock loop if compare may fail to reduce cache line bouncing on contended locks. Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Diffstat (limited to 'nptl')
-rw-r--r--nptl/pthread_mutex_lock.c7
1 files changed, 7 insertions, 0 deletions
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 59a28cf..762059b 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -64,6 +64,11 @@ lll_mutex_lock_optimized (pthread_mutex_t *mutex)
# define PTHREAD_MUTEX_VERSIONS 1
#endif
+#ifndef LLL_MUTEX_READ_LOCK
+# define LLL_MUTEX_READ_LOCK(mutex) \
+ atomic_load_relaxed (&(mutex)->__data.__lock)
+#endif
+
static int __pthread_mutex_lock_full (pthread_mutex_t *mutex)
__attribute_noinline__;
@@ -141,6 +146,8 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
break;
}
atomic_spin_nop ();
+ if (LLL_MUTEX_READ_LOCK (mutex) != 0)
+ continue;
}
while (LLL_MUTEX_TRYLOCK (mutex) != 0);