aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2009-07-19 20:56:40 -0700
committerUlrich Drepper <drepper@redhat.com>2009-07-19 20:56:40 -0700
commit42e69bcf1137fccfd7a95645a9d316c6490b9ff9 (patch)
tree54823d4b072c8574a91bc5e6d838fe2ba032afb4
parent515a8908cedcf7432270f410e4a749e4ce07a072 (diff)
downloadglibc-42e69bcf1137fccfd7a95645a9d316c6490b9ff9.zip
glibc-42e69bcf1137fccfd7a95645a9d316c6490b9ff9.tar.gz
glibc-42e69bcf1137fccfd7a95645a9d316c6490b9ff9.tar.bz2
Support requeueing for condvars using PI mutex. x86-64 only.
Add support for the new FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI options of futex.
-rw-r--r--nptl/ChangeLog9
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h2
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S26
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S47
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S69
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S60
6 files changed, 177 insertions, 36 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 785100d..c747be4 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,5 +1,14 @@
2009-07-19 Ulrich Drepper <drepper@redhat.com>
+ * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h: Define
+ FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: If mutex
+ is a PI mutex, then use FUTEX_CMP_REQUEUE_PI.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: If mutex
+ is a PI mutex, then use FUTEX_WAIT_REQUEUE_PI.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
(__pthread_cond_timedwait): Make more robust.
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
index 0b7e3bb..9b15bfb 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
@@ -54,6 +54,8 @@
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10
+#define FUTEX_WAIT_REQUEUE_PI 11
+#define FUTEX_CMP_REQUEUE_PI 12
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
index 6155255..0f10ec9 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -70,12 +70,14 @@ __pthread_cond_broadcast:
8: cmpq $-1, %r8
je 9f
- /* XXX: The kernel so far doesn't support requeue to PI futex. */
- /* XXX: The kernel only supports FUTEX_CMP_REQUEUE to the same
- type of futex (private resp. shared). */
- testl $(PI_BIT | PS_BIT), MUTEX_KIND(%r8)
+ /* Do not use requeue for pshared condvars. */
+ testl $PS_BIT, MUTEX_KIND(%r8)
jne 9f
+ /* Requeue to a PI mutex if the PI bit is set. */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ jne 81f
+
/* Wake up all threads. */
#ifdef __ASSUME_PRIVATE_FUTEX
movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi
@@ -97,6 +99,20 @@ __pthread_cond_broadcast:
10: xorl %eax, %eax
retq
+ /* Wake up all threads. */
+81: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
+ movl $SYS_futex, %eax
+ movl $1, %edx
+ movl $0x7fffffff, %r10d
+ syscall
+
+ /* For any kind of error, which mainly is EAGAIN, we try again
+ with WAKE. The general test also covers running on old
+ kernels. */
+ cmpq $-4095, %rax
+ jb 10b
+ jmp 9f
+
.align 16
/* Unlock. */
4: LOCK
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
index 8f65f2c..f1050fe 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2005, 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -21,6 +21,7 @@
#include <shlib-compat.h>
#include <lowlevellock.h>
#include <lowlevelcond.h>
+#include <pthread-pi-defines.h>
#include <kernel-features.h>
#include <pthread-errnos.h>
@@ -56,19 +57,23 @@ __pthread_cond_signal:
/* Wake up one thread. */
cmpq $-1, dep_mutex(%r8)
+ movl $FUTEX_WAKE_OP, %esi
movl $1, %edx
+ movl $SYS_futex, %eax
+ je 8f
+
+ /* Get the address of the mutex used. */
+ movq dep_mutex(%r8), %rcx
+ testl $PI_BIT, MUTEX_KIND(%rcx)
+ jne 9f
+
#ifdef __ASSUME_PRIVATE_FUTEX
- movl $FUTEX_WAKE_OP, %eax
movl $(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi
- cmove %eax, %esi
#else
- movl $0, %eax
- movl %fs:PRIVATE_FUTEX, %esi
- cmove %eax, %esi
- orl $FUTEX_WAKE_OP, %esi
+ orl %fs:PRIVATE_FUTEX, %esi
#endif
- movl $1, %r10d
- movl $SYS_futex, %eax
+
+8: movl $1, %r10d
#if cond_lock != 0
addq $cond_lock, %r8
#endif
@@ -85,9 +90,27 @@ __pthread_cond_signal:
xorl %eax, %eax
retq
-7: /* %esi should be either FUTEX_WAKE_OP or
- FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG from the previous syscall. */
- xorl $(FUTEX_WAKE ^ FUTEX_WAKE_OP), %esi
+ /* Wake up one thread and requeue none in the PI Mutex case. */
+9: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
+ movq %rcx, %r8
+ xorq %r10, %r10
+ movl (%rdi), %r9d // XXX Can this be right?
+ syscall
+
+ leaq -cond_futex(%rdi), %r8
+
+ /* For any kind of error, we try again with WAKE.
+ The general test also covers running on old kernels. */
+ cmpq $-4095, %rax
+ jb 4f
+
+7:
+#ifdef __ASSUME_PRIVATE_FUTEX
+ andl $FUTEX_PRIVATE_FLAG, %esi
+#else
+ andl %fs:PRIVATE_FUTEX, %esi
+#endif
+ orl $FUTEX_WAKE, %esi
movl $SYS_futex, %eax
/* %rdx should be 1 already from $FUTEX_WAKE_OP syscall.
movl $1, %edx */
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 1b19fdb..f81466e 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -21,6 +21,7 @@
#include <shlib-compat.h>
#include <lowlevellock.h>
#include <lowlevelcond.h>
+#include <pthread-pi-defines.h>
#include <pthread-errnos.h>
#include <kernel-features.h>
@@ -58,6 +59,9 @@ __pthread_cond_timedwait:
pushq %r14
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r14, 0)
+ pushq %r15
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r15, 0)
#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
# define FRAME_SIZE 32
#else
@@ -160,9 +164,41 @@ __pthread_cond_timedwait:
movl $FUTEX_WAIT_BITSET, %eax
movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi
cmove %eax, %esi
+ je 60f
+
+ movq dep_mutex(%rdi), %r8
+ /* Requeue to a PI mutex if the PI bit is set. */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ je 60f
+
+ movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
+ xorl %eax, %eax
/* The following only works like this because we only support
two clocks, represented using a single bit. */
+ testl $1, cond_nwaiters(%rdi)
+ movl $FUTEX_CLOCK_REALTIME, %edx
+ cmove %edx, %eax
+ orl %eax, %esi
+ movq %r12, %rdx
+ addq $cond_futex, %rdi
+ movl $SYS_futex, %eax
+ syscall
+
+ movl $1, %r15d
+#ifdef __ASSUME_REQUEUE_PI
+ jmp 62f
+#else
+ cmpq $-4095, %rax
+ jnae 62f
+
+ movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi
+ subq $cond_futex, %rdi
+#endif
+
+60: xorl %r15d, %r15d
xorl %eax, %eax
+ /* The following only works like this because we only support
+ two clocks, represented using a single bit. */
testl $1, cond_nwaiters(%rdi)
movl $FUTEX_CLOCK_REALTIME, %edx
movl $0xffffffff, %r9d
@@ -172,7 +208,7 @@ __pthread_cond_timedwait:
addq $cond_futex, %rdi
movl $SYS_futex, %eax
syscall
- movq %rax, %r14
+62: movq %rax, %r14
movl (%rsp), %edi
callq __pthread_disable_asynccancel
@@ -253,14 +289,23 @@ __pthread_cond_timedwait:
#endif
jne 40f
-41: movq 16(%rsp), %rdi
+ /* If requeue_pi is used the kernel performs the locking of the
+ mutex. */
+41: xorl %eax, %eax
+ testl %r15d, %r15d
+ jnz 63f
+
+ movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
- testq %rax, %rax
+63: testq %rax, %rax
cmoveq %r14, %rax
48: addq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
+ popq %r15
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r15)
popq %r14
cfi_adjust_cfa_offset(-8)
cfi_restore(%r14)
@@ -274,10 +319,11 @@ __pthread_cond_timedwait:
retq
/* Initial locking failed. */
-31: cfi_adjust_cfa_offset(3 * 8 + FRAME_SIZE)
- cfi_rel_offset(%r12, FRAME_SIZE + 16)
- cfi_rel_offset(%r13, FRAME_SIZE + 8)
- cfi_rel_offset(%r14, FRAME_SIZE)
+31: cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE)
+ cfi_rel_offset(%r12, FRAME_SIZE + 24)
+ cfi_rel_offset(%r13, FRAME_SIZE + 16)
+ cfi_rel_offset(%r14, FRAME_SIZE + 8)
+ cfi_rel_offset(%r15, FRAME_SIZE)
#if cond_lock != 0
addq $cond_lock, %rdi
#endif
@@ -353,6 +399,8 @@ __pthread_cond_timedwait:
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
+ xorl %r15d, %r15d
+
/* Get internal lock. */
movl $1, %esi
xorl %eax, %eax
@@ -716,9 +764,10 @@ __condvar_cleanup2:
callq __pthread_mutex_cond_lock
movq 24(%rsp), %rdi
- movq FRAME_SIZE(%rsp), %r14
- movq FRAME_SIZE+8(%rsp), %r13
- movq FRAME_SIZE+16(%rsp), %r12
+ movq FRAME_SIZE(%rsp), %r15
+ movq FRAME_SIZE+8(%rsp), %r14
+ movq FRAME_SIZE+16(%rsp), %r13
+ movq FRAME_SIZE+24(%rsp), %r12
.LcallUR:
call _Unwind_Resume@PLT
hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index c3c879c..e6323ea 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -22,6 +22,7 @@
#include <lowlevellock.h>
#include <lowlevelcond.h>
#include <tcb-offsets.h>
+#include <pthread-pi-defines.h>
#include <kernel-features.h>
@@ -47,6 +48,9 @@ __pthread_cond_wait:
pushq %r12
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%r12, 0)
+ pushq %r13
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r13, 0)
#define FRAME_SIZE 32
subq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -124,24 +128,48 @@ __pthread_cond_wait:
movq 8(%rsp), %rdi
xorq %r10, %r10
movq %r12, %rdx
- addq $cond_futex-cond_lock, %rdi
+ // XXX reverse + lea
+ addq $cond_futex, %rdi
cmpq $-1, dep_mutex-cond_futex(%rdi)
#ifdef __ASSUME_PRIVATE_FUTEX
movl $FUTEX_WAIT, %eax
movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
cmove %eax, %esi
#else
- movl $FUTEX_WAIT, %eax
+ movl $0, %eax
movl %fs:PRIVATE_FUTEX, %esi
cmove %eax, %esi
# if FUTEX_WAIT != 0
+# error "cc destroyed by following orl"
orl $FUTEX_WAIT, %esi
# endif
#endif
+ je 60f
+
+ movq dep_mutex-cond_futex(%rdi), %r8
+ /* Requeue to a PI mutex if the PI bit is set. */
+ testl $PI_BIT, MUTEX_KIND(%r8)
+ je 60f
+
+ movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
+ movl $SYS_futex, %eax
+ syscall
+
+ movl $1, %r13d
+#ifdef __ASSUME_REQUEUE_PI
+ jmp 62f
+#else
+ cmpq $-4095, %rax
+ jnae 62f
+
+ movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
+#endif
+
+60: xorl %r13d, %r13d
movl $SYS_futex, %eax
syscall
- movl (%rsp), %edi
+62: movl (%rsp), %edi
callq __pthread_disable_asynccancel
.LcleanupEND:
@@ -209,11 +237,21 @@ __pthread_cond_wait:
#endif
jne 10f
-11: movq 16(%rsp), %rdi
+ /* If requeue_pi is used the kernel performs the locking of the
+ mutex. */
+11: xorl %eax, %eax
+ testl %r13d, %r13d
+ jnz 14f
+
+ movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
+
14: addq $FRAME_SIZE, %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
+ popq %r13
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r13)
popq %r12
cfi_adjust_cfa_offset(-8)
cfi_restore(%r12)
@@ -223,8 +261,9 @@ __pthread_cond_wait:
/* Initial locking failed. */
1:
- cfi_adjust_cfa_offset(8 + FRAME_SIZE)
- cfi_rel_offset(%r12, FRAME_SIZE)
+ cfi_adjust_cfa_offset(16 + FRAME_SIZE)
+ cfi_rel_offset(%r12, FRAME_SIZE + 8)
+ cfi_rel_offset(%r13, FRAME_SIZE)
#if cond_lock != 0
addq $cond_lock, %rdi
#endif
@@ -308,9 +347,11 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
__condvar_cleanup1:
/* Stack frame:
- rsp + 40
+ rsp + 48
+ +--------------------------+
+ rsp + 40 | %r12 |
+--------------------------+
- rsp + 32 | %r12 |
+ rsp + 32 | %r13 |
+--------------------------+
rsp + 24 | unused |
+--------------------------+
@@ -431,7 +472,8 @@ __condvar_cleanup1:
callq __pthread_mutex_cond_lock
movq 24(%rsp), %rdi
- movq 32(%rsp), %r12
+ movq 40(%rsp), %r12
+ movq 32(%rsp), %r13
.LcallUR:
call _Unwind_Resume@PLT
hlt