From fadd2ad9cc36115440d50b0eae9299e65988917d Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Thu, 3 Sep 2015 13:40:21 -0500 Subject: powerpc: Optimize lock elision for pthread_mutex_t With TLE enabled, the adapt count variable update incurs an 8% overhead before entering the critical section of an elided mutex. Instead, if it is done right after leaving the critical section, this serialization can be avoided. This alters the existing behavior of __lll_trylock_elision as it will only decrement the adapt_count if it successfully acquires the lock. * sysdeps/unix/sysv/linux/powerpc/elision-lock.c (__lll_lock_elision): Remove adapt_count decrement... * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c (__lll_trylock_elision): Likewise. * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c (__lll_unlock_elision): ... to here. And utilize new adapt_count parameter. * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h (__lll_unlock_elision): Update to include adapt_count parameter. (lll_unlock_elision): Pass pointer to adapt_count variable. --- ChangeLog | 15 +++++++++++++++ sysdeps/unix/sysv/linux/powerpc/elision-lock.c | 1 - sysdeps/unix/sysv/linux/powerpc/elision-trylock.c | 1 - sysdeps/unix/sysv/linux/powerpc/elision-unlock.c | 12 ++++++++++-- sysdeps/unix/sysv/linux/powerpc/lowlevellock.h | 4 ++-- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6a743e4..917c464 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,20 @@ 2015-10-15 Paul E. Murphy + * sysdeps/unix/sysv/linux/powerpc/elision-lock.c + (__lll_lock_elision): Remove adapt_count decrement... + * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c + (__lll_trylock_elision): Likewise. + * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c + (__lll_unlock_elision): ... to here. And utilize + new adapt_count parameter. + * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h + (__lll_unlock_elision): Update to include adapt_count + parameter. + (lll_unlock_elision): Pass pointer to adapt_count + variable. + +2015-10-15 Paul E. Murphy + * nptl/pthread_mutex_unlock.c (lll_unlock_elision): Add elision adapt_count parameter to list of arguments. * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c index 26d272e..3762732 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c @@ -47,7 +47,6 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared) { if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c index 7b6d1b9..440939c 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c @@ -36,7 +36,6 @@ __lll_trylock_elision (int *futex, short *adapt_count) /* Only try a transaction if it's worth it. */ if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c index f04c339..72b893d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c @@ -21,12 +21,20 @@ #include "htm.h" int -__lll_unlock_elision(int *lock, int pshared) +__lll_unlock_elision (int *lock, short *adapt_count, int pshared) { /* When the lock was free we're in a transaction. */ if (*lock == 0) __builtin_tend (0); else - lll_unlock ((*lock), pshared); + { + lll_unlock ((*lock), pshared); + + /* Update the adapt count AFTER completing the critical section. + Doing this here prevents unneeded stalling when entering + a critical section. Saving about 8% runtime on P8. */ + if (*adapt_count > 0) + (*adapt_count)--; + } return 0; } diff --git a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h index 16479e7..6769c25 100644 --- a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h @@ -32,7 +32,7 @@ extern int __lll_timedlock_elision extern int __lll_lock_elision (int *futex, short *adapt_count, int private) attribute_hidden; -extern int __lll_unlock_elision(int *lock, int private) +extern int __lll_unlock_elision (int *lock, short *adapt_count, int private) attribute_hidden; extern int __lll_trylock_elision(int *lock, short *adapt_count) @@ -41,7 +41,7 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count) #define lll_lock_elision(futex, adapt_count, private) \ __lll_lock_elision (&(futex), &(adapt_count), private) #define lll_unlock_elision(futex, adapt_count, private) \ - __lll_unlock_elision (&(futex), private) + __lll_unlock_elision (&(futex), &(adapt_count), private) #define lll_trylock_elision(futex, adapt_count) \ __lll_trylock_elision (&(futex), &(adapt_count)) -- cgit v1.1