about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Will Schmidt <will_schmidt@vnet.ibm.com> 2011-08-18 11:01:44 -0500
committer Ryan S. Arnold <rsa@us.ibm.com> 2011-09-12 16:33:08 -0500
commit 26c64647052d9e7f1c661bfa16d9b5cf1750f596 (patch)
tree ae1e3e8a82c5ccbb5a5db7d67399e8d2845e4f25
parent db97a804d67dfa6ab8d8fb3665e71882b09a32e4 (diff)
download glibc-26c64647052d9e7f1c661bfa16d9b5cf1750f596.zip
glibc-26c64647052d9e7f1c661bfa16d9b5cf1750f596.tar.gz
glibc-26c64647052d9e7f1c661bfa16d9b5cf1750f596.tar.bz2
Provide a throughput boost of approximately 15% to the 64-bit power7 strncmp code.
The 32-bit throughput is not notably affected by this change; the 32-bit code is changed only to keep the two files in sync with each other.
-rw-r--r-- ChangeLog 7
-rw-r--r-- sysdeps/powerpc/powerpc32/power7/strncmp.S 4
-rw-r--r-- sysdeps/powerpc/powerpc64/power7/strncmp.S 4
3 files changed, 13 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index afdd883..3c29d6b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2011-08-04 Will Schmidt <will_schmidt@vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc32/power7/strncmp.S: Adjust the alignment
+ and add nop instructions for throughput optimization.
+ * sysdeps/powerpc/powerpc64/power7/strncmp.S: Adjust the alignment
+ and add nop instructions for throughput optimization.
+
2011-08-01 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/fpu/e_hypot.c: New file: hypot optimized for POWER.
diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S
index ba72d0a..61b33c3 100644
--- a/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -27,7 +27,7 @@
const char *s2 [r4],
size_t size [r5]) */
-EALIGN (BP_SYM(strncmp),4,0)
+EALIGN (BP_SYM(strncmp),5,0)
#define rTMP r0
#define rRTN r3
@@ -47,9 +47,11 @@ EALIGN (BP_SYM(strncmp),4,0)
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
dcbt 0,rSTR1
+ nop
or rTMP,rSTR2,rSTR1
lis r7F7F,0x7f7f
dcbt 0,rSTR2
+ nop
clrlwi. rTMP,rTMP,30
cmplwi cr1,rN,0
lis rFEFE,-0x101
diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 34f1e52..bba4445 100644
--- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -27,7 +27,7 @@
const char *s2 [r4],
size_t size [r5]) */
-EALIGN (BP_SYM(strncmp),4,0)
+EALIGN (BP_SYM(strncmp),5,0)
CALL_MCOUNT 3
#define rTMP r0
@@ -48,9 +48,11 @@ EALIGN (BP_SYM(strncmp),4,0)
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
dcbt 0,rSTR1
+ nop
or rTMP,rSTR2,rSTR1
lis r7F7F,0x7f7f
dcbt 0,rSTR2
+ nop
clrldi. rTMP,rTMP,61
cmpldi cr1,rN,0
lis rFEFE,-0x101