aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-03-20 11:24:52 -0500
committerAdhemerval Zanella <azanella@linux.vnet.ibm.com>2014-03-20 11:24:52 -0500
commit6eaf95cbfa0031ea267682dc2c9c17ed3e3dc167 (patch)
tree93cf4f0efb9ce1654e9f298b6342953a7935965a
parentae3a5dff0f4135cc57ddddf3c19ed5be80285b54 (diff)
downloadglibc-6eaf95cbfa0031ea267682dc2c9c17ed3e3dc167.zip
glibc-6eaf95cbfa0031ea267682dc2c9c17ed3e3dc167.tar.gz
glibc-6eaf95cbfa0031ea267682dc2c9c17ed3e3dc167.tar.bz2
PowerPC: optimized strcspn for PPC64/POWER7
This patch adds an optimized strcspn for POWER7 that uses a different algorithm from the default implementation: it constructs a table based on the 'reject' argument and uses this table to check for any occurrence in the input string. The idea is similar to the one used by x86_64. For PowerPC some tunings were added, such as unrolling the loops and aligning the table's stack memory to 16 bytes (so the VSX stores that clear it can run without alignment issues).
-rw-r--r--ChangeLog16
-rw-r--r--string/strcspn.c21
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/Makefile2
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c8
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S40
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c30
-rw-r--r--sysdeps/powerpc/powerpc64/multiarch/strcspn.c31
-rw-r--r--sysdeps/powerpc/powerpc64/power7/strcspn.S139
8 files changed, 271 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 4ebda2f..67d4b6a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2014-03-20 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
+
+ * string/strcspn.c (strcspn): Using macro to redefine symbol name.
+ * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strcspn-power7
+ and strcspn-ppc64 objects.
+ * sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+ (__libc_ifunc_impl_list): Add new strcspn optimized symbols.
+ * sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S: New file:
+ multiarch strcspn for POWER7.
+ * sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c: New file:
+ multiarch strcspn for PPC64.
+ * sysdeps/powerpc/powerpc64/multiarch/strcspn.c: New file: strcspn
+ ifunc selector.
+ * sysdeps/powerpc/powerpc64/power7/strcspn.S: New file: optimized
+ strcspn for POWER7.
+
2014-03-20 Joseph Myers <joseph@codesourcery.com>
* math/gen-libm-test.pl (generate_testfile): Expect only function
diff --git a/string/strcspn.c b/string/strcspn.c
index 7c39f79..4316205 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -15,27 +15,18 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if defined _LIBC || HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-# ifndef strchr
-# define strchr index
-# endif
-#endif
+#include <string.h>
#undef strcspn
+#ifndef STRCSPN
+# define STRCSPN strcspn
+#endif
+
/* Return the length of the maximum initial segment of S
which contains no characters from REJECT. */
size_t
-strcspn (s, reject)
- const char *s;
- const char *reject;
+STRCSPN (const char *s, const char *reject)
{
size_t count = 0;
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 3e8010c..c314e6f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -15,7 +15,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \
strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \
strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \
- strspn-power7 strspn-ppc64
+ strspn-power7 strspn-ppc64 strcspn-power7 strcspn-ppc64
CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 20d7918..328b87e 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -262,5 +262,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strspn, 1,
__strspn_ppc))
+ /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */
+ IFUNC_IMPL (i, name, strcspn,
+ IFUNC_IMPL_ADD (array, i, strcspn,
+ hwcap & PPC_FEATURE_HAS_VSX,
+ __strcspn_power7)
+ IFUNC_IMPL_ADD (array, i, strcspn, 1,
+ __strcspn_ppc))
+
return i;
}
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
new file mode 100644
index 0000000..02ffcc8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
@@ -0,0 +1,40 @@
+/* Optimized strcspn implementation for POWER7.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+/* Override EALIGN so the entry point is named __strcspn_power7; NAME is unused.  */
+#undef EALIGN
+#define EALIGN(name, alignt, words) \
+ .section ".text"; \
+ ENTRY_2(__strcspn_power7) \
+ .align ALIGNARG(alignt); \
+ EALIGN_W_##words; \
+ BODY_LABEL(__strcspn_power7): \
+ cfi_startproc; \
+ LOCALENTRY(__strcspn_power7)
+/* Likewise close the function as __strcspn_power7, ignoring NAME.  */
+#undef END
+#define END(name) \
+ cfi_endproc; \
+ TRACEBACK(__strcspn_power7) \
+ END_2(__strcspn_power7)
+/* Make libc_hidden_builtin_def expand to nothing in the included file.  */
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+/* Pull in the POWER7 body, assembled under the macro overrides above.  */
+#include <sysdeps/powerpc/powerpc64/power7/strcspn.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
new file mode 100644
index 0000000..5f8b610
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
@@ -0,0 +1,30 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+/* string/strcspn.c names its function STRCSPN; build it as __strcspn_ppc.  */
+#define STRCSPN __strcspn_ppc
+#ifdef SHARED
+/* NOTE(review): presumably binds the __GI_strcspn alias to __strcspn_ppc -- confirm.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strcspn_ppc, __GI_strcspn, __strcspn_ppc);
+#endif
+
+extern __typeof (strcspn) __strcspn_ppc attribute_hidden;
+/* Include the generic C implementation under the name chosen above.  */
+#include <string/strcspn.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
new file mode 100644
index 0000000..3609d93
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
@@ -0,0 +1,31 @@
+/* Multiple versions of strcspn. PowerPC64 version.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef NOT_IN_libc
+# include <string.h>
+# include <shlib-compat.h>
+# include "init-arch.h"
+/* The two candidate implementations, both declared with strcspn's type.  */
+extern __typeof (strcspn) __strcspn_ppc attribute_hidden;
+extern __typeof (strcspn) __strcspn_power7 attribute_hidden;
+/* Resolve strcspn to __strcspn_power7 when hwcap has the VSX bit, else to __strcspn_ppc.  */
+libc_ifunc (strcspn,
+ (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strcspn_power7
+ : __strcspn_ppc);
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/strcspn.S b/sysdeps/powerpc/powerpc64/power7/strcspn.S
new file mode 100644
index 0000000..3f6aa0a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strcspn.S
@@ -0,0 +1,139 @@
+/* Optimized strcspn implementation for PowerPC64.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* size_t [r3] strcspn (const char *s [r3], const char *reject [r4]) */
+
+ .machine power7
+EALIGN (strcspn, 4, 0)
+ CALL_MCOUNT 3
+
+ /* The idea to speed up the algorithm is to create a lookup table
+ for a fast check of whether an input character is in REJECT. For
+ ASCII or ISO-8859-X character sets it has 256 positions. */
+ lbz r10,0(r4) /* r10 = reject[0] */
+
+ /* First the table should be cleared and to avoid unaligned accesses
+ when using the VSX stores the table address is aligned to 16
+ bytes. */
+ xxlxor v0,v0,v0 /* all-zero vector used to clear the table */
+
+ /* PPC64 ELF ABI stack is aligned to 16 bytes. */
+ addi r9,r1,-256 /* r9 = table, the 256 bytes just below r1 */
+
+ li r8,48 /* r5/r6/r8 = offsets 16/32/48 for the indexed stores */
+ li r5,16
+ li r6,32
+ cmpdi cr7,r10,0 /* reject[0] == '\0' ? */
+ addi r12,r9,64 /* r12 = table + 64 */
+ /* Clear the table with 0 values: 16 stores of 16 bytes each */
+ stxvw4x v0,r0,r9
+ addi r11,r9,128 /* r11 = table + 128 */
+ addi r7,r9,192 /* r7 = table + 192 */
+ stxvw4x v0,r9,r5
+ stxvw4x v0,r9,r6
+ stxvw4x v0,r9,r8
+ stxvw4x v0,r0,r12
+ stxvw4x v0,r12,r5
+ stxvw4x v0,r12,r6
+ stxvw4x v0,r12,r8
+ stxvw4x v0,r0,r11
+ stxvw4x v0,r11,r5
+ stxvw4x v0,r11,r6
+ stxvw4x v0,r11,r8
+ stxvw4x v0,r0,r7
+ stxvw4x v0,r7,r5
+ stxvw4x v0,r7,r6
+ stxvw4x v0,r7,r8
+ li r8,1 /* r8 = marker value written into the table */
+ beq cr7,L(finish_table) /* If reject[0] == '\0' skip */
+
+ /* Initialize the table as:
+ for (i = 0; reject[i]; i++)
+ table[reject[i]] = 1 */
+ .p2align 4,,15
+L(init_table):
+ stbx r8,r9,r10 /* table[reject[i]] = 1 */
+ lbzu r10,1(r4) /* r10 = reject[++i] */
+ cmpdi cr7,r10,0 /* If reject[i] == '\0' finish */
+ bne cr7,L(init_table)
+L(finish_table):
+ /* set table[0] = 1 so the input's terminating NUL also stops the scan */
+ li r10,1
+ stb r10,0(r9)
+ li r10,0 /* r10 = count of input bytes already accepted */
+ b L(mainloop)
+
+ /* Unroll the loop 4 times and check using the table as:
+ i = 0;
+ while (1)
+ {
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ if (table[input[i++]] == 1)
+ return i - 1;
+ } */
+ .p2align 4,,15
+L(unroll):
+ lbz r8,1(r3)
+ addi r10,r10,4 /* r6/r4/r5 already hold the old count + 1/2/3 */
+ lbzx r8,r9,r8
+ cmpwi cr7,r8,1 /* compare into cr7, which the beq below tests */
+ beq cr7,L(end)
+ lbz r8,2(r3)
+ addi r3,r3,4 /* advance input; r7 still holds the old pointer */
+ lbzx r8,r9,r8
+ cmpwi cr7,r8,1
+ beq cr7,L(end2)
+ lbz r8,3(r7)
+ lbzx r8,r9,r8
+ cmpwi cr7,r8,1
+ beq cr7,L(end3)
+L(mainloop):
+ lbz r8,0(r3)
+ mr r7,r3 /* keep the group's base pointer for the 3(r7) load */
+ addi r6,r10,1 /* precompute the return values for L(end) */
+ addi r4,r10,2 /* ... L(end2) */
+ addi r5,r10,3 /* ... L(end3) */
+ lbzx r8,r9,r8 /* r8 = table[input byte] */
+ cmpwi cr7,r8,1
+ bne cr7,L(unroll)
+ mr r3,r10 /* stop byte is first in the group: return count */
+ blr
+
+ .p2align 4,,15
+L(end):
+ mr r3,r6 /* stop byte was second in the group */
+ blr
+
+ .p2align 4,,15
+L(end2):
+ mr r3,r4 /* stop byte was third in the group */
+ blr
+
+ .p2align 4,,15
+L(end3):
+ mr r3,r5 /* stop byte was fourth in the group */
+ blr
+END (strcspn)
+libc_hidden_builtin_def (strcspn)