From 5d2ec20a997b87c1667e0e71b3ff1e9df96eac15 Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Tue, 18 Dec 2018 13:57:15 +0100 Subject: S390: Refactor strcspn ifunc handling. The ifunc handling for strcspn is adjusted in order to omit ifunc variants if those will never be used as the minimum architecture level already supports newer CPUs by default. Glibc internal calls will then also use the "newer" ifunc variant. ChangeLog: * sysdeps/s390/multiarch/Makefile (sysdep_routines): Remove strcspn variants. * sysdeps/s390/Makefile (sysdep_routines): Add strcspn variants. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Refactor ifunc handling for strcspn. * sysdeps/s390/multiarch/strcspn-c.c: Move to ... * sysdeps/s390/strcspn-c.c: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/strcspn-vx.S: Move to ... * sysdeps/s390/strcspn-vx.S: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/strcspn.c: Move to ... * sysdeps/s390/strcspn.c: ... here and adjust ifunc handling. * sysdeps/s390/ifunc-strcspn.h: New file. 
--- ChangeLog | 15 ++ sysdeps/s390/Makefile | 3 +- sysdeps/s390/ifunc-strcspn.h | 52 ++++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 14 +- sysdeps/s390/multiarch/strcspn-c.c | 28 --- sysdeps/s390/multiarch/strcspn-vx.S | 281 ----------------------------- sysdeps/s390/multiarch/strcspn.c | 31 ---- sysdeps/s390/strcspn-c.c | 32 ++++ sysdeps/s390/strcspn-vx.S | 292 +++++++++++++++++++++++++++++++ sysdeps/s390/strcspn.c | 42 +++++ 11 files changed, 449 insertions(+), 344 deletions(-) create mode 100644 sysdeps/s390/ifunc-strcspn.h delete mode 100644 sysdeps/s390/multiarch/strcspn-c.c delete mode 100644 sysdeps/s390/multiarch/strcspn-vx.S delete mode 100644 sysdeps/s390/multiarch/strcspn.c create mode 100644 sysdeps/s390/strcspn-c.c create mode 100644 sysdeps/s390/strcspn-vx.S create mode 100644 sysdeps/s390/strcspn.c diff --git a/ChangeLog b/ChangeLog index 66c5fc8..699cc4c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,21 @@ 2018-12-18 Stefan Liebler * sysdeps/s390/multiarch/Makefile + (sysdep_routines): Remove strcspn variants. + * sysdeps/s390/Makefile (sysdep_routines): Add strcspn variants. + * sysdeps/s390/multiarch/ifunc-impl-list.c + (__libc_ifunc_impl_list): Refactor ifunc handling for strcspn. + * sysdeps/s390/multiarch/strcspn-c.c: Move to ... + * sysdeps/s390/strcspn-c.c: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/strcspn-vx.S: Move to ... + * sysdeps/s390/strcspn-vx.S: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/strcspn.c: Move to ... + * sysdeps/s390/strcspn.c: ... here and adjust ifunc handling. + * sysdeps/s390/ifunc-strcspn.h: New file. + +2018-12-18 Stefan Liebler + + * sysdeps/s390/multiarch/Makefile (sysdep_routines): Remove strpbrk variants. * sysdeps/s390/Makefile (sysdep_routines): Add strpbrk variants. 
* sysdeps/s390/multiarch/ifunc-impl-list.c diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile index df1f858..c163969 100644 --- a/sysdeps/s390/Makefile +++ b/sysdeps/s390/Makefile @@ -74,5 +74,6 @@ sysdep_routines += bzero memset memset-z900 \ strchrnul strchrnul-vx strchrnul-c \ strrchr strrchr-vx strrchr-c \ strspn strspn-vx strspn-c \ - strpbrk strpbrk-vx strpbrk-c + strpbrk strpbrk-vx strpbrk-c \ + strcspn strcspn-vx strcspn-c endif diff --git a/sysdeps/s390/ifunc-strcspn.h b/sysdeps/s390/ifunc-strcspn.h new file mode 100644 index 0000000..9b70325 --- /dev/null +++ b/sysdeps/s390/ifunc-strcspn.h @@ -0,0 +1,52 @@ +/* strcspn variant information on S/390 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined USE_MULTIARCH && IS_IN (libc) \ + && ! 
defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define HAVE_STRCSPN_IFUNC 1 +#else +# define HAVE_STRCSPN_IFUNC 0 +#endif + +#ifdef HAVE_S390_VX_ASM_SUPPORT +# define HAVE_STRCSPN_IFUNC_AND_VX_SUPPORT HAVE_STRCSPN_IFUNC +#else +# define HAVE_STRCSPN_IFUNC_AND_VX_SUPPORT 0 +#endif + +#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define STRCSPN_DEFAULT STRCSPN_Z13 +# define HAVE_STRCSPN_C 0 +# define HAVE_STRCSPN_Z13 1 +#else +# define STRCSPN_DEFAULT STRCSPN_C +# define HAVE_STRCSPN_C 1 +# define HAVE_STRCSPN_Z13 HAVE_STRCSPN_IFUNC_AND_VX_SUPPORT +#endif + +#if HAVE_STRCSPN_C +# define STRCSPN_C __strcspn_c +#else +# define STRCSPN_C NULL +#endif + +#if HAVE_STRCSPN_Z13 +# define STRCSPN_Z13 __strcspn_vx +#else +# define STRCSPN_Z13 NULL +#endif diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index 1a3fed9..1578f21 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),string) -sysdep_routines += strcspn strcspn-vx strcspn-c \ - memchr memchr-vx \ +sysdep_routines += memchr memchr-vx \ rawmemchr rawmemchr-vx rawmemchr-c \ memccpy memccpy-vx memccpy-c \ memrchr memrchr-vx memrchr-c diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 8e23416..2d48c99 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -41,6 +41,7 @@ #include #include #include +#include /* Maximum number of IFUNC implementations. 
*/ #define MAX_IFUNC 3 @@ -359,6 +360,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ) #endif /* HAVE_STRPBRK_IFUNC */ +#if HAVE_STRCSPN_IFUNC + IFUNC_IMPL (i, name, strcspn, +# if HAVE_STRCSPN_Z13 + IFUNC_IMPL_ADD (array, i, strcspn, + dl_hwcap & HWCAP_S390_VX, STRCSPN_Z13) +# endif +# if HAVE_STRCSPN_C + IFUNC_IMPL_ADD (array, i, strcspn, 1, STRCSPN_C) +# endif + ) +#endif /* HAVE_STRCSPN_IFUNC */ + #ifdef HAVE_S390_VX_ASM_SUPPORT # define IFUNC_VX_IMPL(FUNC) \ @@ -397,7 +410,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_VX_IMPL (wcspbrk); - IFUNC_VX_IMPL (strcspn); IFUNC_VX_IMPL (wcscspn); IFUNC_VX_IMPL (memchr); diff --git a/sysdeps/s390/multiarch/strcspn-c.c b/sysdeps/s390/multiarch/strcspn-c.c deleted file mode 100644 index 7b454f5..0000000 --- a/sysdeps/s390/multiarch/strcspn-c.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Default strcspn implementation for S/390. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define STRCSPN __strcspn_c -# ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strcspn_c, __GI_strcspn, __strcspn_c); -# endif /* SHARED */ - -# include -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcspn-vx.S b/sysdeps/s390/multiarch/strcspn-vx.S deleted file mode 100644 index ea16687..0000000 --- a/sysdeps/s390/multiarch/strcspn-vx.S +++ /dev/null @@ -1,281 +0,0 @@ -/* Vector optimized 32/64 bit S/390 version of strcspn. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) - -# include "sysdep.h" -# include "asm-syntax.h" - - .text - -/* size_t strcspn (const char *s, const char * reject) - The strcspn() function calculates the length of the initial segment - of s which consists entirely of characters not in reject. - - This method checks the length of reject string. If it fits entirely - in one vector register, a fast algorithm is used, which does not need - to check multiple parts of accept-string. Otherwise a slower full - check of accept-string is used. 
- - register overview: - r3: pointer to start of reject-string - r2: pointer to start of search-string - r0: loaded byte count of vlbb search-string - r4: found byte index - r1: current return len - v16: search-string - v17: reject-string - v18: temp-vreg - - ONLY FOR SLOW: - v19: first reject-string - v20: zero for preparing acc-vector - v21: global mask; 1 indicates a match between - search-string-vreg and any reject-character - v22: current mask; 1 indicates a match between - search-string-vreg and any reject-character in current acc-vreg - v24: one for result-checking of former string-part - v30, v31: for re-/storing registers r6, r8, r9 - r5: current len of reject-string - r6: zero-index in search-string or 16 if no zero - or min(zero-index, loaded byte count) - r8: >0, if former reject-string-part contains a zero, - otherwise =0; - r9: loaded byte count of vlbb reject-string -*/ -ENTRY(__strcspn_vx) - .machine "z13" - .machinemode "zarch_nohighgprs" - - /* - Check if reject-string fits in one vreg: - ---------------------------------------- - */ - vlbb %v17,0(%r3),6 /* Load reject. */ - lghi %r1,0 /* Zero out current len. */ - lcbb %r0,0(%r3),6 - jo .Lcheck_onbb /* Special case if reject - lays on block-boundary. */ -.Lcheck_notonbb: - vistrbs %v17,%v17 /* Fill with zeros after first zero. */ - je .Lfast /* Zero found -> reject fits in one vreg. */ - j .Lslow /* No zero -> reject exceeds one vreg. */ - - -.Lcheck_onbb: - /* Reject lays on block-boundary. */ - vfenezb %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ - vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ - clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> - Reject fits in one vreg; - Fill with zeros and proceed - with FAST. */ - vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ - j .Lcheck_notonbb /* Check if reject fits in one vreg. 
*/ - - - /* - Search s for reject in one vreg - ------------------------------- - */ -.Lfast: - /* Complete reject-string in v17 and remaining bytes are zero. */ - - vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ - lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ - - vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 - unequal to any in v17 - or first zero element. */ - - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded - bytes, return with found element - index (=equal count). */ - - /* Align s to 16 byte. */ - risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ - lghi %r1,16 /* current_len = 16. */ - slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ - - /* Process s in 16byte aligned loop. */ -.Lfast_loop: - vl %v16,0(%r1,%r2) /* Load search-string. */ - vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any - in v17 or first zero element. */ - jno .Lfast_loop_found - - vl %v16,16(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found16 - - vl %v16,32(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found32 - - vl %v16,48(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found48 - - aghi %r1,64 - j .Lfast_loop /* Loop if no element was unequal to reject - and not zero. */ - - /* Found equal or zero element. */ -.Lfast_loop_found48: - aghi %r1,16 -.Lfast_loop_found32: - aghi %r1,16 -.Lfast_loop_found16: - aghi %r1,16 -.Lfast_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ -.Lfast_loop_found2: - algrk %r2,%r1,%r4 /* Add found index to current len. */ - br %r14 - - - - /* - Search s for reject in multiple vregs - ------------------------------------- - */ -.Lslow: - /* Save registers. */ - vlvgg %v30,%r6,0 - vlvgp %v31,%r8,%r9 - - /* Reject in v17 without zero. */ - vlr %v19,%v17 /* Save first acc-part for a fast reload. */ - vzero %v20 /* Zero for preparing acc-vector. 
*/ - vone %v24 /* One for checking result of former - string-part. */ - - /* Align s to 16 byte. */ - risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and - %r4 = bits 60-63 'and' 15. */ - je .Lslow_loop_str /* If s is aligned, loop aligned. */ - lghi %r0,15 - slr %r0,%r4 /* Compute highest index to load (15-x). */ - vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs - highest index, remaining bytes are 0). */ - ahi %r0,1 /* Work with loaded byte count. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of reject-string to zero. */ - vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first reject-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ - clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ - locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ - j .Lslow_loop_acc - - - /* Process s in 16byte aligned loop. */ -.Lslow_next_str: - /* Check results of former processed str-part. */ - vfeeb %v18,%v21,%v24 /* Find first equal match in global mask - (ones in element). */ - vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ - /* Equal-index < min(zero-index, loaded byte count) - -> Return pointer to equal element. */ - clrjl %r4,%r6,.Lslow_index_found - /* Zero-index < loaded byte count - -> Former str-part was last str-part - -> Return null */ - clrjl %r6,%r0,.Lslow_end_not_found - - /* All elements are zero (=no match) -> Proceed with next str-part. */ - vlr %v17,%v19 /* Load first part of reject (no zero). */ - algfr %r1,%r0 /* Add loaded byte count to current len. */ - -.Lslow_loop_str: - vl %v16,0(%r1,%r2) /* Load search-string. */ - lghi %r0,16 /* Loaded byte count is 16. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of reject to zero. */ - vfenezb %v18,%v16,%v16 /* Find zero in current string-part. 
*/ - lghi %r8,0 /* There is no zero in first reject-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end /* If first element is zero (end of string) - -> Return current length. */ - -.Lslow_loop_acc: - vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> - Character matches any rejected character in - this reject-string-part) IN=0, RT=1. */ - vlgvb %r4,%v22,0 /* Get result of first element. */ - /* First element is equal to any rejected characters? - (all other parts of reject cannot lead to a match before this one) - -> Return current len, which is pointing to this element. */ - clijh %r4,0,.Lslow_end - vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ - /* Proceed with next acc until end of acc is reached. */ - - -.Lslow_next_acc: - clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part - -> Add found index to current len - and end. */ - vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ - aghi %r5,16 /* Increment current len of reject-string. */ - lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ - jo .Lslow_next_acc_onbb /* Jump away if reject-string is - on block-boundary. */ -.Lslow_next_acc_notonbb: - vistrbs %v17,%v17 /* Fill with zeros after first zero. */ - jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ - -.Lslow_next_acc_prepare_zero: - /* Zero in reject-part: fill zeros with first-reject-character. */ - vlgvb %r8,%v17,0 /* Load first element of reject-part. */ - clije %r8,0,.Lslow_next_str /* Process next str-part if first - character in this part of reject - is a zero. */ - /* r8>0 -> zero found in this acc-part. */ - vrepb %v18,%v17,0 /* Replicate first char accross all chars. */ - vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars - by comparing with 0 (v20). */ - vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ - j .Lslow_loop_acc /* Reject-string part is prepared. 
*/ - -.Lslow_next_acc_onbb: - vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ - vlgvb %r8,%v18,7 /* Load byte index of zero. */ - clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes - -> Prepare vreg. */ - vl %v17,0(%r5,%r3) /* Load over boundary ... */ - lghi %r8,0 /* r8=0 -> no zero in this part of acc, - check for zero is in jump-target. */ - j .Lslow_next_acc_notonbb /* ... and search for zero in - fully loaded vreg again. */ - -.Lslow_end_not_found: - algfr %r1,%r6 /* Add zero-index to current len. */ - j .Lslow_end -.Lslow_index_found: - algfr %r1,%r4 /* Add found index of char to current len. */ -.Lslow_end: - lgr %r2,%r1 - /* Restore registers. */ - vlgvg %r6,%v30,0 - vlgvg %r8,%v31,0 - vlgvg %r9,%v31,1 - br %r14 -END(__strcspn_vx) -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strcspn.c b/sysdeps/s390/multiarch/strcspn.c deleted file mode 100644 index 418ffcd..0000000 --- a/sysdeps/s390/multiarch/strcspn.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Multiple versions of strcspn. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define strcspn __redirect_strcspn -/* Omit the strcspn inline definitions because it would redefine strcspn. 
*/ -# define __NO_STRING_INLINES -# include -# undef strcspn -# include - -s390_vx_libc_ifunc2_redirected (__redirect_strcspn, __strcspn, strcspn) - -#else -# include -#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/strcspn-c.c b/sysdeps/s390/strcspn-c.c new file mode 100644 index 0000000..9f51f92 --- /dev/null +++ b/sysdeps/s390/strcspn-c.c @@ -0,0 +1,32 @@ +/* Default strcspn implementation for S/390. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#if HAVE_STRCSPN_C +# if HAVE_STRCSPN_IFUNC +# define STRCSPN STRCSPN_C +# if defined SHARED && IS_IN (libc) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcspn_c, __GI_strcspn, __strcspn_c); +# endif +# endif + +# include +#endif diff --git a/sysdeps/s390/strcspn-vx.S b/sysdeps/s390/strcspn-vx.S new file mode 100644 index 0000000..ff5b1be --- /dev/null +++ b/sysdeps/s390/strcspn-vx.S @@ -0,0 +1,292 @@ +/* Vector optimized 32/64 bit S/390 version of strcspn. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ifunc-strcspn.h> + +#if HAVE_STRCSPN_Z13 + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strcspn (const char *s, const char * reject) + The strcspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(STRCSPN_Z13) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load reject. */ + lghi %r1,0 /* Zero out current len. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if reject + lays on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. 
*/ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=equal count). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to reject + and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. 
*/ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return length up to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return length up to zero element */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> Proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. 
*/ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (all other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvb %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. 
*/ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + lgr %r2,%r1 + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +END(STRCSPN_Z13) + +# if ! HAVE_STRCSPN_IFUNC +strong_alias (STRCSPN_Z13, strcspn) +# endif + +# if ! HAVE_STRCSPN_C && defined SHARED && IS_IN (libc) +strong_alias (STRCSPN_Z13, __GI_strcspn) +# endif + +#endif /* HAVE_STRCSPN_Z13 */ diff --git a/sysdeps/s390/strcspn.c b/sysdeps/s390/strcspn.c new file mode 100644 index 0000000..a3f35d3 --- /dev/null +++ b/sysdeps/s390/strcspn.c @@ -0,0 +1,42 @@ +/* Multiple versions of strcspn. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include <ifunc-strcspn.h> + +#if HAVE_STRCSPN_IFUNC +# define strcspn __redirect_strcspn +/* Omit the strcspn inline definitions because it would redefine strcspn. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strcspn +# include <ifunc-resolve.h> + +# if HAVE_STRCSPN_C +extern __typeof (__redirect_strcspn) STRCSPN_C attribute_hidden; +# endif + +# if HAVE_STRCSPN_Z13 +extern __typeof (__redirect_strcspn) STRCSPN_Z13 attribute_hidden; +# endif + +s390_libc_ifunc_expr (__redirect_strcspn, strcspn, + (HAVE_STRCSPN_Z13 && (hwcap & HWCAP_S390_VX)) + ? STRCSPN_Z13 + : STRCSPN_DEFAULT + ) +#endif /* HAVE_STRCSPN_IFUNC */ -- cgit v1.1