From 9c38deec96ebe1b052a0c1bef137b90967755f18 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 5 Jul 2022 17:06:41 -0700 Subject: x86: Remove generic strncat, strncpy, and stpncpy implementations These functions all have optimized versions: __strncat_sse2_unaligned, __strncpy_sse2_unaligned, and __stpncpy_sse2_unaligned which are faster than their respective generic implementations. Since the sse2 versions can run on baseline x86_64, we should use these as the baseline implementation and can remove the generic implementations. Geometric mean of N=20 runs of the entire benchmark suite on: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz (Tigerlake) __strncat_sse2_unaligned / __strncat_generic: .944 __strncpy_sse2_unaligned / __strncpy_generic: .726 __stpncpy_sse2_unaligned / __stpncpy_generic: .650 Tested build with and without multiarch and full check with multiarch. --- sysdeps/x86_64/multiarch/Makefile | 3 -- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 9 ++---- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 ++--- sysdeps/x86_64/multiarch/ifunc-strncpy.h | 48 ++++++++++++++++++++++++++++++ sysdeps/x86_64/multiarch/stpncpy-generic.c | 26 ---------------- sysdeps/x86_64/multiarch/stpncpy.c | 3 +- sysdeps/x86_64/multiarch/strncat-generic.c | 21 ------------- sysdeps/x86_64/multiarch/strncat.c | 3 +- sysdeps/x86_64/multiarch/strncpy-generic.c | 24 --------------- sysdeps/x86_64/multiarch/strncpy.c | 3 +- 10 files changed, 56 insertions(+), 92 deletions(-) create mode 100644 sysdeps/x86_64/multiarch/ifunc-strncpy.h delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c delete mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c delete mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 7ba9ad5..d6b62af 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -45,7 +45,6 @@ sysdep_routines += \ stpcpy-sse2-unaligned \ stpncpy-avx2 \
stpncpy-avx2-rtm \ - stpncpy-generic \ stpncpy-evex \ stpncpy-sse2-unaligned \ strcasecmp_l-avx2 \ @@ -91,7 +90,6 @@ sysdep_routines += \ strncase_l-sse4_2 \ strncat-avx2 \ strncat-avx2-rtm \ - strncat-generic \ strncat-evex \ strncat-sse2-unaligned \ strncmp-avx2 \ @@ -101,7 +99,6 @@ sysdep_routines += \ strncmp-sse4_2 \ strncpy-avx2 \ strncpy-avx2-rtm \ - strncpy-generic \ strncpy-evex \ strncpy-sse2-unaligned \ strnlen-avx2 \ diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index adf7d4b..2c96cb6 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -403,8 +403,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && CPU_FEATURE_USABLE (AVX512BW)), __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, - __stpncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) + __stpncpy_sse2_unaligned)) /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ IFUNC_IMPL (i, name, stpcpy, @@ -618,8 +617,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && CPU_FEATURE_USABLE (AVX512BW)), __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, 1, - __strncat_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) + __strncat_sse2_unaligned)) /* Support sysdeps/x86_64/multiarch/strncpy.c. */ IFUNC_IMPL (i, name, strncpy, @@ -634,8 +632,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && CPU_FEATURE_USABLE (AVX512BW)), __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, 1, - __strncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) + __strncpy_sse2_unaligned)) /* Support sysdeps/x86_64/multiarch/strpbrk.c. 
*/ IFUNC_IMPL (i, name, strpbrk, diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index 8052945..a15afa4 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -20,11 +20,7 @@ #include <init-arch.h> -#ifndef GENERIC -# define GENERIC sse2 -#endif - -extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; @@ -53,5 +49,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); - return OPTIMIZE (GENERIC); + return OPTIMIZE (sse2); } diff --git a/sysdeps/x86_64/multiarch/ifunc-strncpy.h b/sysdeps/x86_64/multiarch/ifunc-strncpy.h new file mode 100644 index 0000000..323225a --- /dev/null +++ b/sysdeps/x86_64/multiarch/ifunc-strncpy.h @@ -0,0 +1,48 @@ +/* Common definition for ifunc st{r|p}n{cpy|cat} + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>.
*/ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURE_USABLE_P (cpu_features, AVX2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)) + return OPTIMIZE (evex); + + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } + + return OPTIMIZE (sse2_unaligned); +} diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c deleted file mode 100644 index 8782684..0000000 --- a/sysdeps/x86_64/multiarch/stpncpy-generic.c +++ /dev/null @@ -1,26 +0,0 @@ -/* stpncpy. - Copyright (C) 2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>.
*/ - - -#define STPNCPY __stpncpy_generic -#undef weak_alias -#define weak_alias(ignored1, ignored2) -#undef libc_hidden_def -#define libc_hidden_def(stpncpy) - -#include <string/stpncpy.c> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c index 879bc83..a8d083f 100644 --- a/sysdeps/x86_64/multiarch/stpncpy.c +++ b/sysdeps/x86_64/multiarch/stpncpy.c @@ -25,9 +25,8 @@ # undef stpncpy # undef __stpncpy -# define GENERIC generic # define SYMBOL_NAME stpncpy -# include "ifunc-strcpy.h" +# include "ifunc-strncpy.h" libc_ifunc_redirected (__redirect_stpncpy, __stpncpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c deleted file mode 100644 index 0090669..0000000 --- a/sysdeps/x86_64/multiarch/strncat-generic.c +++ /dev/null @@ -1,21 +0,0 @@ -/* strncat. - Copyright (C) 2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>.
*/ - - -#define STRNCAT __strncat_generic -#include <string/strncat.c> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c index 50fba8a..a590c25 100644 --- a/sysdeps/x86_64/multiarch/strncat.c +++ b/sysdeps/x86_64/multiarch/strncat.c @@ -24,8 +24,7 @@ # undef strncat # define SYMBOL_NAME strncat -# define GENERIC generic -# include "ifunc-strcpy.h" +# include "ifunc-strncpy.h" libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); strong_alias (strncat, __strncat); diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c deleted file mode 100644 index 9916153..0000000 --- a/sysdeps/x86_64/multiarch/strncpy-generic.c +++ /dev/null @@ -1,24 +0,0 @@ -/* strncpy. - Copyright (C) 2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>.
*/ - - -#define STRNCPY __strncpy_generic -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(strncpy) - -#include <string/strncpy.c> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c index 7fc7d72..c83440f 100644 --- a/sysdeps/x86_64/multiarch/strncpy.c +++ b/sysdeps/x86_64/multiarch/strncpy.c @@ -24,8 +24,7 @@ # undef strncpy # define SYMBOL_NAME strncpy -# define GENERIC generic -# include "ifunc-strcpy.h" +# include "ifunc-strncpy.h" libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); -- cgit v1.1