diff options
Diffstat (limited to 'sysdeps/powerpc')
46 files changed, 134 insertions, 1478 deletions
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index 5e6cb07..5cdb64f 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so + +# The test checks if the __tls_get_addr does not clobber caller-saved +# register, so disable the powerpc specific optimization to force a +# __tls_get_addr call. +LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize endif ifneq (no,$(multi-arch)) diff --git a/sysdeps/powerpc/fpu/math-use-builtins-trunc.h b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h new file mode 100644 index 0000000..3e6a55d --- /dev/null +++ b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h @@ -0,0 +1,9 @@ +#ifdef _ARCH_PWR5X +# define USE_TRUNCF_BUILTIN 1 +# define USE_TRUNC_BUILTIN 1 +#else +# define USE_TRUNCF_BUILTIN 0 +# define USE_TRUNC_BUILTIN 0 +#endif +#define USE_TRUNCL_BUILTIN 0 +#define USE_TRUNCF128_BUILTIN 0 diff --git a/sysdeps/powerpc/fpu/s_modf.c b/sysdeps/powerpc/fpu/s_modf.c deleted file mode 100644 index 831072b..0000000 --- a/sysdeps/powerpc/fpu/s_modf.c +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. 
- - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <https://www.gnu.org/licenses/>. */ - -/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make - generic implementation faster. Also disables for old ISAs that do not - have ceil/floor instructions. */ -#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X) -# include <sysdeps/ieee754/ldbl-opt/s_modf.c> -#else -# include <math.h> -# include <math_ldbl_opt.h> -# include <libm-alias-double.h> - -double -__modf (double x, double *iptr) -{ - if (__builtin_isinf (x)) - { - *iptr = x; - return copysign (0.0, x); - } - else if (__builtin_isnan (x)) - { - *iptr = NAN; - return NAN; - } - - if (x >= 0.0) - { - *iptr = floor (x); - return copysign (x - *iptr, x); - } - else - { - *iptr = ceil (x); - return copysign (x - *iptr, x); - } -} -# ifndef __modf -libm_alias_double (__modf, modf) -# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) -compat_symbol (libc, __modf, modfl, GLIBC_2_0); -# endif -# endif -#endif diff --git a/sysdeps/powerpc/fpu/s_modff.c b/sysdeps/powerpc/fpu/s_modff.c deleted file mode 100644 index 79eeb7b..0000000 --- a/sysdeps/powerpc/fpu/s_modff.c +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. 
- - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <https://www.gnu.org/licenses/>. */ - -/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make - generic implementation faster. Also disables for old ISAs that do not - have ceil/floor instructions. */ -#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X) -# include <sysdeps/ieee754/flt-32/s_modff.c> -#else -# include <math.h> -# include <libm-alias-float.h> - -float -__modff (float x, float *iptr) -{ - if (__builtin_isinff (x)) - { - *iptr = x; - return copysignf (0.0, x); - } - else if (__builtin_isnanf (x)) - { - *iptr = NAN; - return NAN; - } - - if (x >= 0.0) - { - *iptr = floorf (x); - return copysignf (x - *iptr, x); - } - else - { - *iptr = ceilf (x); - return copysignf (x - *iptr, x); - } -} -# ifndef __modff -libm_alias_float (__modf, modf) -# endif -#endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c index b8315c5..48f3a19 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. */ #define __modf __modf_power5plus -#include <sysdeps/powerpc/fpu/s_modf.c> +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c index 69591da..15bfa0b 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. 
*/ #define __modff __modff_power5plus -#include <sysdeps/powerpc/fpu/s_modff.c> +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile index 0a4e828..3a49b85 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile @@ -6,12 +6,9 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ memchr-ppc32 memrchr-power7 memrchr-ppc32 rawmemchr-power7 \ rawmemchr-ppc32 strlen-power7 strlen-ppc32 strnlen-power7 \ strnlen-ppc32 \ - strcasecmp-power7 strcasecmp_l-power7 strncase-power7 \ - strncase_l-power7 strchrnul-power7 strchrnul-ppc32 \ + strcasecmp-power7 strcasecmp_l-power7 \ + strchrnul-power7 strchrnul-ppc32 \ strchr-power7 strchr-ppc32 \ wordcopy-power7 wordcopy-ppc32 \ memmove-power7 memmove-ppc - -CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops -CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops endif diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c index 68a3f9d..7537f3a 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c @@ -138,21 +138,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, __strcasecmp_l_ppc)) - /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c. */ - IFUNC_IMPL (i, name, strncasecmp, - IFUNC_IMPL_ADD (array, i, strncasecmp, - hwcap & PPC_FEATURE_HAS_VSX, - __strncasecmp_power7) - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) - - /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c. 
*/ - IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - hwcap & PPC_FEATURE_HAS_VSX, - __strncasecmp_l_power7) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_ppc)) - /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c. */ IFUNC_IMPL (i, name, strchrnul, IFUNC_IMPL_ADD (array, i, strchrnul, diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c deleted file mode 100644 index 5b45ed5..0000000 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c +++ /dev/null @@ -1,26 +0,0 @@ -/* Optimized strcasecmp_l implementation for POWER7. - Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - - -#include <string.h> - -#define __strncasecmp __strncasecmp_power7 - -extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; - -#include <string/strncase.c> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c deleted file mode 100644 index 3cd6433..0000000 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c +++ /dev/null @@ -1,41 +0,0 @@ -/* Multiple versions of strncasecmp. - Copyright (C) 2013-2025 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <string.h> -# define strncasecmp __strncasecmp_ppc -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; -#endif - -#include <string/strncase.c> -#undef strncasecmp - -#if IS_IN (libc) -# include <shlib-compat.h> -# include "init-arch.h" - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__strncasecmp) __libc_strncasecmp; -libc_ifunc (__libc_strncasecmp, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __strncasecmp_power7 - : __strncasecmp_ppc); -weak_alias (__libc_strncasecmp, strncasecmp) -#endif diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c deleted file mode 100644 index 388d482..0000000 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c +++ /dev/null @@ -1,42 +0,0 @@ -/* Multiple versions of strncasecmp_l. - Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <string.h> -# define strncasecmp_l __strncasecmp_l_ppc -extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; -extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; -#endif - -#include <string/strncase_l.c> -#undef strncasecmp_l - -#if IS_IN (libc) -# include <shlib-compat.h> -# include "init-arch.h" - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; -libc_ifunc (__libc_strncasecmp_l, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __strncasecmp_l_power7 - : __strncasecmp_l_ppc); - -weak_alias (__libc_strncasecmp_l, strncasecmp_l) -#endif diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c index b8315c5..48f3a19 100644 --- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c +++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. 
*/ #define __modf __modf_power5plus -#include <sysdeps/powerpc/fpu/s_modf.c> +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c index 69591da..15bfa0b 100644 --- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c +++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. */ #define __modff __modff_power5plus -#include <sysdeps/powerpc/fpu/s_modff.c> +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/powerpc/powerpc64/le/configure b/sysdeps/powerpc/powerpc64/le/configure index 7092f61..ef17f24 100644 --- a/sysdeps/powerpc/powerpc64/le/configure +++ b/sysdeps/powerpc/powerpc64/le/configure @@ -137,75 +137,5 @@ then : critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously." fi -for ac_prog in $OBJCOPY -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_OBJCOPY+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OBJCOPY"; then - ac_cv_prog_OBJCOPY="$OBJCOPY" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_OBJCOPY="$ac_prog" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -OBJCOPY=$ac_cv_prog_OBJCOPY -if test -n "$OBJCOPY"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJCOPY" >&5 -printf "%s\n" "$OBJCOPY" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - - test -n "$OBJCOPY" && break -done - -if test -z "$OBJCOPY"; then - ac_verc_fail=yes -else - # Found it, now check the version. - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking version of $OBJCOPY" >&5 -printf %s "checking version of $OBJCOPY... " >&6; } - ac_prog_version=`$OBJCOPY --version 2>&1 | sed -n 's/^.*GNU objcopy.* \([0-9]*\.[0-9.]*\).*$/\1/p'` - case $ac_prog_version in - '') ac_prog_version="v. ?.??, bad"; ac_verc_fail=yes;; - 2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*) - ac_prog_version="$ac_prog_version, ok"; ac_verc_fail=no;; - *) ac_prog_version="$ac_prog_version, bad"; ac_verc_fail=yes;; - - esac - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_prog_version" >&5 -printf "%s\n" "$ac_prog_version" >&6; } -fi -if test $ac_verc_fail = yes; then - AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le" -fi - - test -n "$critic_missing" && as_fn_error $? 
"*** $critic_missing" "$LINENO" 5 diff --git a/sysdeps/powerpc/powerpc64/le/configure.ac b/sysdeps/powerpc/powerpc64/le/configure.ac index 48d7089..79b3d43 100644 --- a/sysdeps/powerpc/powerpc64/le/configure.ac +++ b/sysdeps/powerpc/powerpc64/le/configure.ac @@ -66,11 +66,4 @@ CFLAGS="$save_CFLAGS"]) AS_IF([test "$libc_cv_compiler_powerpc64le_ldbl128_mabi" = "no"], [critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."]) -dnl objcopy (binutils) 2.26 or newer required to support the --update-section -dnl feature for fixing up .gnu.attribute section with IEEE ldbl. -AC_CHECK_PROG_VER(OBJCOPY, $OBJCOPY, --version, - [GNU objcopy.* \([0-9]*\.[0-9.]*\)], - [2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*], - AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le") - test -n "$critic_missing" && AC_MSG_ERROR([*** $critic_missing]) diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c index d425680..89e7498 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c +++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c @@ -1,5 +1,5 @@ -/* Optimized strcasecmp_l implementation for POWER7. - Copyright (C) 2013-2025 Free Software Foundation, Inc. +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,11 +16,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#include <string.h> +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include "math_config.h" -#define __strncasecmp_l __strncasecmp_l_power7 -#define USE_IN_EXTENDED_LOCALE_MODEL 1 - -extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; - -#include <string/strncase.c> +int +__ieee754_ilogb (double x) +{ + uint64_t ux = asuint64 (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (ex == 0) /* zero or subnormal */ + { + /* Clear sign and exponent */ + ux <<= 12; + if (ux == 0) + return FP_ILOGB0; + /* subnormal */ + return -1023 - stdc_leading_zeros (ux); + } + if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */ + return ux << 12 ? FP_ILOGBNAN : INT_MAX; + return ex - 1023; +} diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c index c9d2f4e..1c2a8a5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S +++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c @@ -1,5 +1,5 @@ -/* Optimized memchr implementation for POWER10/PPC64. - Copyright (C) 2016-2025 Free Software Foundation, Inc. +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define MEMCHR __memchr_power10 +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include "sysdeps/ieee754/flt-32/math_config.h" -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(name,alias) - -#include <sysdeps/powerpc/powerpc64/le/power10/memchr.S> -#endif +int +__ieee754_ilogbf (float x) +{ + uint32_t ux = asuint (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (ex == 0) /* zero or subnormal */ + { + /* Clear sign and exponent. */ + ux <<= 1 + EXPONENT_WIDTH; + if (ux == 0) + return FP_ILOGB0; + /* subnormal */ + return -127 - stdc_leading_zeros (ux); + } + if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */ + return ux << (1 + EXPONENT_WIDTH) ? FP_ILOGBNAN : INT_MAX; + return ex - 127; +} diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile index 6d74f09..1a0e496 100644 --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile @@ -91,8 +91,8 @@ gen-libm-f128-ifunc-routines = \ e_expf128 e_fmodf128 e_hypotf128 e_j0f128 e_j1f128 e_jnf128 \ e_lgammaf128_r e_logf128 e_log10f128 e_powf128 e_remainderf128 \ e_sinhf128 e_sqrtf128 e_gammaf128_r e_ilogbf128 k_tanf128 s_asinhf128 \ - s_atanf128 s_cbrtf128 s_ceilf128 s_cosf128 s_erff128 s_exp10m1f128 \ - s_exp2m1f128 s_expm1f128 \ + s_atanf128 s_cbrtf128 s_ceilf128 s_compoundnf128 s_cosf128 s_erff128 \ + s_exp10m1f128 s_exp2m1f128 s_expm1f128 \ s_fabsf128 s_floorf128 s_log1pf128 s_logbf128 \ s_rintf128 s_scalblnf128 s_sinf128 s_tanf128 \ s_tanhf128 s_truncf128 s_remquof128 e_log2f128 \ @@ -232,6 +232,7 @@ CFLAGS-s_cexpf128-ifunc.c += -fno-builtin-cexpf64x CFLAGS-s_cimagf128-ifunc.c += -fno-builtin-cimagf64x CFLAGS-s_clogf128-ifunc.c += -fno-builtin-clogf64x CFLAGS-s_clog10f128-ifunc.c += -fno-builtin-clog10f64x +CFLAGS-s_compoundnf128-ifunc.c +=
-fno-builtin-compoundnf64x CFLAGS-s_conjf128-ifunc.c += -fno-builtin-conjf64x CFLAGS-s_copysignf128-ifunc.c += -fno-builtin-copysignf64x CFLAGS-s_cosf128-ifunc.c += -fno-builtin-cosf64x diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h index 18d09bc..b0cc6fc 100644 --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h @@ -28,6 +28,7 @@ F128_REDIR_PFX_R (floorf128, __,); F128_REDIR_PFX_R (truncf128, __,); F128_REDIR_PFX_R (roundf128, __,); F128_REDIR_PFX_R (fabsf128, __,); +F128_REDIR_PFX_R (fmaf128, __,); extern __typeof (ldexpf128) F128_SFX_APPEND (__ldexpf128); diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h index d8d743c..1e3d8dc 100644 --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h @@ -55,6 +55,7 @@ F128_REDIR (__asinhf128) F128_REDIR (__atanf128) F128_REDIR (__cbrtf128) F128_REDIR (__ceilf128) +F128_REDIR (__compoundnf128) F128_REDIR (__cosf128) F128_REDIR (__erfcf128) F128_REDIR (__erff128) diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h index f125b88..56d1bb7 100644 --- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h +++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h @@ -78,6 +78,7 @@ F128_REDIR (__ldexpf128); F128_REDIR (__cargf128); F128_REDIR (__cimagf128); F128_REDIR (__crealf128); +F128_REDIR (__compoundnf128); F128_REDIR (__conjf128); F128_REDIR (__cprojf128); F128_REDIR (__cabsf128); diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c new file mode 100644 index 
0000000..9c26217 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c new file mode 100644 index 0000000..047ad4b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c new file mode 100644 index 0000000..5e8891a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_llogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c new file mode 100644 index 0000000..edb7e9a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_llogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S deleted file mode 100644 index 96ad5a2..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S +++ /dev/null @@ -1,315 +0,0 @@ -/* Optimized memchr implementation for POWER10 LE. - Copyright (C) 2021-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -# ifndef MEMCHR -# define MEMCHR __memchr -# endif -# define M_VREG_ZERO v20 -# define M_OFF_START_LOOP 256 -# define MEMCHR_SUBTRACT_VECTORS \ - vsububm v4,v4,v18; \ - vsububm v5,v5,v18; \ - vsububm v6,v6,v18; \ - vsububm v7,v7,v18; -# define M_TAIL(vreg,increment) \ - vctzlsbb r4,vreg; \ - cmpld r5,r4; \ - ble L(null); \ - addi r4,r4,increment; \ - add r3,r6,r4; \ - blr - -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define M_VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define M_LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define CHECK16B(vreg,offset,addr,label) \ - lxv vreg+32,offset(addr); \ - vcmpequb. vreg,vreg,v18; \ - bne cr6,L(label); \ - cmpldi r5,16; \ - ble L(null); \ - addi r5,r5,-16; - -/* Load 4 quadwords, merge into one VR for speed and check for NULLs. r6 has # - of bytes already checked. */ -#define CHECK64B(offset,addr,label) \ - M_LXVP(v4+32,offset,addr); \ - M_LXVP(v6+32,offset+32,addr); \ - MEMCHR_SUBTRACT_VECTORS; \ - vminub v14,v4,v5; \ - vminub v15,v6,v7; \ - vminub v16,v14,v15; \ - vcmpequb. v0,v16,M_VREG_ZERO; \ - beq cr6,$+12; \ - li r7,offset; \ - b L(label); \ - cmpldi r5,64; \ - ble L(null); \ - addi r5,r5,-64 - -/* Implements the function - void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]). 
*/ - - .machine power9 - -ENTRY_TOCLESS (MEMCHR) - CALL_MCOUNT 3 - - cmpldi r5,0 - beq L(null) - mr r0,r5 - xori r6,r4,0xff - - mtvsrd v18+32,r4 /* matching char in v18 */ - mtvsrd v19+32,r6 /* non matching char in v19 */ - - vspltb v18,v18,7 /* replicate */ - vspltb v19,v19,7 /* replicate */ - vspltisb M_VREG_ZERO,0 - - /* Next 16B-aligned address. Prepare address for L(aligned). */ - addi r6,r3,16 - clrrdi r6,r6,4 - - /* Align data and fill bytes not loaded with non matching char. */ - lvx v0,0,r3 - lvsr v1,0,r3 - vperm v0,v19,v0,v1 - - vcmpequb. v6,v0,v18 - bne cr6,L(found) - sub r4,r6,r3 - cmpld r5,r4 - ble L(null) - sub r5,r5,r4 - - /* Test up to OFF_START_LOOP-16 bytes in 16B chunks. The main loop is - optimized for longer strings, so checking the first bytes in 16B - chunks benefits a lot small strings. */ - .p2align 5 -L(aligned): - cmpldi r5,0 - beq L(null) - - CHECK16B(v0,0,r6,tail1) - CHECK16B(v1,16,r6,tail2) - CHECK16B(v2,32,r6,tail3) - CHECK16B(v3,48,r6,tail4) - CHECK16B(v4,64,r6,tail5) - CHECK16B(v5,80,r6,tail6) - CHECK16B(v6,96,r6,tail7) - CHECK16B(v7,112,r6,tail8) - CHECK16B(v8,128,r6,tail9) - CHECK16B(v9,144,r6,tail10) - CHECK16B(v10,160,r6,tail11) - CHECK16B(v0,176,r6,tail12) - CHECK16B(v1,192,r6,tail13) - CHECK16B(v2,208,r6,tail14) - CHECK16B(v3,224,r6,tail15) - - cmpdi cr5,r4,0 /* Check if c == 0. This will be useful to - choose how we will perform the main loop. */ - - /* Prepare address for the loop. */ - addi r4,r3,M_OFF_START_LOOP - clrrdi r4,r4,6 - sub r6,r4,r3 - sub r5,r0,r6 - addi r6,r4,128 - - /* If c == 0, use the loop without the vsububm. */ - beq cr5,L(loop) - - /* This is very similar to the block after L(loop), the difference is - that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract - each byte loaded by the char we are looking for, this way we can keep - using vminub to merge the results and checking for nulls. 
*/ - .p2align 5 -L(memchr_loop): - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(memchr_loop) - /* Switch to a more aggressive approach checking 64B each time. Use 2 - pointers 128B apart and unroll the loop once to make the pointer - updates and usages separated enough to avoid stalls waiting for - address calculation. */ - .p2align 5 -L(loop): -#undef MEMCHR_SUBTRACT_VECTORS -#define MEMCHR_SUBTRACT_VECTORS /* nothing */ - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(loop) - - .p2align 5 -L(pre_tail_64b): - mr r6,r4 -L(tail_64b): - /* OK, we found a null byte. Let's look for it in the current 64-byte - block and mark it in its corresponding VR. lxvp vx,0(ry) puts the - low 16B bytes into vx+1, and the high into vx, so the order here is - v5, v4, v7, v6. */ - vcmpequb v1,v5,M_VREG_ZERO - vcmpequb v2,v4,M_VREG_ZERO - vcmpequb v3,v7,M_VREG_ZERO - vcmpequb v4,v6,M_VREG_ZERO - - /* Take into account the other 64B blocks we had already checked. */ - add r6,r6,r7 - /* Extract first bit of each byte. */ - M_VEXTRACTBM(r8,v1) - M_VEXTRACTBM(r9,v2) - M_VEXTRACTBM(r10,v3) - M_VEXTRACTBM(r11,v4) - - /* Shift each value into their corresponding position. */ - sldi r9,r9,16 - sldi r10,r10,32 - sldi r11,r11,48 - - /* Merge the results. */ - or r8,r8,r9 - or r9,r10,r11 - or r11,r9,r8 - - cnttzd r0,r11 /* Count trailing zeros before the match. */ - cmpld r5,r0 - ble L(null) - add r3,r6,r0 /* Compute final address. 
*/ - blr - - .p2align 5 -L(tail1): - M_TAIL(v0,0) - - .p2align 5 -L(tail2): - M_TAIL(v1,16) - - .p2align 5 -L(tail3): - M_TAIL(v2,32) - - .p2align 5 -L(tail4): - M_TAIL(v3,48) - - .p2align 5 -L(tail5): - M_TAIL(v4,64) - - .p2align 5 -L(tail6): - M_TAIL(v5,80) - - .p2align 5 -L(tail7): - M_TAIL(v6,96) - - .p2align 5 -L(tail8): - M_TAIL(v7,112) - - .p2align 5 -L(tail9): - M_TAIL(v8,128) - - .p2align 5 -L(tail10): - M_TAIL(v9,144) - - .p2align 5 -L(tail11): - M_TAIL(v10,160) - - .p2align 5 -L(tail12): - M_TAIL(v0,176) - - .p2align 5 -L(tail13): - M_TAIL(v1,192) - - .p2align 5 -L(tail14): - M_TAIL(v2,208) - - .p2align 5 -L(tail15): - M_TAIL(v3,224) - - .p2align 5 -L(found): - vctzlsbb r7,v6 - cmpld r5,r7 - ble L(null) - add r3,r3,r7 - blr - - .p2align 5 -L(null): - li r3,0 - blr - -END (MEMCHR) - -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S index f32dc38..734bf5f 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S @@ -18,26 +18,10 @@ #include <sysdep.h> -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - /* Compare 32 bytes. */ #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\ - LXVP(32+vr1,offset,r3); \ - LXVP(32+vr2,offset,r4); \ + lxvp 32+vr1,offset(r3); \ + lxvp 32+vr2,offset(r4); \ vcmpneb. v5,vr1+1,vr2+1; \ bne cr6,L(tail_2); \ vcmpneb. 
v4,vr1,vr2; \ @@ -56,7 +40,7 @@ #ifndef MEMCMP # define MEMCMP memcmp #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMCMP, 4) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S index ed7a9f5..f2a503e 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S @@ -26,7 +26,7 @@ /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. */ - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMCPY, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memmove.S b/sysdeps/powerpc/powerpc64/le/power10/memmove.S index 47c2ac3..4aaa1ef 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memmove.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memmove.S @@ -28,7 +28,7 @@ #ifndef MEMMOVE # define MEMMOVE memmove #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMMOVE, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S index 29d5114..f9442e7 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memset.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S @@ -25,7 +25,7 @@ # define MEMSET memset #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMSET, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S deleted file mode 100644 index fffa1ee..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +++ /dev/null @@ -1,233 +0,0 @@ -/* Optimized strcmp implementation for PowerPC64/POWER10. - Copyright (C) 2021-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ -#include <sysdep.h> - -#ifndef STRCMP -# define STRCMP strcmp -#endif - -/* Implements the function - int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. 
v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - -#define TAIL(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; \ - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r0,len_reg,56; \ - lxvl 32+v4,reg1,r0; \ - lxvl 32+v5,reg2,r0; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - - .machine power9 -ENTRY_TOCLESS (STRCMP, 4) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r5,r9,16 /* r5(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r5) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r3-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. 
*/ - beq L(compare_64_pagecross) - mtctr r7 - - .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) - - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. */ -L(compare_64_pagecross): - li r11, 63 - mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) - -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. */ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - - clrldi r6,r3,59 - subfic r5,r6,32 - add r3,r3,r5 - add r4,r4,r5 - andi. 
r5,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. */ -L(different): - TAIL(v4,v5) - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL(v15,v17) -L(tail2): TAIL(v14,v16) -L(tail3): TAIL(v19,v21) -L(tail4): TAIL(v18,v20) -L(tail5): TAIL(v23,v25) -L(tail6): TAIL(v22,v24) -L(tail7): TAIL(v27,v29) -L(tail8): TAIL(v26,v28) - -END (STRCMP) -libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S index 4985a92..ec644d5 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S @@ -63,22 +63,6 @@ blr #endif /* USE_AS_RAWMEMCHR */ -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - #define CHECK16(vreg,offset,addr,label) \ lxv vreg+32,offset(addr); \ vcmpequb. vreg,vreg,v18; \ @@ -88,8 +72,8 @@ of bytes already checked. 
*/ #define CHECK64(offset,addr,label) \ li r6,offset; \ - LXVP(v4+32,offset,addr); \ - LXVP(v6+32,offset+32,addr); \ + lxvp v4+32,offset(addr); \ + lxvp v6+32,offset+32(addr); \ RAWMEMCHR_SUBTRACT_VECTORS; \ vminub v14,v4,v5; \ vminub v15,v6,v7; \ @@ -108,7 +92,7 @@ The implementation can load bytes past a matching byte, but only up to the next 64B boundary, so it never crosses a page. */ -.machine power9 +.machine power10 ENTRY_TOCLESS (FUNCNAME, 4) CALL_MCOUNT MCOUNT_NARGS @@ -234,10 +218,10 @@ L(tail_64b): add r5,r5,r6 /* Extract first bit of each byte. */ - VEXTRACTBM(r7,v1) - VEXTRACTBM(r8,v2) - VEXTRACTBM(r9,v3) - VEXTRACTBM(r10,v4) + vextractbm r7,v1 + vextractbm r8,v2 + vextractbm r9,v3 + vextractbm r10,v4 /* Shift each value into their corresponding position. */ sldi r8,r8,16 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S deleted file mode 100644 index 10700dd..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S +++ /dev/null @@ -1,271 +0,0 @@ -/* Optimized strncmp implementation for PowerPC64/POWER10. - Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - -/* Implements the function - - int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) - - The implementation uses unaligned doubleword access to avoid specialized - code paths depending of data alignment for first 32 bytes and uses - vectorised loops after that. */ - -#ifndef STRNCMP -# define STRNCMP strncmp -#endif - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - cmpldi cr7,r5,16; \ - ble cr7,L(ret0); \ - addi r5,r5,-16; - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. 
v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - cmpldi cr7,r5,32; \ - ble cr7,L(ret0); \ - addi r5,r5,-32; - -#define TAIL_FIRST_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - cmpld cr7,r5,r6; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define TAIL_SECOND_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - addi r0,r6,16; \ - cmpld cr7,r5,r0; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r6,len_reg,56; \ - lxvl 32+v4,reg1,r6; \ - lxvl 32+v5,reg2,r6; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - cmpld cr7,r5,len_reg; \ - ble cr7,L(ret0); \ - sub r5,r5,len_reg; \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 -ENTRY_TOCLESS (STRNCMP, 4) - /* Check if size is 0. */ - cmpdi cr0,r5,0 - beq cr0,L(ret0) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r8 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r8) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. 
*/ -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - - .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) - - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. */ -L(compare_64_pagecross): - li r11, 63 - mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) - -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. 
*/ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r5,r5,32 - - clrldi r6,r3,59 - subfic r7,r6,32 - add r3,r3,r7 - add r4,r4,r7 - subf r5,r7,r5 - andi. r7,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. */ -L(different): - TAIL_FIRST_16B(v4,v5) - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL_FIRST_16B(v15,v17) -L(tail2): TAIL_SECOND_16B(v14,v16) -L(tail3): TAIL_FIRST_16B(v19,v21) -L(tail4): TAIL_SECOND_16B(v18,v20) -L(tail5): TAIL_FIRST_16B(v23,v25) -L(tail6): TAIL_SECOND_16B(v22,v24) -L(tail7): TAIL_FIRST_16B(v27,v29) -L(tail8): TAIL_SECOND_16B(v26,v28) - - .p2align 5 -L(ret0): - li r3,0 - blr - -END(STRNCMP) -libc_hidden_builtin_def(strncmp) diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S index 83b21c6..f0cde81 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S @@ -28,21 +28,6 @@ The implementation uses unaligned doubleword access for first 32 bytes as in POWER8 patch and uses vectorised loops after that. */ -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. 
*/ -#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) - -#define VEXTUBRX(t,a,b) .long (0x1000070d \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - -#define VCMPNEZB(t,a,b) .long (0x10000507 \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - /* Get 16 bytes for unaligned case. reg1: Vector to hold next 16 bytes. reg2: Address to read from. @@ -61,10 +46,7 @@ 2: \ vperm reg1, v9, reg1, reg3; -/* TODO: change this to .machine power9 when the minimum required binutils - allows it. */ - - .machine power7 + .machine power9 ENTRY_TOCLESS (STRCMP, 4) li r0, 0 @@ -116,7 +98,7 @@ L(align): /* Both s1 and s2 are unaligned. */ GET16BYTES(v4, r7, v10) GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 beq cr6, L(match) b L(different) @@ -136,28 +118,28 @@ L(match): L(s1_align): lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 beq cr6, L(s1_align) @@ -167,37 +149,37 @@ L(s1_align): L(aligned): lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. 
v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 beq cr6, L(aligned) /* Calculate and return the difference. */ L(different): - VCTZLSBB(r6, v7) - VEXTUBRX(r5, r6, v4) - VEXTUBRX(r4, r6, v5) + vctzlsbb r6, v7 + vextubrx r5, r6, v4 + vextubrx r4, r6, v5 subf r3, r4, r5 extsw r3, r3 blr diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S index 60c74ab..5a25f94 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S @@ -29,21 +29,6 @@ # define STRNCMP strncmp #endif -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ -#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) - -#define VEXTUBRX(t,a,b) .long (0x1000070d \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - -#define VCMPNEZB(t,a,b) .long (0x10000507 \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - /* Get 16 bytes for unaligned case. reg1: Vector to hold next 16 bytes. reg2: Address to read from. @@ -64,9 +49,7 @@ 2: \ vperm reg1, v9, reg1, reg3; -/* TODO: change this to .machine power9 when minimum binutils - is upgraded to 2.27. */ - .machine power7 + .machine power9 ENTRY_TOCLESS (STRNCMP, 4) /* Check if size is 0. */ cmpdi cr0, r5, 0 @@ -163,7 +146,7 @@ L(align): clrldi r6, r3, 60 subfic r11, r6, 16 GET16BYTES(v4, r3, v10) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 beq cr6, L(match) b L(different) @@ -186,7 +169,7 @@ L(match): L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -196,7 +179,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. 
v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -206,7 +189,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -216,7 +199,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -228,7 +211,7 @@ L(s1_align): L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -238,7 +221,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -248,7 +231,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -258,7 +241,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -268,11 +251,11 @@ L(aligned): b L(aligned) /* Calculate and return the difference. 
*/ L(different): - VCTZLSBB(r6, v7) + vctzlsbb r6, v7 cmplw cr7, r5, r6 ble cr7, L(ret0) - VEXTUBRX(r5, r6, v4) - VEXTUBRX(r4, r6, v5) + vextubrx r5, r6, v4 + vextubrx r4, r6, v5 subf r3, r4, r5 extsw r3, r3 blr diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index dc7c5b1..e321ce5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -11,7 +11,6 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strlen-power7 strlen-ppc64 \ strnlen-power8 strnlen-power7 strnlen-ppc64 \ strcasecmp-power7 strcasecmp_l-power7 \ - strncase-power7 strncase_l-power7 \ strncmp-power8 strncmp-ppc64 \ strchr-power8 strchr-power7 strchr-ppc64 \ strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ @@ -31,15 +30,12 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strncase-power8 ifneq (,$(filter %le,$(config-machine))) -sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ - memmove-power10 memset-power10 rawmemchr-power9 \ - rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strncmp-power10 strcpy-power9 strcat-power10 \ - stpcpy-power9 strlen-power9 strncpy-power9 stpncpy-power9 \ - strlen-power10 +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ + rawmemchr-power9 rawmemchr-power10 \ + strcmp-power9 strncmp-power9 \ + strcpy-power9 strcat-power10 stpcpy-power9 \ + strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif -CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops -CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops endif # Called during static initialization diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 0a31a58..016d05f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 
@@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ IFUNC_IMPL (i, name, strncmp, #ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __strncmp_power10) IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strncmp_power9) @@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, -#ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, memchr, - hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __memchr_power10) -#endif IFUNC_IMPL_ADD (array, i, memchr, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, @@ -311,19 +302,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strncasecmp_power8) - IFUNC_IMPL_ADD (array, i, strncasecmp, - hwcap & PPC_FEATURE_ARCH_2_06, - __strncasecmp_power7) IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strncase_l.c. */ - IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - hwcap & PPC_FEATURE_ARCH_2_06, - __strncasecmp_l_power7) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_ppc)) - /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c. 
*/ IFUNC_IMPL (i, name, strrchr, IFUNC_IMPL_ADD (array, i, strrchr, @@ -387,10 +367,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcmp, #ifdef __LITTLE_ENDIAN__ IFUNC_IMPL_ADD (array, i, strcmp, - (hwcap2 & PPC_FEATURE2_ARCH_3_1) - && (hwcap & PPC_FEATURE_HAS_VSX), - __strcmp_power10) - IFUNC_IMPL_ADD (array, i, strcmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strcmp_power9) diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c index b63c796..3abd64a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden; extern __typeof (__memchr) __memchr_power7 attribute_hidden; extern __typeof (__memchr) __memchr_power8 attribute_hidden; -# ifdef __LITTLE_ENDIAN__ -extern __typeof (__memchr) __memchr_power10 attribute_hidden; -# endif /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__memchr, -# ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __memchr_power10 : -# endif - (hwcap2 & PPC_FEATURE2_ARCH_2_07 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? __memchr_power8 : - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __memchr_power7 - : __memchr_ppc); + (hwcap2 & PPC_FEATURE2_ARCH_2_07 + && hwcap & PPC_FEATURE_HAS_ALTIVEC) + ? __memchr_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memchr_power7 + : __memchr_ppc); weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S deleted file mode 100644 index 7b45fcd..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S +++ /dev/null @@ -1,26 +0,0 @@ -/* Optimized strcmp implementation for POWER10/PPC64. 
- Copyright (C) 2021-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRCMP __strcmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S> -#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */ diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c index 3c636e3..7c77c08 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden; extern __typeof (strcmp) __strcmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strcmp) __strcmp_power9 attribute_hidden; -extern __typeof (strcmp) __strcmp_power10 attribute_hidden; # endif # undef strcmp libc_ifunc_redirected (__redirect_strcmp, strcmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __strcmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? 
__strcmp_power9 : diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c deleted file mode 100644 index a5b0685..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <string.h> - -#define __strncasecmp __strncasecmp_power7 - -extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; - -#include <string/strncase.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c index a83c11a..807b0af 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c @@ -23,15 +23,12 @@ extern __typeof (__strncasecmp) __libc_strncasecmp; extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; libc_ifunc (__libc_strncasecmp, (hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? __strncasecmp_power8: - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __strncasecmp_power7 - : __strncasecmp_ppc); + ? 
__strncasecmp_power8 + : __strncasecmp_ppc); weak_alias (__libc_strncasecmp, strncasecmp) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c deleted file mode 100644 index e9c9022..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <string.h> - -#define __strncasecmp_l __strncasecmp_l_power7 - -#undef weak_alias -#define weak_alias(a,b) - -#undef libc_hidden_def -#define libc_hidden_def(name) - -extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; - -#include <string/strncase_l.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c b/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c deleted file mode 100644 index 1f28448..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c +++ /dev/null @@ -1,42 +0,0 @@ -/* Multiple versions of strncasecmp_l - Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <string.h> -# define strncasecmp_l __strncasecmp_l_ppc -extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; -extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; -#endif - -#include <string/strncase_l.c> -#undef strncasecmp_l - -#if IS_IN (libc) -# include <shlib-compat.h> -# include "init-arch.h" - -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; -libc_ifunc (__libc_strncasecmp_l, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __strncasecmp_l_power7 - : __strncasecmp_l_ppc); - -weak_alias (__libc_strncasecmp_l, strncasecmp_l) -#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S deleted file mode 100644 index 4387908..0000000 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRNCMP __strncmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) - -#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S> -#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index 0a664a6..4cfe27f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; -extern __typeof (strncmp) __strncmp_power10 attribute_hidden; # endif # undef strncmp @@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden; ifunc symbol properly. */ libc_ifunc_redirected (__redirect_strncmp, strncmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __strncmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strncmp_power9 : |