363 files changed, 7799 insertions, 4293 deletions
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 4b7f8a5..bb97d31 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -41,15 +41,18 @@ gen-as-const-headers += \
   dl-link.sym \
   rtld-global-offsets.sym
 
-tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2
+tests-internal += \
+  tst-ifunc-arg-1 \
+  tst-ifunc-arg-2 \
+  tst-ifunc-arg-3 \
+  tst-ifunc-arg-4 \
+  # tests-internal
 
-ifeq (yes,$(aarch64-variant-pcs))
 tests += tst-vpcs
 modules-names += tst-vpcs-mod
 LDFLAGS-tst-vpcs-mod.so = -Wl,-z,lazy
 $(objpfx)tst-vpcs: $(objpfx)tst-vpcs-mod.so
 endif
-endif
 
 ifeq ($(subdir),csu)
 gen-as-const-headers += \
@@ -75,7 +78,9 @@ sysdep_routines += \
   __alloc_gcs
 
 tests += \
-  tst-sme-jmp
+  tst-sme-jmp \
+  tst-sme-za-state \
+  # tests
 endif
 
 ifeq ($(subdir),malloc)
diff --git a/sysdeps/aarch64/__alloc_gcs.c b/sysdeps/aarch64/__alloc_gcs.c
index e70b459..b98e5fc 100644
--- a/sysdeps/aarch64/__alloc_gcs.c
+++ b/sysdeps/aarch64/__alloc_gcs.c
@@ -15,6 +15,8 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include "aarch64-gcs.h"
+
 #include <sysdep.h>
 #include <unistd.h>
 #include <sys/mman.h>
@@ -34,7 +36,7 @@ map_shadow_stack (void *addr, size_t size, unsigned long flags)
 #define GCS_ALTSTACK_RESERVE 160
 
 void *
-__alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
+__alloc_gcs (size_t stack_size, struct gcs_record *gcs)
 {
   size_t size = (stack_size / 2 + GCS_ALTSTACK_RESERVE) & -8UL;
   if (size > GCS_MAX_SIZE)
@@ -45,9 +47,6 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
   if (base == MAP_FAILED)
     return NULL;
 
-  *ss_base = base;
-  *ss_size = size;
-
   uint64_t *gcsp = (uint64_t *) ((char *) base + size);
   /* Skip end of GCS token.  */
   gcsp--;
@@ -58,6 +57,14 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
       __munmap (base, size);
       return NULL;
     }
+
+  if (gcs != NULL)
+    {
+      gcs->gcs_base = base;
+      gcs->gcs_token = gcsp;
+      gcs->gcs_size = size;
+    }
+
   /* Return the target GCS pointer for context switch.  */
   return gcsp + 1;
 }
diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S
index 6290803..92f4814 100644
--- a/sysdeps/aarch64/__arm_za_disable.S
+++ b/sysdeps/aarch64/__arm_za_disable.S
@@ -88,10 +88,8 @@ L(save_loop):
 L(end):
 	ret
 L(fail):
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-#endif
+	paciasp
+	cfi_negate_ra_state
 	stp	x29, x30, [sp, -32]!
 	cfi_adjust_cfa_offset (32)
 	cfi_rel_offset (x29, 0)
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 981bf80..70ac02c 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -24,51 +24,43 @@
 /* __longjmp(jmpbuf, val) */
 
 ENTRY (__longjmp)
-	cfi_def_cfa(x0, 0)
-	cfi_offset(x19, JB_X19<<3)
-	cfi_offset(x20, JB_X20<<3)
-	cfi_offset(x21, JB_X21<<3)
-	cfi_offset(x22, JB_X22<<3)
-	cfi_offset(x23, JB_X23<<3)
-	cfi_offset(x24, JB_X24<<3)
-	cfi_offset(x25, JB_X25<<3)
-	cfi_offset(x26, JB_X26<<3)
-	cfi_offset(x27, JB_X27<<3)
-	cfi_offset(x28, JB_X28<<3)
-	cfi_offset(x29, JB_X29<<3)
-	cfi_offset(x30, JB_LR<<3)
-
-	cfi_offset( d8, JB_D8<<3)
-	cfi_offset( d9, JB_D9<<3)
-	cfi_offset(d10, JB_D10<<3)
-	cfi_offset(d11, JB_D11<<3)
-	cfi_offset(d12, JB_D12<<3)
-	cfi_offset(d13, JB_D13<<3)
-	cfi_offset(d14, JB_D14<<3)
-	cfi_offset(d15, JB_D15<<3)
 
 #if IS_IN(libc)
-	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.  */
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# endif
-	stp	x29, x30, [sp, -16]!
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
+	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.
+	   The calling convention of __libc_arm_za_disable lets us hold the
+	   return address in x13 instead of spilling it to the stack.  As a
+	   result we also don't need to sign the return address and check
+	   it after returning, because it is never stored to the stack.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
 	bl	__libc_arm_za_disable
-	ldp	x29, x30, [sp], 16
-	cfi_adjust_cfa_offset (-16)
-	cfi_restore (x29)
-	cfi_restore (x30)
-# if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-# endif
+	mov	x30, x13
+	cfi_register (x13, x30)
 #endif
 
+	cfi_def_cfa (x0, 0)
+	cfi_offset (x19, JB_X19<<3)
+	cfi_offset (x20, JB_X20<<3)
+	cfi_offset (x21, JB_X21<<3)
+	cfi_offset (x22, JB_X22<<3)
+	cfi_offset (x23, JB_X23<<3)
+	cfi_offset (x24, JB_X24<<3)
+	cfi_offset (x25, JB_X25<<3)
+	cfi_offset (x26, JB_X26<<3)
+	cfi_offset (x27, JB_X27<<3)
+	cfi_offset (x28, JB_X28<<3)
+	cfi_offset (x29, JB_X29<<3)
+	cfi_offset (x30, JB_LR<<3)
+
+	cfi_offset ( d8, JB_D8<<3)
+	cfi_offset ( d9, JB_D9<<3)
+	cfi_offset (d10, JB_D10<<3)
+	cfi_offset (d11, JB_D11<<3)
+	cfi_offset (d12, JB_D12<<3)
+	cfi_offset (d13, JB_D13<<3)
+	cfi_offset (d14, JB_D14<<3)
+	cfi_offset (d15, JB_D15<<3)
+
 	ldp	x19, x20, [x0, #JB_X19<<3]
 	ldp	x21, x22, [x0, #JB_X21<<3]
 	ldp	x23, x24, [x0, #JB_X23<<3]
diff --git a/sysdeps/aarch64/aarch64-gcs.h b/sysdeps/aarch64/aarch64-gcs.h
index 162ef18..8e253ed 100644
--- a/sysdeps/aarch64/aarch64-gcs.h
+++ b/sysdeps/aarch64/aarch64-gcs.h
@@ -23,6 +23,21 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-void *__alloc_gcs (size_t, void **, size_t *) attribute_hidden;
+struct gcs_record
+{
+  void *gcs_base;  /* Start of the GCS mapping made by __alloc_gcs.  */
+  void *gcs_token;  /* One 8-byte slot below __alloc_gcs's return value.  */
+  size_t gcs_size;  /* Length of that mapping in bytes.  */
+};
+
+void *__alloc_gcs (size_t, struct gcs_record *) attribute_hidden;
+
+static inline bool
+has_gcs (void)  /* True iff the chkfeat probe leaves x16 == 0.  */
+{
+  register unsigned long x16 asm ("x16") = 1;  /* In/out operand.  */
+  asm ("hint	40" /* chkfeat x16 */ : "+r" (x16));
+  return x16 == 0;  /* NOTE(review): 0 presumably means GCS is on - confirm.  */
+}
 
 #endif
diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
index 4bd5496..26a0989 100755
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -185,218 +185,11 @@ else
 default-abi = lp64"
 fi
 
-# Only consider BTI supported if -mbranch-protection=bti is
-# on by default in the compiler and the linker produces
-# binaries with GNU property notes in PT_GNU_PROPERTY segment.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BTI support" >&5
-printf %s "checking for BTI support... " >&6; }
-if test ${libc_cv_aarch64_bti+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-void foo (void) { }
-EOF
-  libc_cv_aarch64_bti=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -lW conftest.so | grep -q GNU_PROPERTY'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_bti=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_bti" >&5
-printf "%s\n" "$libc_cv_aarch64_bti" >&6; }
-config_vars="$config_vars
-aarch64-bti = $libc_cv_aarch64_bti"
-if test $libc_cv_aarch64_bti = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_BTI 1" >>confdefs.h
-
-fi
-
-# Check if glibc is built with return address signing, i.e.
-# if -mbranch-protection=pac-ret is on. We need this because
-# pac-ret relies on unwinder support so it's not safe to use
-# it in assembly code unconditionally, but there is no
-# feature test macro for it in gcc.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if pac-ret is enabled" >&5
-printf %s "checking if pac-ret is enabled... " >&6; }
-if test ${libc_cv_aarch64_pac_ret+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-int bar (void);
-int foo (void) { return bar () + 1; }
-EOF
-  libc_cv_aarch64_pac_ret=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='grep -q -E '\''(hint( |	)+25|paciasp)'\'' conftest.s'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_pac_ret=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_pac_ret" >&5
-printf "%s\n" "$libc_cv_aarch64_pac_ret" >&6; }
-if test $libc_cv_aarch64_pac_ret = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_PAC_RET 1" >>confdefs.h
-
-fi
-
-# Check if binutils supports variant PCS symbols.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for variant PCS support" >&5
-printf %s "checking for variant PCS support... " >&6; }
-if test ${libc_cv_aarch64_variant_pcs+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.S <<EOF
-.global foo
-.type foo, %function
-.variant_pcs foo
-foo:
-	ret
-.global bar
-.type bar, %function
-bar:
-	b foo
-EOF
-  libc_cv_aarch64_variant_pcs=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_variant_pcs=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_variant_pcs" >&5
-printf "%s\n" "$libc_cv_aarch64_variant_pcs" >&6; }
-config_vars="$config_vars
-aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs"
-
-# Check if asm support armv8.2-a+sve
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SVE support in assembler" >&5
-printf %s "checking for SVE support in assembler... " >&6; }
-if test ${libc_cv_aarch64_sve_asm+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e) cat > conftest.s <<\EOF
-	.arch armv8.2-a+sve
-	ptrue p0.b
-EOF
-if { ac_try='${CC-cc} -c conftest.s 1>&5'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }; then
-  libc_cv_aarch64_sve_asm=yes
-else
-  libc_cv_aarch64_sve_asm=no
-fi
-rm -f conftest* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_asm" >&5
-printf "%s\n" "$libc_cv_aarch64_sve_asm" >&6; }
-if test $libc_cv_aarch64_sve_asm = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h
-
-fi
-
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
-# Check if compiler supports SVE ACLE.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for availability of SVE ACLE" >&5
-printf %s "checking for availability of SVE ACLE... " >&6; }
-if test ${libc_cv_aarch64_sve_acle+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-#include <arm_sve.h>
-EOF
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }; then
-    libc_cv_aarch64_sve_acle=yes
-  else
-    libc_cv_aarch64_sve_acle=no
-  fi
-  rm conftest.c ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_acle" >&5
-printf "%s\n" "$libc_cv_aarch64_sve_acle" >&6; }
-
-# Check if compiler is sufficient to build mathvec
-if test $build_mathvec = yes; then
-  fail=no
-  if test $libc_cv_aarch64_variant_pcs = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but linker does not support variant PCS." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but linker does not support variant PCS." >&2;}
-  fi
-  if test $libc_cv_aarch64_sve_asm = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but assembler does not support SVE." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but assembler does not support SVE." >&2;}
-  fi
-  if test $libc_cv_aarch64_sve_acle = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&2;}
-  fi
-  if test $fail = yes; then
-    as_fn_error $? "use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI)." "$LINENO" 5
-  fi
-else
+if test $build_mathvec = no; then
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is disabled, this results in incomplete ABI." >&5
 printf "%s\n" "$as_me: WARNING: mathvec is disabled, this results in incomplete ABI." >&2;}
 fi
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 56d12d6..22fca8b 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -24,119 +24,10 @@ else
   LIBC_CONFIG_VAR([default-abi], [lp64])
 fi
 
-# Only consider BTI supported if -mbranch-protection=bti is
-# on by default in the compiler and the linker produces
-# binaries with GNU property notes in PT_GNU_PROPERTY segment.
-AC_CACHE_CHECK([for BTI support], [libc_cv_aarch64_bti], [dnl
-  cat > conftest.c <<EOF
-void foo (void) { }
-EOF
-  libc_cv_aarch64_bti=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c]) \
-     && AC_TRY_COMMAND([$READELF -lW conftest.so | grep -q GNU_PROPERTY]) \
-     && AC_TRY_COMMAND([$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"])
-  then
-    libc_cv_aarch64_bti=yes
-  fi
-  rm -rf conftest.*])
-LIBC_CONFIG_VAR([aarch64-bti], [$libc_cv_aarch64_bti])
-if test $libc_cv_aarch64_bti = yes; then
-  AC_DEFINE(HAVE_AARCH64_BTI)
-fi
-
-# Check if glibc is built with return address signing, i.e.
-# if -mbranch-protection=pac-ret is on. We need this because
-# pac-ret relies on unwinder support so it's not safe to use
-# it in assembly code unconditionally, but there is no
-# feature test macro for it in gcc.
-AC_CACHE_CHECK([if pac-ret is enabled], [libc_cv_aarch64_pac_ret], [dnl
-  cat > conftest.c <<EOF
-int bar (void);
-int foo (void) { return bar () + 1; }
-EOF
-  libc_cv_aarch64_pac_ret=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
-     && AC_TRY_COMMAND([grep -q -E '\''(hint( |	)+25|paciasp)'\'' conftest.s])
-  then
-    libc_cv_aarch64_pac_ret=yes
-  fi
-  rm -rf conftest.*])
-if test $libc_cv_aarch64_pac_ret = yes; then
-  AC_DEFINE(HAVE_AARCH64_PAC_RET)
-fi
-
-# Check if binutils supports variant PCS symbols.
-AC_CACHE_CHECK([for variant PCS support], [libc_cv_aarch64_variant_pcs], [dnl
-  cat > conftest.S <<EOF
-.global foo
-.type foo, %function
-.variant_pcs foo
-foo:
-	ret
-.global bar
-.type bar, %function
-bar:
-	b foo
-EOF
-  libc_cv_aarch64_variant_pcs=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S]) \
-     && AC_TRY_COMMAND([$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS])
-  then
-    libc_cv_aarch64_variant_pcs=yes
-  fi
-  rm -rf conftest.*])
-LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
-
-# Check if asm support armv8.2-a+sve
-AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl
-cat > conftest.s <<\EOF
-	.arch armv8.2-a+sve
-	ptrue p0.b
-EOF
-if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
-  libc_cv_aarch64_sve_asm=yes
-else
-  libc_cv_aarch64_sve_asm=no
-fi
-rm -f conftest*])
-if test $libc_cv_aarch64_sve_asm = yes; then
-  AC_DEFINE(HAVE_AARCH64_SVE_ASM)
-fi
-
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
-# Check if compiler supports SVE ACLE.
-AC_CACHE_CHECK(for availability of SVE ACLE, libc_cv_aarch64_sve_acle, [dnl
-  cat > conftest.c <<EOF
-#include <arm_sve.h>
-EOF
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c]); then
-    libc_cv_aarch64_sve_acle=yes
-  else
-    libc_cv_aarch64_sve_acle=no
-  fi
-  rm conftest.c])
-
-# Check if compiler is sufficient to build mathvec
-if test $build_mathvec = yes; then
-  fail=no
-  if test $libc_cv_aarch64_variant_pcs = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but linker does not support variant PCS.])
-  fi
-  if test $libc_cv_aarch64_sve_asm = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but assembler does not support SVE.])
-  fi
-  if test $libc_cv_aarch64_sve_acle = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but compiler does not have SVE ACLE.])
-  fi
-  if test $fail = yes; then
-    AC_MSG_ERROR([use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI).])
-  fi
-else
+if test $build_mathvec = no; then
   AC_MSG_WARN([mathvec is disabled, this results in incomplete ABI.])
 fi
diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S
index 0c3ee40..e9e530c 100644
--- a/sysdeps/aarch64/crti.S
+++ b/sysdeps/aarch64/crti.S
@@ -65,7 +65,7 @@ call_weak_fn:
 	cbz	x0, 1f
 	b	PREINIT_FUNCTION
 1:
-	RET
+	ret
 	.size	call_weak_fn, .-call_weak_fn
 #endif
 
@@ -75,11 +75,7 @@ call_weak_fn:
 	.hidden	_init
 	.type	_init, %function
 _init:
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-#else
-	BTI_C
-#endif
+	paciasp
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
 #if PREINIT_FUNCTION_WEAK
@@ -94,10 +90,6 @@ _init:
 	.hidden	_fini
 	.type	_fini, %function
 _fini:
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-#else
-	BTI_C
-#endif
+	paciasp
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
diff --git a/sysdeps/aarch64/crtn.S b/sysdeps/aarch64/crtn.S
index b52b10e..653a548 100644
--- a/sysdeps/aarch64/crtn.S
+++ b/sysdeps/aarch64/crtn.S
@@ -41,14 +41,10 @@
 
 	.section .init,"ax",%progbits
 	ldp	x29, x30, [sp], 16
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-#endif
-	RET
+	autiasp
+	ret
 
 	.section .fini,"ax",%progbits
 	ldp	x29, x30, [sp], 16
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-#endif
-	RET
+	autiasp
+	ret
diff --git a/sysdeps/aarch64/dl-irel.h b/sysdeps/aarch64/dl-irel.h
index ae402bc..7bae3c3 100644
--- a/sysdeps/aarch64/dl-irel.h
+++ b/sysdeps/aarch64/dl-irel.h
@@ -21,11 +21,26 @@
 #define _DL_IREL_H
 
 #include <stdio.h>
-#include <unistd.h>
 #include <ldsodefs.h>
-#include <sysdep.h>
 #include <sys/ifunc.h>
 
+#define _IFUNC_ARG_SIZE_VER0 24 /* sizeof 1st published __ifunc_arg_t */
+#define _IFUNC_ARG_SIZE_VER1 40 /* sizeof 2nd published __ifunc_arg_t */
+
+#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+  (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER))
+
+_Static_assert (sizeof (__ifunc_arg_t) == _IFUNC_ARG_SIZE_VER1,
+  "sizeof (__ifunc_arg_t) != _IFUNC_ARG_SIZE_VER1");
+
+_Static_assert (_IFUNC_ARG_SIZE_VER1
+  == (_IFUNC_HWCAP_MAX + 1) * sizeof (unsigned long),
+  "_IFUNC_ARG_SIZE_VER1 and _IFUNC_HWCAP_MAX mismatch");
+
+#undef offsetofend
+#undef sizeof_field
+
 #define ELF_MACHINE_IRELA	1
 
 static inline ElfW(Addr)
@@ -37,6 +52,8 @@ elf_ifunc_invoke (ElfW(Addr) addr)
   arg._size = sizeof (arg);
   arg._hwcap = GLRO(dl_hwcap);
   arg._hwcap2 = GLRO(dl_hwcap2);
+  arg._hwcap3 = GLRO(dl_hwcap3);
+  arg._hwcap4 = GLRO(dl_hwcap4);
   return ((ElfW(Addr) (*) (uint64_t, const __ifunc_arg_t *)) (addr))
 	 (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg);
 }
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index fc40d66..2ff8d95 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,9 +74,9 @@
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
-	BTI_C
+	bti	c
 	ldr	x0, [x0, 8]
-	RET
+	ret
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
@@ -95,7 +95,7 @@ _dl_tlsdesc_return:
 	cfi_startproc
 	.align  2
 _dl_tlsdesc_undefweak:
-	BTI_C
+	bti	c
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
 	ldr	x0, [x0, 8]
@@ -103,7 +103,7 @@ _dl_tlsdesc_undefweak:
 	sub	x0, x0, x1
 	ldr	x1, [sp], #16
 	cfi_adjust_cfa_offset (-16)
-	RET
+	ret
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
@@ -141,12 +141,8 @@ _dl_tlsdesc_undefweak:
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_dynamic:
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# else
-	BTI_C
-# endif
+	paciasp
+	cfi_negate_ra_state
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
@@ -177,12 +173,10 @@ _dl_tlsdesc_dynamic:
 1:
 	ldp	 x3,  x4, [sp, #16]
 	ldp	 x1,  x2, [sp], #32
-# if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-# endif
+	autiasp
+	cfi_negate_ra_state
 	cfi_adjust_cfa_offset (-32)
-	RET
+	ret
 2:
 	/* This is the slow path. We need to call __tls_get_addr() which
 	   means we need to save and restore all the register that the
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index d6bed96..d628b01 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -34,7 +34,7 @@
 	cfi_startproc
 	.align 2
 _dl_runtime_resolve:
-	BTI_C
+	bti	c
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -127,12 +127,8 @@ _dl_runtime_resolve:
 	cfi_startproc
 	.align 2
 _dl_runtime_profile:
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# else
-	BTI_C
-# endif
+	paciasp
+	cfi_negate_ra_state
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -251,17 +247,12 @@ _dl_runtime_profile:
 	cfi_restore(x29)
 	cfi_restore(x30)
 
-# if HAVE_AARCH64_PAC_RET
 	add	sp, sp, SF_SIZE
 	cfi_adjust_cfa_offset (-SF_SIZE)
-	AUTIASP
-	cfi_window_save
+	autiasp
+	cfi_negate_ra_state
 	add	sp, sp, 16
 	cfi_adjust_cfa_offset (-16)
-# else
-	add	sp, sp, SF_SIZE + 16
-	cfi_adjust_cfa_offset (- SF_SIZE - 16)
-# endif
 
 	/* Jump to the newly found address.  */
 	br	ip0
@@ -321,10 +312,8 @@ _dl_runtime_profile:
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
 	cfi_restore(lr)
-# if HAVE_AARCH64_PAC_RET
 	/* Note: LR restored from La_aarch64_reg has no PAC.  */
-	cfi_window_save
-# endif
+	cfi_negate_ra_state
 	mov	sp, x29
 	cfi_def_cfa_register (sp)
 	ldr	x29, [x29, #0]
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index aadedf1..068c11c 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -1,10 +1,14 @@
 libmvec-supported-funcs = acos \
                           acosh \
+                          acospi \
                           asin \
                           asinh \
+                          asinpi \
                           atan \
                           atanh \
+                          atanpi \
                           atan2 \
+                          atan2pi \
                           cbrt \
                           cos \
                           cosh \
@@ -52,8 +56,11 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
                   v_powf_data
 endif
 
-sve-cflags = -march=armv8-a+sve
+# Enable SVE for building libmvec.  Since CFLAGS may contain a -mcpu or -march,
+# add a generic -mcpu and -march with SVE enabled.  Also use a tune for a modern
+# SVE core.
 
+sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1
 
 ifeq ($(build-mathvec),yes)
 bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 0f9503f..2980cb7 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -157,4 +157,26 @@ libmvec {
     _ZGVsMxv_tanpi;
     _ZGVsMxv_tanpif;
   }
+  GLIBC_2.42 {
+    _ZGVnN2v_acospi;
+    _ZGVnN2v_acospif;
+    _ZGVnN4v_acospif;
+    _ZGVsMxv_acospi;
+    _ZGVsMxv_acospif;
+    _ZGVnN2v_asinpi;
+    _ZGVnN2v_asinpif;
+    _ZGVnN4v_asinpif;
+    _ZGVsMxv_asinpi;
+    _ZGVsMxv_asinpif;
+    _ZGVnN2v_atanpi;
+    _ZGVnN2v_atanpif;
+    _ZGVnN4v_atanpif;
+    _ZGVsMxv_atanpi;
+    _ZGVsMxv_atanpif;
+    _ZGVnN2vv_atan2pi;
+    _ZGVnN2vv_atan2pif;
+    _ZGVnN4vv_atan2pif;
+    _ZGVsMxvv_atan2pi;
+    _ZGVsMxvv_atan2pif;
+  }
 }
diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
index 7709b54..453f780 100644
--- a/sysdeps/aarch64/fpu/acos_advsimd.c
+++ b/sysdeps/aarch64/fpu/acos_advsimd.c
@@ -18,24 +18,23 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
-  float64x2_t poly[12];
-  float64x2_t pi, pi_over_2;
+  double c1, c3, c5, c7, c9, c11;
+  float64x2_t c0, c2, c4, c6, c8, c10;
   uint64x2_t abs_mask;
+  float64x2_t pi, pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
-	    V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
-	    V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
-	    V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
-	    V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
-	    V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
-  .pi = V2 (0x1.921fb54442d18p+1),
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+  .c0 = V2 (0x1.555555555554ep-3),     .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),   .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi = V2 (0x1.921fb54442d18p+1),     .pi_over_2 = V2 (0x1.921fb54442d18p+0),
   .abs_mask = V2 (0x7fffffffffffffff),
 };
 
@@ -63,7 +62,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
 
      acos(x) ~ pi/2 - (x + x^3 P(x^2)).
 
-   The largest observed error in this region is 1.18 ulps,
+   The largest observed error in this region is 1.18 ulp:
    _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0
 				       want 0x1.0d54d1985c069p+0.
 
@@ -71,9 +70,9 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
 
      acos(x) = y + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 1.52 ulps,
-   _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1
-				       want 0x1.edbbedf8a7d6cp-1.  */
+   The largest observed error in this region is 1.50 ulp:
+   _ZGVnN2v_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
+				       want 0x1.ec1a46aa829p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -99,13 +98,32 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
   float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2));
 
   /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z3 = vmulq_f64 (z2, z);
   float64x2_t z4 = vmulq_f64 (z2, z2);
   float64x2_t z8 = vmulq_f64 (z4, z4);
-  float64x2_t z16 = vmulq_f64 (z8, z8);
-  float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
 
-  /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+  /* Order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p411 = vfmaq_f64 (p47, z8, p811);
+  float64x2_t p = vfmaq_f64 (p03, z8, p411);
+
+  /* Finalize polynomial: z + z3 * P(z2).  */
+  p = vfmaq_f64 (z, z3, p);
 
   /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for  |x| < 0.5
 	       = 2 Q(|x|)               , for  0.5 < x < 1.0
diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c
index 74e2f7d..104f0d7 100644
--- a/sysdeps/aarch64/fpu/acos_sve.c
+++ b/sysdeps/aarch64/fpu/acos_sve.c
@@ -18,20 +18,21 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[12];
-  float64_t pi, pi_over_2;
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5,
-	    0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
-	    0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8,
-	    0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
-  .pi = 0x1.921fb54442d18p+1,
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
   .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
@@ -42,20 +43,21 @@ static const struct data
 
      acos(x) ~ pi/2 - (x + x^3 P(x^2)).
 
-   The largest observed error in this region is 1.18 ulps,
-   _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0
-				       want 0x1.0d4d0f55667f7p+0.
+   The largest observed error in this region is 1.18 ulp:
+   _ZGVsMxv_acos (0x1.fbb7c9079b429p-2) got 0x1.0d51266607582p+0
+				       want 0x1.0d51266607583p+0.
 
    For |x| in [0.5, 1.0], use same approximation with a change of variable
 
      acos(x) = y + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 1.52 ulps,
-   _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1
-				       want 0x1.ed82df4243f0bp-1.  */
+   The largest observed error in this region is 1.50 ulp:
+   _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
+				       want 0x1.ec1a46aa829p-1.  */
 svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
 
   svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
   svfloat64_t ax = svabs_x (pg, x);
@@ -70,24 +72,41 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svsqrt_m (ax, a_gt_half, z2);
 
   /* Use a single polynomial approximation P for both intervals.  */
-  svfloat64_t z4 = svmul_x (pg, z2, z2);
-  svfloat64_t z8 = svmul_x (pg, z4, z4);
-  svfloat64_t z16 = svmul_x (pg, z8, z8);
-  svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+  svfloat64_t z3 = svmul_x (ptrue, z2, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmad_x (pg, p411, z8, p03);
 
   /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+  p = svmad_x (pg, p, z3, z);
 
   /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for  |x| < 0.5
 	       = 2 Q(|x|)               , for  0.5 < x < 1.0
 	       = pi - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
-  svfloat64_t y
-      = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign));
-
-  svbool_t is_neg = svcmplt (pg, x, 0.0);
-  svfloat64_t off = svdup_f64_z (is_neg, d->pi);
-  svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0));
-  svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2));
-
-  return svmla_x (pg, add, mul, y);
+  svfloat64_t mul = svreinterpret_f64 (
+      svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10));
+  mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign));
+  svfloat64_t add = svreinterpret_f64 (
+      svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->pi_over_2))));
+  add = svsub_m (a_gt_half, sv_f64 (d->pi_over_2), add);
+
+  return svmsb_x (pg, p, mul, add);
 }
diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c
index 326b2cc..3a84959 100644
--- a/sysdeps/aarch64/fpu/acosh_sve.c
+++ b/sysdeps/aarch64/fpu/acosh_sve.c
@@ -30,10 +30,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 }
 
 /* SVE approximation for double-precision acosh, based on log1p.
-   The largest observed error is 3.19 ULP in the region where the
+   The largest observed error is 3.14 ULP in the region where the
    argument to log1p falls in the k=0 interval, i.e. x close to 1:
-   SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
-					   want 0x1.ed23399f51373p-2.  */
+   SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2
+					   want 0x1.ef0cee7c33ce4p-2.  */
 svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
 {
   /* (ix - One) >= (BigBound - One).  */
diff --git a/sysdeps/aarch64/fpu/acospi_advsimd.c b/sysdeps/aarch64/fpu/acospi_advsimd.c
new file mode 100644
index 0000000..bb6c209
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospi_advsimd.c
@@ -0,0 +1,118 @@
+/* Double-Precision vector (Advanced SIMD) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10;
+  uint64x2_t abs_mask;
+  float64x2_t one, inv_pi;
+  double c1, c3, c5, c7, c9, c11; /* Loaded in pairs with vld1q_f64.  */
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126, 0x1p-2 ], rel error: 0x1.ef9f94b1p-33, generated by
+     iterative minimisation of relative error (asinpif Sollya file).
+     NOTE(review): figures read like single-precision ones; confirm.  */
+  .c0 = V2 (0x1.b2995e7b7b5fbp-5),     .c1 = 0x1.8723a1d58d83p-6,
+  .c2 = V2 (0x1.d1a452eacf2fep-7),     .c3 = 0x1.3ce52c4d75582p-7,
+  .c4 = V2 (0x1.d2b2a0aea27d5p-8),     .c5 = 0x1.6a0b9b92cad8bp-8,
+  .c6 = V2 (0x1.2290c84438caep-8),     .c7 = 0x1.efba896580d02p-9,
+  .c8 = V2 (0x1.44446707af38p-9),      .c9 = 0x1.5070b3e7aa03ep-8,
+  .c10 = V2 (-0x1.c70015d0ebdafp-9),   .c11 = 0x1.27029c383fed9p-7,
+  .abs_mask = V2 (0x7fffffffffffffff), .one = V2 (1.0),
+  .inv_pi = V2 (0x1.45f306dc9c883p-2),
+};
+
+/* Double-precision implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order-11 polynomial P to approximate asinpi
+   such that the final approximation of acospi is an odd polynomial:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.35 ulp:
+   _ZGVnN2v_acospi (0x1.fb16ed35a6d64p-2) got 0x1.5722a3dbcafb4p-2
+					 want 0x1.5722a3dbcafb5p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = y/pi + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.55 ulp:
+   _ZGVnN2v_acospi (0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
+					 want 0x1.ffd43d5dd3a9bp-4.  */
+float64x2_t VPCS_ATTR NOINLINE V_NAME_D1 (acospi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t ia = vandq_u64 (ix, d->abs_mask);
+
+  float64x2_t ax = vreinterpretq_f64_u64 (ia);
+  uint64x2_t a_le_half = vcaltq_f64 (x, v_f64 (0.5)); /* |x| < 0.5.  */
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| < 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5.  */
+  float64x2_t z2 = vbslq_f64 (a_le_half, vmulq_f64 (x, x),
+			      vfmsq_n_f64 (v_f64 (0.5), ax, 0.5));
+  float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p411 = vfmaq_f64 (p47, z8, p811);
+  float64x2_t p = vfmaq_f64 (p03, z8, p411);
+
+  /* Finalize polynomial: z / pi + z * z2 * P(z2).  */
+  p = vfmaq_f64 (d->inv_pi, z2, p);
+  p = vmulq_f64 (p, z);
+
+  /* acospi(x)
+		= 1/2 - sign(x) * Q(|x|), for       |x| < 0.5
+		= 2 Q(|x|)              , for  0.5 < x < 1.0
+		= 1 - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
+  float64x2_t y = vbslq_f64 (d->abs_mask, p, x); /* |p|, sign of x.  */
+  uint64x2_t is_neg = vcltzq_f64 (x);
+  float64x2_t off = vreinterpretq_f64_u64 (
+      vandq_u64 (is_neg, vreinterpretq_u64_f64 (d->one)));
+  float64x2_t mul = vbslq_f64 (a_le_half, d->one, v_f64 (-2.0));
+  float64x2_t add = vbslq_f64 (a_le_half, v_f64 (0.5), off);
+
+  return vfmsq_f64 (add, mul, y); /* add - mul * y.  */
+}
diff --git a/sysdeps/aarch64/fpu/acospi_sve.c b/sysdeps/aarch64/fpu/acospi_sve.c
new file mode 100644
index 0000000..e41eaad
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospi_sve.c
@@ -0,0 +1,112 @@
+/* Double-Precision vector (SVE) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c1, c3, c5, c7, c9, c11; /* Loaded in pairs with svld1rq.  */
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t inv_pi, half;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126, 0x1p-2 ], rel error: 0x1.ef9f94b1p-33, generated by
+     iterative minimisation of relative error (asinpif Sollya file).
+     NOTE(review): figures read like single-precision ones; confirm.  */
+  .c0 = 0x1.b2995e7b7b5fbp-5,	  .c1 = 0x1.8723a1d58d83p-6,
+  .c2 = 0x1.d1a452eacf2fep-7,	  .c3 = 0x1.3ce52c4d75582p-7,
+  .c4 = 0x1.d2b2a0aea27d5p-8,	  .c5 = 0x1.6a0b9b92cad8bp-8,
+  .c6 = 0x1.2290c84438caep-8,	  .c7 = 0x1.efba896580d02p-9,
+  .c8 = 0x1.44446707af38p-9,	  .c9 = 0x1.5070b3e7aa03ep-8,
+  .c10 = -0x1.c70015d0ebdafp-9,	  .c11 = 0x1.27029c383fed9p-7,
+  .inv_pi = 0x1.45f306dc9c883p-2, .half = 0.5,
+};
+
+/* Double-precision SVE implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 11 polynomial P to approximate asinpi
+   such that the final approximation of acospi is:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.35 ulp:
+   _ZGVsMxv_acospi (0x1.fb014996aea18p-2) got 0x1.572a91755bbf6p-2
+					 want 0x1.572a91755bbf7p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = y/pi + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.55 ulp:
+   _ZGVsMxv_acospi(0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
+					want 0x1.ffd43d5dd3a9bp-4.  */
+svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+  svfloat64_t ax = svabs_x (pg, x);
+  svbool_t a_gt_half = svacgt (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| <= 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >  0.5.  */
+  svfloat64_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+			  svmul_x (ptrue, x, x));
+  svfloat64_t z = svsqrt_m (ax, a_gt_half, z2);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
+  p = svmla_x (pg, sv_f64 (d->inv_pi), z2, p);
+  p = svmul_x (ptrue, p, z); /* Q = z / pi + z * z2 * P(z2).  */
+
+  /* acospi(x) = 1/2 - sign(x) * Q(|x|), for       |x| < 0.5
+	       = 2 Q(|x|)              , for  0.5 < x < 1.0
+	       = 1 - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
+  svfloat64_t mul = svreinterpret_f64 (
+      svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10));
+  mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign));
+  svfloat64_t add = svreinterpret_f64 (
+      svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->half))));
+  add = svsub_m (a_gt_half, sv_f64 (d->half), add);
+
+  return svmsb_x (pg, p, mul, add); /* add - p * mul.  */
+}
diff --git a/sysdeps/aarch64/fpu/acospif_advsimd.c b/sysdeps/aarch64/fpu/acospif_advsimd.c
new file mode 100644
index 0000000..8486b62
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospif_advsimd.c
@@ -0,0 +1,106 @@
+/* Single-Precision vector (Advanced SIMD) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, inv_pi;
+  float c1, c3, c5, null; /* null fills lane 3 of the &d->c1 load.  */
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126, 0x1p-2 ], rel error: 0x1.ef9f94b1p-33.  Generated using
+     iterative approach for minimisation of relative error in asinpif Sollya
+     file.  */
+  .c0 = V4 (0x1.b2995ep-5f),	 .c1 = 0x1.8724ep-6f,
+  .c2 = V4 (0x1.d1301ep-7f),	 .c3 = 0x1.446d3cp-7f,
+  .c4 = V4 (0x1.654848p-8f),	 .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = V4 (0x1.45f306p-2f),
+};
+
+#define AbsMask 0x7fffffff /* All bits except the float sign bit.  */
+
+/* Single-precision implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 5 polynomial P to approximate asinpi
+   such that the final approximation of acospi is an odd polynomial:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.23 ulps,
+      _ZGVnN4v_acospif (0x1.fee13ep-2) got 0x1.55beb4p-2 want 0x1.55beb2p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = y/pi + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.53 ulps,
+   _ZGVnN4v_acospif (0x1.6ad644p-1) got 0x1.fe8f96p-3
+				   want 0x1.fe8f9cp-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acospi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+  float32x4_t ax = vreinterpretq_f32_u32 (ia);
+  uint32x4_t a_le_half = vcaltq_f32 (x, v_f32 (0.5f)); /* |x| < 0.5.  */
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| < 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5.  */
+
+  float32x4_t z2 = vbslq_f32 (a_le_half, vmulq_f32 (x, x),
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
+  float32x4_t z = vbslq_f32 (a_le_half, ax, vsqrtq_f32 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+
+  /* Order-5 Estrin evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+  float32x4_t c135 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1);
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2);
+  float32x4_t p = vfmaq_f32 (p03, z8, p45);
+  /* Add 1/pi as final coeff.  */
+  p = vfmaq_f32 (d->inv_pi, z2, p);
+
+  /* Finalize polynomial: Q = z * (1/pi + z2 * P(z2)).  */
+  p = vmulq_f32 (z, p);
+
+  /* acospi(x)
+			= 1/2 - sign(x) * Q(|x|), for       |x| < 0.5
+			= 2 Q(|x|)              , for  0.5 < x < 1.0
+			= 1 - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
+
+  float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x); /* |p|, sign of x.  */
+  uint32x4_t is_neg = vcltzq_f32 (x);
+  float32x4_t off = vreinterpretq_f32_u32 (
+      vandq_u32 (vreinterpretq_u32_f32 (v_f32 (1.0f)), is_neg));
+  float32x4_t mul = vbslq_f32 (a_le_half, v_f32 (1.0f), v_f32 (-2.0f));
+  float32x4_t add = vbslq_f32 (a_le_half, v_f32 (0.5f), off);
+
+  return vfmsq_f32 (add, mul, y); /* add - mul * y.  */
+}
+libmvec_hidden_def (V_NAME_F1 (acospi))
+HALF_WIDTH_ALIAS_F1 (acospi)
diff --git a/sysdeps/aarch64/fpu/acospif_sve.c b/sysdeps/aarch64/fpu/acospif_sve.c
new file mode 100644
index 0000000..ea4fc4a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospif_sve.c
@@ -0,0 +1,91 @@
+/* Single-Precision vector (SVE) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c0, c1, c2, c3, c4, inv_pi, half;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126, 0x1p-2 ], rel error: 0x1.ef9f94b1p-33.  Generated using
+     iterative approach for minimisation of relative error.  */
+  .c0 = 0x1.b29968p-5f, .c1 = 0x1.871424p-6f, .c2 = 0x1.d56e44p-7f,
+  .c3 = 0x1.149bb8p-7f, .c4 = 0x1.8e07fep-7f, .inv_pi = 0x1.45f306p-2f,
+  .half = 0.5f,
+};
+
+/* Single-precision SVE implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 4 polynomial P to approximate asinpi
+   such that the final approximation of acospi is:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+    The largest observed error in this region is 1.3 ulps,
+      _ZGVsMxv_acospif(0x1.ffa9d2p-2) got 0x1.557504p-2
+				     want 0x1.557502p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = y/pi + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.61 ulps,
+   _ZGVsMxv_acospif (0x1.6b232ep-1) got 0x1.fe04bap-3
+				   want 0x1.fe04cp-3.  */
+svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+  svfloat32_t ax = svabs_x (pg, x);
+  svbool_t a_gt_half = svacgt (pg, x, 0.5f);
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| <= 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >  0.5.  */
+  svfloat32_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f32 (0.5f), ax, 0.5f),
+			  svmul_x (ptrue, x, x));
+  svfloat32_t z = svsqrt_m (ax, a_gt_half, z2);
+
+  /* Use a single polynomial approximation P (Horner) for both intervals.  */
+  svfloat32_t p = svmla_x (pg, sv_f32 (d->c3), z2, d->c4);
+  p = svmad_x (pg, z2, p, d->c2);
+  p = svmad_x (pg, z2, p, d->c1);
+  p = svmad_x (pg, z2, p, d->c0);
+  /* Add 1/pi as final coeff.  */
+  p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p);
+  /* Finalize polynomial: Q = z * (1/pi + z2 * P(z2)).  */
+  p = svmul_x (ptrue, z, p);
+
+  /* acospi(x)
+			  = 1/2 - sign(x) * Q(|x|), for       |x| < 0.5
+			  = 2 Q(|x|)              , for  0.5 < x < 1.0
+			  = 1 - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
+  svfloat32_t y
+      = svreinterpret_f32 (svorr_x (ptrue, svreinterpret_u32 (p), sign));
+  svfloat32_t mul = svsel (a_gt_half, sv_f32 (2.0f), sv_f32 (-1.0f));
+  svfloat32_t add = svreinterpret_f32 (
+      svorr_x (ptrue, sign, svreinterpret_u32 (sv_f32 (d->half))));
+  add = svsub_m (a_gt_half, sv_f32 (d->half), add);
+
+  return svmad_x (pg, y, mul, add); /* y * mul + add.  */
+}
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 38681a4..c202bda 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -19,10 +19,13 @@
 
 libmvec_hidden_proto (V_NAME_F1(acos));
 libmvec_hidden_proto (V_NAME_F1(acosh));
+libmvec_hidden_proto (V_NAME_F1(acospi));
 libmvec_hidden_proto (V_NAME_F1(asin));
 libmvec_hidden_proto (V_NAME_F1(asinh));
+libmvec_hidden_proto (V_NAME_F1(asinpi));
 libmvec_hidden_proto (V_NAME_F1(atan));
 libmvec_hidden_proto (V_NAME_F1(atanh));
+libmvec_hidden_proto (V_NAME_F1(atanpi));
 libmvec_hidden_proto (V_NAME_F1(cbrt));
 libmvec_hidden_proto (V_NAME_F1(cos));
 libmvec_hidden_proto (V_NAME_F1(cosh));
@@ -47,3 +50,4 @@ libmvec_hidden_proto (V_NAME_F1(tan));
 libmvec_hidden_proto (V_NAME_F1(tanh));
 libmvec_hidden_proto (V_NAME_F1(tanpi));
 libmvec_hidden_proto (V_NAME_F2(atan2));
+libmvec_hidden_proto (V_NAME_F2(atan2pi));
diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c
index 4142116..f74141c 100644
--- a/sysdeps/aarch64/fpu/asin_advsimd.c
+++ b/sysdeps/aarch64/fpu/asin_advsimd.c
@@ -18,24 +18,23 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
-  float64x2_t poly[12];
+  float64x2_t c0, c2, c4, c6, c8, c10;
   float64x2_t pi_over_2;
   uint64x2_t abs_mask;
+  double c1, c3, c5, c7, c9, c11;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
-	    V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
-	    V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
-	    V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
-	    V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
-	    V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
-  .abs_mask = V2 (0x7fffffffffffffff),
+  .c0 = V2 (0x1.555555555554ep-3),	  .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),	  .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),	  .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),	  .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),	  .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),	  .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff),
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
@@ -68,8 +67,8 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
      asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
    The largest observed error in this region is 2.69 ulps,
-   _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				       want 0x1.110d7e85fdd53p-1.  */
+   _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1
+				       want 0x1.1111dd54ddf99p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -86,7 +85,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
     return special_case (x, x, AllMask);
 #endif
 
-  uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5));
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5));
 
   /* Evaluate polynomial Q(x) = y + y * z * P(z) with
      z = x ^ 2 and y = |x|            , if |x| < 0.5
@@ -99,7 +98,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
   float64x2_t z4 = vmulq_f64 (z2, z2);
   float64x2_t z8 = vmulq_f64 (z4, z4);
   float64x2_t z16 = vmulq_f64 (z8, z8);
-  float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
+  float64x2_t p = vfmaq_f64 (p07, z16, p811);
 
   /* Finalize polynomial: z + z * z2 * P(z2).  */
   p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c
index 9314466..975f408 100644
--- a/sysdeps/aarch64/fpu/asin_sve.c
+++ b/sysdeps/aarch64/fpu/asin_sve.c
@@ -18,45 +18,43 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[12];
-  float64_t pi_over_2f;
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4,
-	    0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6,
-	    0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
-	    0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7,
-	    0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6,
-	    -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
-  .pi_over_2f = 0x1.921fb54442d18p+0,
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
-#define P(i) sv_f64 (d->poly[i])
-
 /* Double-precision SVE implementation of vector asin(x).
 
    For |x| in [0, 0.5], use an order 11 polynomial P such that the final
    approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
 
-   The largest observed error in this region is 0.52 ulps,
-   _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2
-				      want 0x1.ec13757305f26p-2.
-
-   For |x| in [0.5, 1.0], use same approximation with a change of variable
+   The largest observed error in this region is 0.98 ulp:
+   _ZGVsMxv_asin (0x1.d98f6a748ed8ap-2) got 0x1.ec4eb661a73d3p-2
+				       want 0x1.ec4eb661a73d2p-2.
 
-     asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
+   For |x| in [0.5, 1.0], use same approximation with a change of variable:
+   asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 2.69 ulps,
-   _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				      want 0x1.110d7e85fdd53p-1.  */
+   The largest observed error in this region is 2.66 ulp:
+   _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1
+				       want 0x1.10b9586f087abp-1.  */
 svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
 
   svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
   svfloat64_t ax = svabs_x (pg, x);
@@ -70,17 +68,37 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
 
   /* Use a single polynomial approximation P for both intervals.  */
+  svfloat64_t z3 = svmul_x (pg, z2, z);
   svfloat64_t z4 = svmul_x (pg, z2, z2);
   svfloat64_t z8 = svmul_x (pg, z4, z4);
-  svfloat64_t z16 = svmul_x (pg, z8, z8);
-  svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  /* Order-11 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
   /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+  p = svmla_x (pg, z, z3, p);
 
-  /* asin(|x|) = Q(|x|)         , for |x| < 0.5
-	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
-  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f);
+  /* asin(|x|) = Q(|x|), for |x| <  0.5
+	    = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2);
 
-  /* Copy sign.  */
+  /* Reinsert the sign from the argument.  */
   return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
 }
diff --git a/sysdeps/aarch64/fpu/asinf_advsimd.c b/sysdeps/aarch64/fpu/asinf_advsimd.c
index 52c7c0e..013936c 100644
--- a/sysdeps/aarch64/fpu/asinf_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinf_advsimd.c
@@ -18,22 +18,21 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
 
 static const struct data
 {
-  float32x4_t poly[5];
+  float32x4_t c0, c2, c4;
+  float c1, c3;
   float32x4_t pi_over_2f;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))  on
      [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 .  */
-  .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5),
-	    V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) },
-  .pi_over_2f = V4 (0x1.921fb6p+0f),
+  .c0 = V4 (0x1.55555ep-3f), .c1 = 0x1.33261ap-4f,
+  .c2 = V4 (0x1.70d7dcp-5f), .c3 = 0x1.b059dp-6f,
+  .c4 = V4 (0x1.3af7d8p-5f), .pi_over_2f = V4 (0x1.921fb6p+0f),
 };
 
 #define AbsMask 0x7fffffff
-#define Half 0x3f000000
 #define One 0x3f800000
 #define Small 0x39800000 /* 2^-12.  */
 
@@ -47,11 +46,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
 /* Single-precision implementation of vector asin(x).
 
-   For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct
-   rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the
-   following approximation.
 
-   For |x| in [Small, 0.5], use order 4 polynomial P such that the final
+   For |x| < 0.5, use order 4 polynomial P such that the final
    approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
 
     The largest observed error in this region is 0.83 ulps,
@@ -80,24 +76,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x)
 #endif
 
   float32x4_t ax = vreinterpretq_f32_u32 (ia);
-  uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half));
+  uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f));
 
   /* Evaluate polynomial Q(x) = y + y * z * P(z) with
      z = x ^ 2 and y = |x|            , if |x| < 0.5
      z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
   float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
-			      vfmsq_n_f32 (v_f32 (0.5), ax, 0.5));
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
   float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
 
   /* Use a single polynomial approximation P for both intervals.  */
-  float32x4_t p = v_horner_4_f32 (z2, d->poly);
+
+  /* Order-4 pairwise Horner evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t c13 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c13, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c13, 1);
+  float32x4_t p = vfmaq_f32 (p23, d->c4, z4);
+  p = vfmaq_f32 (p01, p, z4);
   /* Finalize polynomial: z + z * z2 * P(z2).  */
   p = vfmaq_f32 (z, vmulq_f32 (z, z2), p);
 
   /* asin(|x|) = Q(|x|)         , for |x| < 0.5
 	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
   float32x4_t y
-      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0));
+      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0f));
 
   /* Copy sign.  */
   return vbslq_f32 (v_u32 (AbsMask), y, x);
diff --git a/sysdeps/aarch64/fpu/asinpi_advsimd.c b/sysdeps/aarch64/fpu/asinpi_advsimd.c
new file mode 100644
index 0000000..b11f98b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpi_advsimd.c
@@ -0,0 +1,109 @@
+/* Double-Precision vector (Advanced SIMD) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10;
+  float64x2_t pi_over_2, inv_pi;
+  uint64x2_t abs_mask;
+  double c1, c3, c5, c7, c9, c11;
+} data = {
+  /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+     on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
+  .c0 = V2 (0x1.555555555554ep-3),	  .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),	  .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),	  .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),	  .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),	  .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),	  .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff),
+  .inv_pi = V2 (0x1.45f306dc9c883p-2),
+};
+
+/* Double-precision implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+   approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+   asinpi(x) = asin(x) * 1/pi.
+
+   The largest observed error in this region is 1.63 ulps,
+   _ZGVnN2v_asinpi (0x1.9125919fa617p-19) got 0x1.fec183497ea53p-21
+					 want 0x1.fec183497ea51p-21.
+
+   For |x| in [0.5, 1.0], use the same approximation with a change of variable
+
+     asin(x) = pi/2 - 2 (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 3.04 ulps,
+   _ZGVnN2v_asinpi (0x1.0479b7bd98553p-1) got 0x1.5beebec797326p-3
+					 want 0x1.5beebec797329p-3.  */
+
+float64x2_t VPCS_ATTR V_NAME_D1 (asinpi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float64x2_t ax = vabsq_f64 (x);
+
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5));
+
+  /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x),
+			      vfmsq_n_f64 (v_f64 (0.5), ax, 0.5));
+  float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  /* Order-11 Estrin scheme; odd coefficients are loaded in pairs.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
+  float64x2_t p = vfmaq_f64 (p07, z16, p811);
+
+  /* Finalize polynomial: z + z * z2 * P(z2).  */
+  p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+
+  /* asin(|x|) = Q(|x|)         , for |x| < 0.5
+	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  float64x2_t y = vbslq_f64 (a_lt_half, p, vfmsq_n_f64 (d->pi_over_2, p, 2.0));
+  /* asinpi(|x|) = asin(|x|) / pi.  */
+  y = vmulq_f64 (y, d->inv_pi);
+
+  /* Copy the sign of x onto the result.  */
+  return vbslq_f64 (d->abs_mask, y, x);
+}
diff --git a/sysdeps/aarch64/fpu/asinpi_sve.c b/sysdeps/aarch64/fpu/asinpi_sve.c
new file mode 100644
index 0000000..71ef8ce
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpi_sve.c
@@ -0,0 +1,107 @@
+/* Double-Precision vector (SVE) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2, inv_pi;
+} data = {
+  /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+     on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = 0x1.921fb54442d18p+0, .inv_pi = 0x1.45f306dc9c883p-2,
+};
+
+/* Double-precision SVE implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+   approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+   The largest observed error in this region is 1.32 ulp:
+   _ZGVsMxv_asinpi (0x1.fc12356dbdefbp-2) got 0x1.5272e9658ba66p-3
+					 want 0x1.5272e9658ba64p-3.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable:
+     asin(x) = pi/2 - 2 (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 3.48 ulp:
+   _ZGVsMxv_asinpi (0x1.03da0c2295424p-1) got 0x1.5b02b3dcafaefp-3
+					 want 0x1.5b02b3dcafaf2p-3.  */
+svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+  svfloat64_t ax = svabs_x (pg, x);
+  svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+			  svmul_x (ptrue, x, x));
+  svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  /* Order-11 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
+  /* Finalize polynomial: z + z3 * P(z2).  */
+  p = svmla_x (pg, z, z3, p);
+
+  /* asin(|x|) = Q(|x|)         , for |x| < 0.5
+	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2);
+
+  /* Fold the sign of the argument into 1/pi before the final scaling.  */
+  svfloat64_t inv_pi = svreinterpret_f64 (
+      svorr_x (pg, svreinterpret_u64 (sv_f64 (d->inv_pi)), sign));
+
+  return svmul_x (pg, y, inv_pi);
+}
diff --git a/sysdeps/aarch64/fpu/asinpif_advsimd.c b/sysdeps/aarch64/fpu/asinpif_advsimd.c
new file mode 100644
index 0000000..1483ea8
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpif_advsimd.c
@@ -0,0 +1,95 @@
+/* Single-Precision vector (Advanced SIMD) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, inv_pi;
+  float c1, c3, c5, null;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using
+     iterative approach for minimisation of relative error in Sollya file.  */
+  .c0 = V4 (0x1.b2995ep-5f),	 .c1 = 0x1.8724ep-6f,
+  .c2 = V4 (0x1.d1301ep-7f),	 .c3 = 0x1.446d3cp-7f,
+  .c4 = V4 (0x1.654848p-8f),	 .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = V4 (0x1.45f306p-2f),
+};
+
+#define AbsMask 0x7fffffff
+
+/* Single-precision implementation of vector asinpi(x).
+
+   For |x| < 0.5, use order 5 polynomial P such that the final
+   approximation is an odd polynomial: asinpif(x) ~ x/pi + x^3 P(x^2).
+
+   The largest observed error in this region is 1.68 ulps,
+   _ZGVnN4v_asinpif (0x1.86e514p-2) got 0x1.fea8c8p-4 want 0x1.fea8ccp-4.
+
+   For |x| in [0.5, 1.0], use the same approximation with a change of variable
+
+     asinpi(x) = 1/2 - 2 (y/pi + y * z * P(z)), with z = (1-x)/2, y = sqrt(z).
+
+   The largest observed error in this region is 3.49 ulps,
+   _ZGVnN4v_asinpif (0x1.0d93fep-1) got 0x1.697aap-3 want 0x1.697a9ap-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asinpi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+  float32x4_t ax = vreinterpretq_f32_u32 (ia);
+  uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f));
+
+  /* Evaluate polynomial Q(x) = y/pi + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
+  float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+
+  /* Order-5 Estrin evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+  float32x4_t c135 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1);
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2);
+  float32x4_t p = vfmaq_f32 (p03, z8, p45);
+  /* Add 1/pi as final coeff: 1/pi + z2 * P(z2).  */
+  p = vfmaq_f32 (d->inv_pi, z2, p);
+
+  /* Finalize polynomial: z * (1/pi + z2 * P(z2)).  */
+  p = vmulq_f32 (z, p);
+
+  /* asinpi(|x|) = Q(|x|)        , for |x| < 0.5
+		 = 1/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  float32x4_t y
+      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (v_f32 (0.5f), p, 2.0f));
+
+  /* Copy the sign of x onto the result.  */
+  return vbslq_f32 (v_u32 (AbsMask), y, x);
+}
+libmvec_hidden_def (V_NAME_F1 (asinpi))
+HALF_WIDTH_ALIAS_F1 (asinpi)
diff --git a/sysdeps/aarch64/fpu/asinpif_sve.c b/sysdeps/aarch64/fpu/asinpif_sve.c
new file mode 100644
index 0000000..046b258
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpif_sve.c
@@ -0,0 +1,88 @@
+/* Single-Precision vector (SVE) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c1, c3, c5;
+  float32_t c0, c2, c4, inv_pi;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33.  */
+  .c0 = 0x1.b2995ep-5f,	    .c1 = 0x1.8724ep-6f,  .c2 = 0x1.d1301ep-7f,
+  .c3 = 0x1.446d3cp-7f,	    .c4 = 0x1.654848p-8f, .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = 0x1.45f306p-2f,
+};
+
+/* Single-precision SVE implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use order 5 polynomial P such that the final
+   approximation is an odd polynomial: asinpi(x) ~ x/pi + x^3 P(x^2).
+
+   The largest observed error in this region is 1.96 ulps:
+   _ZGVsMxv_asinpif (0x1.8e534ep-3) got 0x1.fe6ab4p-5
+				   want 0x1.fe6ab8p-5.
+
+   For |x| in [0.5, 1.0], use the same approximation with a change of variable
+
+     asinpi(x) = 1/2 - 2 (y/pi + y * z * P(z)), with z = (1-x)/2, y = sqrt(z).
+
+   The largest observed error in this region is 3.46 ulps:
+   _ZGVsMxv_asinpif (0x1.0df892p-1) got 0x1.6a114cp-3
+				   want 0x1.6a1146p-3.  */
+svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+
+  svfloat32_t ax = svabs_x (pg, x);
+  svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = y/pi + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  svfloat32_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5),
+			  svmul_x (pg, x, x));
+  svfloat32_t z = svsqrt_m (ax, a_ge_half, z2);
+
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t c135_two = svld1rq (ptrue, &d->c1);
+
+  /* Order-5 Pairwise Horner evaluation scheme.  */
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, c135_two, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, c135_two, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, c135_two, 2);
+
+  svfloat32_t p25 = svmla_x (pg, p23, z4, p45);
+  svfloat32_t p = svmla_x (pg, p01, z4, p25);
+
+  /* Add 1/pi as final coeff, then scale by z: z * (1/pi + z2 * P(z2)).  */
+  p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p);
+  p = svmul_x (pg, p, z);
+
+  /* asinpi(|x|) = Q(|x|)        , for |x| < 0.5
+		 = 1/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat32_t y = svmsb_m (a_ge_half, p, sv_f32 (2.0), 0.5);
+
+  /* Reinsert sign from argument.  */
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c
index 00b4a4f..a31d52f 100644
--- a/sysdeps/aarch64/fpu/atan2_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan2_advsimd.c
@@ -19,40 +19,38 @@
 
 #include "math_config.h"
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
   float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
   float64x2_t pi_over_2;
-  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
-  uint64x2_t zeroinfnan, minustwo;
+  uint64x2_t zeroinfnan;
 } data = {
-  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
-	      [2**-1022, 1.0].  */
-  .c0 = V2 (-0x1.5555555555555p-2),
-  .c1 = 0x1.99999999996c1p-3,
-  .c2 = V2 (-0x1.2492492478f88p-3),
-  .c3 = 0x1.c71c71bc3951cp-4,
-  .c4 = V2 (-0x1.745d160a7e368p-4),
-  .c5 = 0x1.3b139b6a88ba1p-4,
-  .c6 = V2 (-0x1.11100ee084227p-4),
-  .c7 = 0x1.e1d0f9696f63bp-5,
-  .c8 = V2 (-0x1.aebfe7b418581p-5),
-  .c9 = 0x1.842dbe9b0d916p-5,
-  .c10 = V2 (-0x1.5d30140ae5e99p-5),
-  .c11 = 0x1.338e31eb2fbbcp-5,
-  .c12 = V2 (-0x1.00e6eece7de8p-5),
-  .c13 = 0x1.860897b29e5efp-6,
-  .c14 = V2 (-0x1.0051381722a59p-6),
-  .c15 = 0x1.14e9dc19a4a4ep-7,
-  .c16 = V2 (-0x1.d0062b42fe3bfp-9),
-  .c17 = 0x1.17739e210171ap-10,
-  .c18 = V2 (-0x1.ab24da7be7402p-13),
-  .c19 = 0x1.358851160a528p-16,
+  /* Coefficients of polynomial P such that
+     atan(x)~x+x*P(x^2) on [2^-1022, 1.0].  */
+  .c0 = V2 (-0x1.555555555552ap-2),
+  .c1 = 0x1.9999999995aebp-3,
+  .c2 = V2 (-0x1.24924923923f6p-3),
+  .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = V2 (-0x1.745d11fb3d32bp-4),
+  .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = V2 (-0x1.110e6d985f496p-4),
+  .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = V2 (-0x1.ae644e28058c3p-5),
+  .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = V2 (-0x1.59d7f901566cbp-5),
+  .c11 = 0x1.2c982855ab069p-5,
+  .c12 = V2 (-0x1.eb49592998177p-6),
+  .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = V2 (-0x1.ca980345c4204p-7),
+  .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = V2 (-0x1.7ea70755b8eccp-9),
+  .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = V2 (-0x1.44a4b059b6f67p-13),
+  .c19 = 0x1.c4a45029e5a91p-17,
   .pi_over_2 = V2 (0x1.921fb54442d18p+0),
   .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1),
-  .minustwo = V2 (0xc000000000000000),
 };
 
 #define SignMask v_u64 (0x8000000000000000)
@@ -77,10 +75,9 @@ zeroinfnan (uint64x2_t i, const struct data *d)
 }
 
 /* Fast implementation of vector atan2.
-   Maximum observed error is 2.8 ulps:
-   _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
-	got 0x1.92d628ab678ccp-1
-       want 0x1.92d628ab678cfp-1.  */
+   Maximum observed error is 1.97 ulps:
+   _ZGVnN2vv_atan2 (0x1.42337dba73768p+5, 0x1.422d748cd3e29p+5)
+   got 0x1.9224810264efcp-1 want 0x1.9224810264efep-1.  */
 float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -101,26 +98,29 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
   uint64x2_t pred_xlt0 = vcltzq_f64 (x);
   uint64x2_t pred_aygtax = vcagtq_f64 (y, x);
 
-  /* Set up z for call to atan.  */
-  float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
-  float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax);
-  float64x2_t z = vdivq_f64 (n, q);
-
-  /* Work out the correct shift.  */
-  float64x2_t shift
-      = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo));
-  shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift);
-  shift = vmulq_f64 (shift, d->pi_over_2);
-
-  /* Calculate the polynomial approximation.
-     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.
-     The order 19 polynomial P approximates
-     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
+  /* Set up z for evaluation of atan.  */
+  float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+  float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax);
+  float64x2_t z = vdivq_f64 (num, den);
+
+  /* Work out the correct shift for atan2, in units of pi/2
+     (multiplication by pi/2 is done later):
+     -pi   when x < 0  and ay <= ax
+     -pi/2 when x < 0  and ay > ax
+      0    when x >= 0 and ay <= ax
+      pi/2 when x >= 0 and ay > ax.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0))));
+  float64x2_t shift2 = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (1.0))));
+  shift = vaddq_f64 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
   float64x2_t z2 = vmulq_f64 (z, z);
-  float64x2_t x2 = vmulq_f64 (z2, z2);
-  float64x2_t x4 = vmulq_f64 (x2, x2);
-  float64x2_t x8 = vmulq_f64 (x4, x4);
+  float64x2_t z3 = vmulq_f64 (z2, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
 
   float64x2_t c13 = vld1q_f64 (&d->c1);
   float64x2_t c57 = vld1q_f64 (&d->c5);
@@ -128,45 +128,43 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
   float64x2_t c1315 = vld1q_f64 (&d->c13);
   float64x2_t c1719 = vld1q_f64 (&d->c17);
 
-  /* estrin_7.  */
+  /* Order-7 Estrin.  */
   float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
   float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
-  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
 
   float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
   float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
-  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
 
-  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
 
-  /* estrin_11.  */
+  /* Order-11 Estrin.  */
   float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
   float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
-  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
 
   float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
   float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
-  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+  float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415);
 
   float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
   float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
-  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+  float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819);
 
-  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
-  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+  float64x2_t p815 = vfmaq_f64 (p811, z8, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, z16, p1619);
 
-  float64x2_t ret = vfmaq_f64 (p07, p819, x8);
+  float64x2_t poly = vfmaq_f64 (p07, p819, z16);
 
   /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z));
-  ret = vaddq_f64 (ret, shift);
+  float64x2_t ret = vfmaq_f64 (z, shift, d->pi_over_2);
+  ret = vfmaq_f64 (ret, z3, poly);
 
   if (__glibc_unlikely (v_any_u64 (special_cases)))
     return special_case (y, x, ret, sign_xy, special_cases);
 
   /* Account for the sign of x and y.  */
-  ret = vreinterpretq_f64_u64 (
+  return vreinterpretq_f64_u64 (
       veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
-
-  return ret;
 }
diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
index 163f613..9e2dd24 100644
--- a/sysdeps/aarch64/fpu/atan2_sve.c
+++ b/sysdeps/aarch64/fpu/atan2_sve.c
@@ -19,25 +19,25 @@
 
 #include "math_config.h"
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[20];
-  float64_t pi_over_2;
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-1022, 1.0].  */
-  .poly = { -0x1.5555555555555p-2,  0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
-            0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-            -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
-            0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-            -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
-            0x1.14e9dc19a4a4ep-7,  -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-            -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
-  .pi_over_2 = 0x1.921fb54442d18p+0,
+  .c0 = -0x1.555555555552ap-2,	 .c1 = 0x1.9999999995aebp-3,
+  .c2 = -0x1.24924923923f6p-3,	 .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = -0x1.745d11fb3d32bp-4,	 .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = -0x1.110e6d985f496p-4,	 .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = -0x1.ae644e28058c3p-5,	 .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = -0x1.59d7f901566cbp-5,	 .c11 = 0x1.2c982855ab069p-5,
+  .c12 = -0x1.eb49592998177p-6,	 .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = -0x1.ca980345c4204p-7,	 .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = -0x1.7ea70755b8eccp-9,	 .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17,
 };
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
 static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
@@ -56,15 +56,17 @@ zeroinfnan (svuint64_t i, const svbool_t pg)
 }
 
 /* Fast implementation of SVE atan2. Errors are greatest when y and
-   x are reasonably close together. The greatest observed error is 2.28 ULP:
-   _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
-   got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1.  */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+   x are reasonably close together. The greatest observed error is 1.94 ULP:
+   _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5)
+   got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1.  */
+svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+				const svbool_t pg)
 {
-  const struct data *data_ptr = ptr_barrier (&data);
+  const struct data *d = ptr_barrier (&data);
 
   svuint64_t ix = svreinterpret_u64 (x);
   svuint64_t iy = svreinterpret_u64 (y);
+  svbool_t ptrue = svptrue_b64 ();
 
   svbool_t cmp_x = zeroinfnan (ix, pg);
   svbool_t cmp_y = zeroinfnan (iy, pg);
@@ -81,32 +83,67 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
 
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
-  /* Set up z for call to atan.  */
-  svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
-  svfloat64_t d = svsel (pred_aygtax, ay, ax);
-  svfloat64_t z = svdiv_x (pg, n, d);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atan.  */
+  svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat64_t den = svsel (pred_aygtax, ay, ax);
+  svfloat64_t z = svdiv_x (pg, num, den);
+
+  /* Work out the correct shift for atan2, in units of pi/2
+     (the multiplication by +/-pi/2 is folded into the FMA below):
+     -pi   when x < 0  and ay <= ax
+     -pi/2 when x < 0  and ay > ax
+      0    when x >= 0 and ay <= ax
+      pi/2 when x >= 0 and ay > ax.  */
   svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  svfloat64_t shift_mul = svreinterpret_f64 (
+      svorr_x (pg, sign_x, svreinterpret_u64 (sv_f64 (0x1.921fb54442d18p+0))));
   shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
-  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
-  shift = svmul_x (pg, shift, data_ptr->pi_over_2);
+  shift = svmla_x (pg, z, shift, shift_mul);
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
   svfloat64_t z2 = svmul_x (pg, z, z);
-  svfloat64_t x2 = svmul_x (pg, z2, z2);
-  svfloat64_t x4 = svmul_x (pg, x2, x2);
-  svfloat64_t x8 = svmul_x (pg, x4, x4);
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+  svfloat64_t z16 = svmul_x (pg, z8, z8);
 
-  svfloat64_t ret = svmla_x (
-      pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly),
-      sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8);
+  /* Order-7 Estrin.  */
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
 
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat64_t z3 = svmul_x (pg, z2, z);
-  ret = svmla_x (pg, z, z3, ret);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, z8, p47);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+  svfloat64_t c1315 = svld1rq (ptrue, &d->c13);
+  svfloat64_t c1719 = svld1rq (ptrue, &d->c17);
 
-  ret = svadd_m (pg, ret, shift);
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1);
+  svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415);
+
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0);
+  svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1);
+  svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819);
+
+  svfloat64_t p815 = svmla_x (pg, p811, z8, p1215);
+  svfloat64_t p819 = svmla_x (pg, p815, z16, p1619);
+
+  svfloat64_t poly = svmla_x (pg, p07, z16, p819);
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  svfloat64_t ret = svmla_x (pg, shift, z3, poly);
 
   /* Account for the sign of x and y.  */
   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c
index e65406f..75d8738 100644
--- a/sysdeps/aarch64/fpu/atan2f_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c
@@ -18,22 +18,22 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
 
 static const struct data
 {
-  float32x4_t c0, pi_over_2, c4, c6, c2;
+  float32x4_t c0, c4, c6, c2;
   float c1, c3, c5, c7;
   uint32x4_t comp_const;
+  float32x4_t pi;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].
      Generated using fpminimax between FLT_MIN and 1.  */
-  .c0 = V4 (-0x1.55555p-2f),	    .c1 = 0x1.99935ep-3f,
-  .c2 = V4 (-0x1.24051ep-3f),	    .c3 = 0x1.bd7368p-4f,
-  .c4 = V4 (-0x1.491f0ep-4f),	    .c5 = 0x1.93a2c0p-5f,
-  .c6 = V4 (-0x1.4c3c60p-6f),	    .c7 = 0x1.01fd88p-8f,
-  .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1),
+  .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3,
+  .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4,
+  .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5,
+  .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9,
+  .pi = V4 (0x1.921fb6p+1f), .comp_const = V4 (2 * 0x7f800000lu - 1),
 };
 
 #define SignMask v_u32 (0x80000000)
@@ -54,13 +54,13 @@ static inline uint32x4_t
 zeroinfnan (uint32x4_t i, const struct data *d)
 {
   /* 2 * i - 1 >= 2 * 0x7f800000lu - 1.  */
-  return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const);
+  return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const);
 }
 
 /* Fast implementation of vector atan2f. Maximum observed error is
-   2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
-   _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
-						 want 0x1.967f00p-1.  */
+   2.13 ULP:
+   _ZGVnN4vv_atan2f (0x1.14a9d4p-87, 0x1.0eb886p-87) got 0x1.97aea2p-1
+						    want 0x1.97ae9ep-1.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -81,28 +81,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
   uint32x4_t pred_xlt0 = vcltzq_f32 (x);
   uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax);
 
-  /* Set up z for call to atanf.  */
-  float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
-  float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax);
-  float32x4_t z = vdivq_f32 (n, q);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atanf.  */
+  float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
+  float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax);
+  float32x4_t z = vdivq_f32 (num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by pi is done later.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
   float32x4_t shift = vreinterpretq_f32_u32 (
-      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f))));
-  shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift);
-  shift = vmulq_f32 (shift, d->pi_over_2);
-
-  /* Calculate the polynomial approximation.
-     Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
-     a standard implementation using z8 creates spurious underflow
-     in the very last fma (when z^8 is small enough).
-     Therefore, we split the last fma into a mul and an fma.
-     Horner and single-level Estrin have higher errors that exceed
-     threshold.  */
+      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f))));
+  float32x4_t shift2 = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f))));
+  shift = vaddq_f32 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
   float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z2, z);
   float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
 
   float32x4_t c1357 = vld1q_f32 (&d->c1);
+
   float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0);
   float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1);
   float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2);
@@ -110,10 +113,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
   float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
   float32x4_t p47 = vfmaq_f32 (p45, z4, p67);
 
-  float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47));
+  float32x4_t poly = vfmaq_f32 (p03, z8, p47);
 
   /* y = shift + z * P(z^2).  */
-  ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift);
+  float32x4_t ret = vfmaq_f32 (z, shift, d->pi);
+  ret = vfmaq_f32 (ret, z3, poly);
 
   if (__glibc_unlikely (v_any_u32 (special_cases)))
     {
diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
index 5f26e2a..4d93419 100644
--- a/sysdeps/aarch64/fpu/atan2f_sve.c
+++ b/sysdeps/aarch64/fpu/atan2f_sve.c
@@ -18,18 +18,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 static const struct data
 {
-  float32_t poly[8];
+  float32_t c0, c2, c4, c6;
+  float32_t c1, c3, c5, c7;
   float32_t pi_over_2;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].  */
-  .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
-  .pi_over_2 = 0x1.921fb6p+0f,
+  .c0 = -0x1.5554dcp-2, .c1 = 0x1.9978ecp-3,  .c2 = -0x1.230a94p-3,
+  .c3 = 0x1.b4debp-4,	.c4 = -0x1.3550dap-4, .c5 = 0x1.61eebp-5,
+  .c6 = -0x1.0c17d4p-6, .c7 = 0x1.7ea694p-9,  .pi_over_2 = 0x1.921fb6p+0f,
 };
 
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
@@ -51,12 +51,14 @@ zeroinfnan (svuint32_t i, const svbool_t pg)
 
 /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 *
    P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum
-   observed error is 2.95 ULP:
-   _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
-						 want 0x1.967f00p-1.  */
-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+   observed error is 2.21 ULP:
+   _ZGVsMxvv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1
+						 want 0x1.95ed36p-1.  */
+svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
+				const svbool_t pg)
 {
-  const struct data *data_ptr = ptr_barrier (&data);
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
 
   svuint32_t ix = svreinterpret_u32 (x);
   svuint32_t iy = svreinterpret_u32 (y);
@@ -76,29 +78,42 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
 
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
-  /* Set up z for call to atan.  */
-  svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
-  svfloat32_t d = svsel (pred_aygtax, ay, ax);
-  svfloat32_t z = svdiv_x (pg, n, d);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atanf.  */
+  svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat32_t den = svsel (pred_aygtax, ay, ax);
+  svfloat32_t z = svdiv_x (ptrue, num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by pi/2 is done later.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
   svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
   shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
   shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
-  shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
 
   /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
-  svfloat32_t z2 = svmul_x (pg, z, z);
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z3 = svmul_x (pg, z2, z);
   svfloat32_t z4 = svmul_x (pg, z2, z2);
   svfloat32_t z8 = svmul_x (pg, z4, z4);
 
-  svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly);
+  svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1);
 
-  /* ret = shift + z + z^3 * P(z^2).  */
-  svfloat32_t z3 = svmul_x (pg, z2, z);
-  ret = svmla_x (pg, z, z3, ret);
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
+  svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3);
 
-  ret = svadd_m (pg, ret, shift);
+  svfloat32_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat32_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat32_t poly = svmla_x (pg, p03, z8, p47);
+
+  /* ret = shift + z + z^3 * P(z^2).  */
+  svfloat32_t ret = svmla_x (pg, z, shift, sv_f32 (d->pi_over_2));
+  ret = svmla_x (pg, ret, z3, poly);
 
   /* Account for the sign of x and y.  */
 
diff --git a/sysdeps/aarch64/fpu/atan2pi_advsimd.c b/sysdeps/aarch64/fpu/atan2pi_advsimd.c
new file mode 100644
index 0000000..3cf231b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pi_advsimd.c
@@ -0,0 +1,175 @@
+/* Double-Precision vector (Advanced SIMD) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64x2_t c0;
+  uint64x2_t zeroinfnan;
+  float64x2_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)/pi~x*P(x^2) on
+	      [2**-1022, 1.0].  */
+  .c0 = V2 (0x1.45f306dc9c883p-2),
+  .c1 = V2 (-0x1.b2995e7b7ba4ap-4),
+  .c2 = 0x1.04c26be3d2c1p-4,
+  .c3 = V2 (-0x1.7483759c17ea1p-5),
+  .c4 = 0x1.21bb95c315d57p-5,
+  .c5 = V2 (-0x1.da1bdc3d453f3p-6),
+  .c6 = 0x1.912d20459b4bfp-6,
+  .c7 = V2 (-0x1.5bbd4545cad1fp-6),
+  .c8 = 0x1.331b83bec30a1p-6,
+  .c9 = V2 (-0x1.13d6457f44de3p-6),
+  .c10 = 0x1.f8e802974db94p-7,
+  .c11 = V2 (-0x1.d7e173ab04a1ap-7),
+  .c12 = 0x1.bdfa47d6a4f28p-7,
+  .c13 = V2 (-0x1.9ba78f3232ceep-7),
+  .c14 = 0x1.5e6044590ab4fp-7,
+  .c15 = V2 (-0x1.01ccfdeb9f77fp-7),
+  .c16 = 0x1.345cf0d4eb1c1p-8,
+  .c17 = V2 (-0x1.19e5f00f67e3ap-9),
+  .c18 = 0x1.6d3035ac7625bp-11,
+  .c19 = V2 (-0x1.286bb9ae4ed79p-13),
+  .c20 = 0x1.c37ec36da0e1ap-17,
+  .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1),
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+#define OneOverPi v_f64 (0x1.45f306dc9c883p-2)
+
+/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls).  */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t y, float64x2_t x, float64x2_t ret,
+	      uint64x2_t sign_xy, uint64x2_t cmp)
+{
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+
+  /* Since we have no scalar fallback for atan2pi,
+     we can instead make a call to atan2 and divide by pi.  */
+  ret = v_call2_f64 (atan2, y, x, ret, cmp);
+
+  /* Only divide the special cases by pi, and leave the rest unchanged.  */
+  return vbslq_f64 (cmp, vmulq_f64 (ret, OneOverPi), ret);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline uint64x2_t
+zeroinfnan (uint64x2_t i, const struct data *d)
+{
+  /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1).  */
+  return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan);
+}
+
+/* Fast implementation of vector atan2pi.
+   Maximum observed error is 3.04 ulps:
+   _ZGVnN2vv_atan2pi (0x1.1e0733532ce28p+5, 0x1.2d803379cca1fp+5)
+   got 0x1.eed60c1e89317p-3 want 0x1.eed60c1e89314p-3.  */
+float64x2_t VPCS_ATTR V_NAME_D2 (atan2pi) (float64x2_t y, float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t iy = vreinterpretq_u64_f64 (y);
+
+  uint64x2_t special_cases
+      = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d));
+
+  uint64x2_t sign_x = vandq_u64 (ix, SignMask);
+  uint64x2_t sign_y = vandq_u64 (iy, SignMask);
+  uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y);
+
+  float64x2_t ax = vabsq_f64 (x);
+  float64x2_t ay = vabsq_f64 (y);
+
+  uint64x2_t pred_xlt0 = vcltzq_f64 (x);
+  uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax);
+
+  /* Set up z for evaluation of atanpi.  */
+  float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+  float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax);
+  float64x2_t z = vdivq_f64 (num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-1.0))));
+  float64x2_t shift2 = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (0.5))));
+  shift = vaddq_f64 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
+  float64x2_t z2 = vmulq_f64 (z, z);
+  float64x2_t z3 = vmulq_f64 (z2, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  float64x2_t c24 = vld1q_f64 (&d->c2);
+  float64x2_t c68 = vld1q_f64 (&d->c6);
+
+  /* Order-7 Estrin.  */
+  float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0);
+  float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1);
+  float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0);
+  float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1);
+
+  float64x2_t p14 = vfmaq_f64 (p12, z4, p34);
+  float64x2_t p58 = vfmaq_f64 (p56, z4, p78);
+  float64x2_t p18 = vfmaq_f64 (p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c1012 = vld1q_f64 (&d->c10);
+  float64x2_t c1416 = vld1q_f64 (&d->c14);
+  float64x2_t c1820 = vld1q_f64 (&d->c18);
+
+  float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0);
+  float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1);
+  float64x2_t p912 = vfmaq_f64 (p910, z4, p1112);
+
+  float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0);
+  float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1);
+  float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516);
+
+  float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0);
+  float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1);
+  float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920);
+
+  float64x2_t p916 = vfmaq_f64 (p912, z8, p1316);
+  float64x2_t p920 = vfmaq_f64 (p916, z16, p1720);
+
+  float64x2_t poly = vfmaq_f64 (p18, z16, p920);
+
+  /* y = shift + z * P(z^2).  */
+  float64x2_t ret = vfmaq_f64 (shift, z, d->c0);
+  ret = vfmaq_f64 (ret, z3, poly);
+
+  if (__glibc_unlikely (v_any_u64 (special_cases)))
+    return special_case (y, x, ret, sign_xy, special_cases);
+
+  /* Account for the sign of x and y.  */
+  return vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan2pi_sve.c b/sysdeps/aarch64/fpu/atan2pi_sve.c
new file mode 100644
index 0000000..f1d1f1c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pi_sve.c
@@ -0,0 +1,159 @@
+/* Double-Precision vector (SVE) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "math_config.h"
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)/pi~x*P(x^2) on
+     [2^-1022, 1.0].  */
+  .c0 = 0x1.45f306dc9c883p-2,	.c1 = -0x1.b2995e7b7ba4ap-4,
+  .c2 = 0x1.04c26be3d2c1p-4,	.c3 = -0x1.7483759c17ea1p-5,
+  .c4 = 0x1.21bb95c315d57p-5,	.c5 = -0x1.da1bdc3d453f3p-6,
+  .c6 = 0x1.912d20459b4bfp-6,	.c7 = -0x1.5bbd4545cad1fp-6,
+  .c8 = 0x1.331b83bec30a1p-6,	.c9 = -0x1.13d6457f44de3p-6,
+  .c10 = 0x1.f8e802974db94p-7,	.c11 = -0x1.d7e173ab04a1ap-7,
+  .c12 = 0x1.bdfa47d6a4f28p-7,	.c13 = -0x1.9ba78f3232ceep-7,
+  .c14 = 0x1.5e6044590ab4fp-7,	.c15 = -0x1.01ccfdeb9f77fp-7,
+  .c16 = 0x1.345cf0d4eb1c1p-8,	.c17 = -0x1.19e5f00f67e3ap-9,
+  .c18 = 0x1.6d3035ac7625bp-11, .c19 = -0x1.286bb9ae4ed79p-13,
+  .c20 = 0x1.c37ec36da0e1ap-17, .shift_val = 0.5,
+};
+
+#define OneOverPi sv_f64 (0x1.45f306dc9c883p-2)
+
+/* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
+static svfloat64_t NOINLINE
+special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
+	      const svbool_t cmp)
+{
+  ret = sv_call2_f64 (atan2, y, x, ret, cmp);
+  return svmul_f64_m (cmp, ret, OneOverPi);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+   of 0, infinity or nan.  */
+static inline svbool_t
+zeroinfnan (svuint64_t i, const svbool_t pg)
+{
+  return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+		  sv_u64 (2 * asuint64 (INFINITY) - 1));
+}
+
+/* Fast implementation of SVE atan2pi.
+   Maximum observed error is 3.11 ulps:
+   _ZGVsMxvv_atan2pi (0x1.ef284a877f6b5p+6, 0x1.03fdde8242b17p+7)
+   got 0x1.f00f800163079p-3 want 0x1.f00f800163076p-3.  */
+svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
+				  const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t iy = svreinterpret_u64 (y);
+
+  svbool_t cmp_x = zeroinfnan (ix, pg);
+  svbool_t cmp_y = zeroinfnan (iy, pg);
+  svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+  svfloat64_t ax = svabs_x (pg, x);
+  svfloat64_t ay = svabs_x (pg, y);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t iay = svreinterpret_u64 (ay);
+
+  svuint64_t sign_x = sveor_x (pg, ix, iax);
+  svuint64_t sign_y = sveor_x (pg, iy, iay);
+  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+  svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+  /* Set up z for evaluation of atanpi.  */
+  svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat64_t den = svsel (pred_aygtax, ay, ax);
+  svfloat64_t z = svdiv_x (pg, num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  shift = svmul_x (ptrue, shift, sv_f64 (d->shift_val));
+  shift = svsel (pred_aygtax, sv_f64 (d->shift_val), shift);
+  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
+
+  /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
+  svfloat64_t z2 = svmul_x (pg, z, z);
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+  svfloat64_t z16 = svmul_x (pg, z8, z8);
+
+  /* Order-7 Estrin.  */
+  svfloat64_t c24 = svld1rq (ptrue, &d->c2);
+  svfloat64_t c68 = svld1rq (ptrue, &d->c6);
+
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1);
+  svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0);
+  svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1);
+
+  svfloat64_t p14 = svmla_x (pg, p12, z4, p34);
+  svfloat64_t p58 = svmla_x (pg, p56, z4, p78);
+  svfloat64_t p18 = svmla_x (pg, p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c1012 = svld1rq (ptrue, &d->c10);
+  svfloat64_t c1416 = svld1rq (ptrue, &d->c14);
+  svfloat64_t c1820 = svld1rq (ptrue, &d->c18);
+
+  svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0);
+  svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1);
+  svfloat64_t p912 = svmla_x (pg, p910, z4, p1112);
+
+  svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0);
+  svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1);
+  svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516);
+
+  svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0);
+  svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1);
+  svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920);
+
+  svfloat64_t p916 = svmla_x (pg, p912, z8, p1316);
+  svfloat64_t p920 = svmla_x (pg, p916, z16, p1720);
+
+  svfloat64_t poly = svmla_x (pg, p18, z16, p920);
+
+  svfloat64_t ret = svmla_x (pg, shift, z, sv_f64 (d->c0));
+  ret = svmla_x (pg, ret, z3, poly);
+
+  /* Account for the sign of x and y.  */
+  if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+    return special_case (
+	y, x,
+	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
+	cmp_xy);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan2pif_advsimd.c b/sysdeps/aarch64/fpu/atan2pif_advsimd.c
new file mode 100644
index 0000000..f1f542b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pif_advsimd.c
@@ -0,0 +1,138 @@
+/* Single-Precision vector (Advanced SIMD) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c1, c3, c5, c7;
+  float c2, c4, c6, c8;
+  float32x4_t c0;
+  uint32x4_t comp_const;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)/pi~x*P(x^2) on
+     [2^-128, 1.0].
+     Generated using fpminimax between FLT_MIN and 1.  */
+  .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4),
+  .c2 = 0x1.0490e4p-4,	    .c3 = V4 (-0x1.70c272p-5),
+  .c4 = 0x1.0eef52p-5,	    .c5 = V4 (-0x1.6abbbap-6),
+  .c6 = 0x1.78157p-7,	    .c7 = V4 (-0x1.f0b406p-9),
+  .c8 = 0x1.2ae7fep-11,	    .comp_const = V4 (2 * 0x7f800000lu - 1),
+};
+
+#define SignMask v_u32 (0x80000000)
+#define OneOverPi v_f32 (0x1.45f307p-2)
+
+/* Special cases i.e. 0, infinity and nan (fall back to scalar calls).  */
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t y, float32x4_t x, float32x4_t ret,
+	      uint32x4_t sign_xy, uint32x4_t cmp)
+{
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
+
+  /* Since we have no scalar fallback for atan2pif,
+     we can instead make a call to atan2f and divide by pi.  */
+  ret = v_call2_f32 (atan2f, y, x, ret, cmp);
+
+  /* Only divide the special cases by pi, and leave the rest unchanged.  */
+  return vbslq_f32 (cmp, vmulq_f32 (ret, OneOverPi), ret);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline uint32x4_t
+zeroinfnan (uint32x4_t i, const struct data *d)
+{
+  /* 2 * i - 1 >= 2 * 0x7f800000lu - 1.  */
+  return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const);
+}
+
+/* Fast implementation of vector atan2pif. Maximum observed error is 2.89 ULP:
+   _ZGVnN4vv_atan2pif (0x1.bd397p+54, 0x1.e79a4ap+54) got 0x1.e2678ep-3
+						     want 0x1.e26794p-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2pi) (float32x4_t y,
+						    float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t iy = vreinterpretq_u32_f32 (y);
+
+  uint32x4_t special_cases
+      = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d));
+
+  uint32x4_t sign_x = vandq_u32 (ix, SignMask);
+  uint32x4_t sign_y = vandq_u32 (iy, SignMask);
+  uint32x4_t sign_xy = veorq_u32 (sign_x, sign_y);
+
+  float32x4_t ax = vabsq_f32 (x);
+  float32x4_t ay = vabsq_f32 (y);
+
+  uint32x4_t pred_xlt0 = vcltzq_f32 (x);
+  uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax);
+
+  /* Set up z for evaluation of atanpif.  */
+  float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
+  float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax);
+  float32x4_t z = vdivq_f32 (num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  float32x4_t shift = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f))));
+  float32x4_t shift2 = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f))));
+  shift = vaddq_f32 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z2, z);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  float32x4_t c2468 = vld1q_f32 (&d->c2);
+
+  float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, c2468, 0);
+  float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, c2468, 1);
+  float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, c2468, 2);
+  float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, c2468, 3);
+  float32x4_t p14 = vfmaq_f32 (p12, z4, p34);
+  float32x4_t p58 = vfmaq_f32 (p56, z4, p78);
+
+  float32x4_t poly = vfmaq_f32 (p14, z8, p58);
+
+  /* y = shift + z * P(z^2).  */
+  float32x4_t ret = vfmaq_f32 (shift, z, d->c0);
+  ret = vfmaq_f32 (ret, z3, poly);
+
+  if (__glibc_unlikely (v_any_u32 (special_cases)))
+    {
+      return special_case (y, x, ret, sign_xy, special_cases);
+    }
+
+  /* Account for the sign of x and y.  */
+  return vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
+}
+libmvec_hidden_def (V_NAME_F2 (atan2pi))
+HALF_WIDTH_ALIAS_F2 (atan2pi)
diff --git a/sysdeps/aarch64/fpu/atan2pif_sve.c b/sysdeps/aarch64/fpu/atan2pif_sve.c
new file mode 100644
index 0000000..d5ac4b7
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pif_sve.c
@@ -0,0 +1,137 @@
+/* Single-Precision vector (SVE) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c0, c1, c3, c5, c7;
+  float32_t c2, c4, c6, c8;
+  float32_t shift_val;
+  uint32_t comp_const;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)/pi~x*P(x^2) on
+     [2**-128, 1.0].  */
+  .c0 = 0x1.45f306p-2,
+  .c1 = -0x1.b2975ep-4,
+  .c2 = 0x1.0490e4p-4,
+  .c3 = -0x1.70c272p-5,
+  .c4 = 0x1.0eef52p-5,
+  .c5 = -0x1.6abbbap-6,
+  .c6 = 0x1.78157p-7,
+  .c7 = -0x1.f0b406p-9,
+  .c8 = 0x1.2ae7fep-11,
+  .shift_val = 0.5f,
+  .comp_const = 2 * 0x7f800000lu - 1,
+};
+
+#define OneOverPi sv_f32 (0x1.45f307p-2)
+
+/* Special lanes (0, inf, nan): scalar atan2f scaled by 1/pi, rest kept.  */
+static svfloat32_t NOINLINE
+special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
+	      const svbool_t cmp)
+{
+  ret = sv_call2_f32 (atan2f, y, x, ret, cmp);
+  return svmul_f32_m (cmp, ret, OneOverPi);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+   of 0, infinity or nan.  */
+static inline svbool_t
+zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
+{
+  return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+		  sv_u32 (d->comp_const));
+}
+
+/* Fast implementation of SVE atan2pif based on atan(x) ~ shift + z + z^3 *
+   P(z^2) with reduction to [0,1] using z=1/x and shift = 1/2. Maximum
+   observed error is 2.90 ULP:
+   _ZGVsMxvv_atan2pif (0x1.a28542p+5, 0x1.adb7c6p+5) got 0x1.f76524p-3
+						    want 0x1.f7651ep-3.  */
+svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
+				  const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t ix = svreinterpret_u32 (x);
+  svuint32_t iy = svreinterpret_u32 (y);
+
+  svbool_t cmp_x = zeroinfnan (ix, pg, d);
+  svbool_t cmp_y = zeroinfnan (iy, pg, d);
+  svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+  svfloat32_t ax = svabs_x (pg, x);
+  svfloat32_t ay = svabs_x (pg, y);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  svuint32_t iay = svreinterpret_u32 (ay);
+
+  svuint32_t sign_x = sveor_x (pg, ix, iax);
+  svuint32_t sign_y = sveor_x (pg, iy, iay);
+  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+  svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+  /* Set up z for evaluation of atanpif.  */
+  svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat32_t den = svsel (pred_aygtax, ay, ax);
+  svfloat32_t z = svdiv_x (ptrue, num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
+  shift = svmul_x (ptrue, shift, sv_f32 (d->shift_val));
+  shift = svsel (pred_aygtax, sv_f32 (d->shift_val), shift);
+  shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
+
+  /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
+  svfloat32_t z2 = svmul_x (pg, z, z);
+  svfloat32_t z4 = svmul_x (pg, z2, z2);
+  svfloat32_t z8 = svmul_x (pg, z4, z4);
+
+  svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2);
+
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1);
+  svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2);
+  svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3);
+
+  svfloat32_t p14 = svmad_x (pg, z4, p34, p12);
+  svfloat32_t p58 = svmad_x (pg, z4, p78, p56);
+
+  svfloat32_t p18 = svmad_x (pg, z8, p58, p14);
+
+  /* ret = shift + z + z^3 * P(z^2).  */
+  svfloat32_t poly = svmad_x (pg, z2, p18, d->c0);
+  svfloat32_t ret = svmad_x (pg, poly, z, shift);
+
+  if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+    return special_case (
+	y, x,
+	svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
+	cmp_xy);
+
+  /* Account for the sign of x and y.  */
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c
index f024fd1..da0d371 100644
--- a/sysdeps/aarch64/fpu/atan_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan_advsimd.c
@@ -18,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
@@ -28,16 +27,16 @@ static const struct data
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
 	      [2**-1022, 1.0].  */
-  .c0 = V2 (-0x1.5555555555555p-2),	  .c1 = 0x1.99999999996c1p-3,
-  .c2 = V2 (-0x1.2492492478f88p-3),	  .c3 = 0x1.c71c71bc3951cp-4,
-  .c4 = V2 (-0x1.745d160a7e368p-4),	  .c5 = 0x1.3b139b6a88ba1p-4,
-  .c6 = V2 (-0x1.11100ee084227p-4),	  .c7 = 0x1.e1d0f9696f63bp-5,
-  .c8 = V2 (-0x1.aebfe7b418581p-5),	  .c9 = 0x1.842dbe9b0d916p-5,
-  .c10 = V2 (-0x1.5d30140ae5e99p-5),	  .c11 = 0x1.338e31eb2fbbcp-5,
-  .c12 = V2 (-0x1.00e6eece7de8p-5),	  .c13 = 0x1.860897b29e5efp-6,
-  .c14 = V2 (-0x1.0051381722a59p-6),	  .c15 = 0x1.14e9dc19a4a4ep-7,
-  .c16 = V2 (-0x1.d0062b42fe3bfp-9),	  .c17 = 0x1.17739e210171ap-10,
-  .c18 = V2 (-0x1.ab24da7be7402p-13),	  .c19 = 0x1.358851160a528p-16,
+  .c0 = V2 (-0x1.555555555552ap-2),	  .c1 = 0x1.9999999995aebp-3,
+  .c2 = V2 (-0x1.24924923923f6p-3),	  .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = V2 (-0x1.745d11fb3d32bp-4),	  .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = V2 (-0x1.110e6d985f496p-4),	  .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = V2 (-0x1.ae644e28058c3p-5),	  .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = V2 (-0x1.59d7f901566cbp-5),	  .c11 = 0x1.2c982855ab069p-5,
+  .c12 = V2 (-0x1.eb49592998177p-6),	  .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = V2 (-0x1.ca980345c4204p-7),	  .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = V2 (-0x1.7ea70755b8eccp-9),	  .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = V2 (-0x1.44a4b059b6f67p-13),	  .c19 = 0x1.c4a45029e5a91p-17,
   .pi_over_2 = V2 (0x1.921fb54442d18p+0),
 };
 
@@ -47,9 +46,9 @@ static const struct data
 
 /* Fast implementation of vector atan.
    Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
-   z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
-   _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
-				       want 0x1.9225645bdd7c3p-1.  */
+   z=-1/x and shift = +-pi/2. Maximum observed error is 2.45 ulps:
+   _ZGVnN2v_atan (0x1.0008d737eb3e6p+0) got 0x1.92288c551a4c1p-1
+				       want 0x1.92288c551a4c3p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -78,59 +77,53 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
      y := arctan(x) for x < 1
      y := pi/2 + arctan(-1/x) for x > 1
      Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0));
+  uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0));
   /* Avoid dependency in abs(x) in division (and comparison).  */
-  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x);
+  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x);
+
   float64x2_t shift = vreinterpretq_f64_u64 (
       vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2)));
-  /* Use absolute value only when needed (odd powers of z).  */
-  float64x2_t az = vbslq_f64 (
-      SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z);
-
-  /* Calculate the polynomial approximation.
-     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.
-     The order 19 polynomial P approximates
-     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
+
+  /* Reinsert sign bit from argument into the shift value.  */
+  shift = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (shift), sign));
+
+  /* Calculate polynomial approximation P(z^2) with deg(P)=19.  */
   float64x2_t z2 = vmulq_f64 (z, z);
-  float64x2_t x2 = vmulq_f64 (z2, z2);
-  float64x2_t x4 = vmulq_f64 (x2, x2);
-  float64x2_t x8 = vmulq_f64 (x4, x4);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
 
-  /* estrin_7.  */
+  /* Order-7 Estrin.  */
   float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
   float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
-  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
 
   float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
   float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
-  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
 
-  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
 
-  /* estrin_11.  */
+  /* Order-11 Estrin.  */
   float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
   float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
-  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
 
   float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
   float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
-  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+  float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415);
 
   float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
   float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
-  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+  float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819);
 
-  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
-  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+  float64x2_t p815 = vfmaq_f64 (p811, z8, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, z16, p1619);
 
-  float64x2_t y = vfmaq_f64 (p07, p819, x8);
+  float64x2_t y = vfmaq_f64 (p07, p819, z16);
 
   /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  y = vfmaq_f64 (az, y, vmulq_f64 (z2, az));
-  y = vaddq_f64 (y, shift);
-
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign));
-  return y;
+  y = vfmsq_f64 (v_f64 (-1.0), z2, y);
+  return vfmsq_f64 (shift, z, y);
 }
diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c
index 3880ced..a6b0489 100644
--- a/sysdeps/aarch64/fpu/atan_sve.c
+++ b/sysdeps/aarch64/fpu/atan_sve.c
@@ -18,23 +18,26 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[20];
-  float64_t pi_over_2;
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val, neg_one;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-1022, 1.0].  */
-  .poly = { -0x1.5555555555555p-2,  0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
-            0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-            -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
-            0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-            -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
-            0x1.14e9dc19a4a4ep-7,  -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-            -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
-  .pi_over_2 = 0x1.921fb54442d18p+0,
+  .c0 = -0x1.555555555552ap-2,	     .c1 = 0x1.9999999995aebp-3,
+  .c2 = -0x1.24924923923f6p-3,	     .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = -0x1.745d11fb3d32bp-4,	     .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = -0x1.110e6d985f496p-4,	     .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = -0x1.ae644e28058c3p-5,	     .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = -0x1.59d7f901566cbp-5,	     .c11 = 0x1.2c982855ab069p-5,
+  .c12 = -0x1.eb49592998177p-6,	     .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = -0x1.ca980345c4204p-7,	     .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = -0x1.7ea70755b8eccp-9,	     .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = -0x1.44a4b059b6f67p-13,     .c19 = 0x1.c4a45029e5a91p-17,
+  .shift_val = 0x1.490fdaa22168cp+1, .neg_one = -1,
 };
 
 /* Useful constants.  */
@@ -43,15 +46,14 @@ static const struct data
 /* Fast implementation of SVE atan.
    Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
    z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed
-   error is 2.27 ulps:
-   _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
-				       want 0x1.9225645bdd7c3p-1.  */
+   error is 2.08 ulps:
+   _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1
+				       want 0x1.922a3163e15c4p-1.  */
 svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* No need to trigger special case. Small cases, infs and nans
-     are supported by our approximation technique.  */
+  svbool_t ptrue = svptrue_b64 ();
   svuint64_t ix = svreinterpret_u64 (x);
   svuint64_t sign = svand_x (pg, ix, SignMask);
 
@@ -59,32 +61,60 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
      y := arctan(x) for x < 1
      y := pi/2 + arctan(-1/x) for x > 1
      Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  svbool_t red = svacgt (pg, x, 1.0);
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x);
-  /* Use absolute value only when needed (odd powers of z).  */
-  svfloat64_t az = svabs_x (pg, z);
-  az = svneg_m (az, red, az);
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x);
+
+  /* Reuse -1.0 to reduce constant loads: the required shift of pi/2 is
+     created via -1 + (1 + pi/2), with shift_val holding 1 + pi/2.  */
+  svfloat64_t shift
+      = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign));
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
-  svfloat64_t z2 = svmul_x (pg, z, z);
-  svfloat64_t x2 = svmul_x (pg, z2, z2);
-  svfloat64_t x4 = svmul_x (pg, x2, x2);
-  svfloat64_t x8 = svmul_x (pg, x4, x4);
+  svfloat64_t z2 = svmul_x (ptrue, z, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+  svfloat64_t z16 = svmul_x (ptrue, z8, z8);
 
-  svfloat64_t y
-      = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly),
-		 sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8);
+  /* Order-7 Estrin.  */
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
 
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat64_t z3 = svmul_x (pg, z2, az);
-  y = svmla_x (pg, az, z3, y);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, z8, p47);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+  svfloat64_t c1315 = svld1rq (ptrue, &d->c13);
+  svfloat64_t c1719 = svld1rq (ptrue, &d->c17);
 
-  /* Apply shift as indicated by `red` predicate.  */
-  y = svadd_m (red, y, d->pi_over_2);
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
 
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1);
+  svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415);
 
-  return y;
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0);
+  svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1);
+  svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819);
+
+  svfloat64_t p815 = svmla_x (pg, p811, z8, p1215);
+  svfloat64_t p819 = svmla_x (pg, p815, z16, p1619);
+
+  svfloat64_t y = svmla_x (pg, p07, z16, p819);
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  shift = svadd_m (red, z, shift);
+  y = svmul_x (pg, z2, y);
+  return svmla_x (pg, shift, z, y);
 }
diff --git a/sysdeps/aarch64/fpu/atanf_advsimd.c b/sysdeps/aarch64/fpu/atanf_advsimd.c
index 472865e..817a47e 100644
--- a/sysdeps/aarch64/fpu/atanf_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanf_advsimd.c
@@ -22,26 +22,35 @@
 
 static const struct data
 {
+  uint32x4_t sign_mask, pi_over_2;
+  float32x4_t neg_one;
+#if WANT_SIMD_EXCEPT
   float32x4_t poly[8];
-  float32x4_t pi_over_2;
+} data = {
+  .poly = { V4 (-0x1.5554dcp-2), V4 (0x1.9978ecp-3), V4 (-0x1.230a94p-3),
+	    V4 (0x1.b4debp-4), V4 (-0x1.3550dap-4), V4 (0x1.61eebp-5),
+	    V4 (-0x1.0c17d4p-6), V4 (0x1.7ea694p-9) },
+#else
+  float32x4_t c0, c2, c4, c6;
+  float c1, c3, c5, c7;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].
      Generated using fpminimax between FLT_MIN and 1.  */
-  .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f),
-	    V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f),
-	    V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) },
-  .pi_over_2 = V4 (0x1.921fb6p+0f),
+  .c0 = V4 (-0x1.5554dcp-2),	.c1 = 0x1.9978ecp-3,
+  .c2 = V4 (-0x1.230a94p-3),	.c3 = 0x1.b4debp-4,
+  .c4 = V4 (-0x1.3550dap-4),	.c5 = 0x1.61eebp-5,
+  .c6 = V4 (-0x1.0c17d4p-6),	.c7 = 0x1.7ea694p-9,
+#endif
+  .pi_over_2 = V4 (0x3fc90fdb),
+  .neg_one = V4 (-1.0f),
+  .sign_mask = V4 (0x80000000),
 };
 
-#define SignMask v_u32 (0x80000000)
-
-#define P(i) d->poly[i]
-
+#if WANT_SIMD_EXCEPT
 #define TinyBound 0x30800000 /* asuint(0x1p-30).  */
 #define BigBound 0x4e800000  /* asuint(0x1p30).  */
 
-#if WANT_SIMD_EXCEPT
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 {
@@ -51,19 +60,20 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
 /* Fast implementation of vector atanf based on
    atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
-   using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps:
-   _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1.  */
+   using z=-1/x and shift = pi/2. Maximum observed error is 2.02 ulps:
+   _ZGVnN4v_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
+				 want 0x1.95ed36p-1.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Small cases, infs and nans are supported by our approximation technique,
-     but do not set fenv flags correctly. Only trigger special case if we need
-     fenv.  */
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
-  uint32x4_t sign = vandq_u32 (ix, SignMask);
+  uint32x4_t sign = vandq_u32 (ix, d->sign_mask);
 
 #if WANT_SIMD_EXCEPT
+  /* Small cases, infs and nans are supported by our approximation technique,
+     but do not set fenv flags correctly. Only trigger special case if we need
+     fenv.  */
   uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000));
   uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)),
 				  v_u32 (BigBound - TinyBound));
@@ -71,41 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x)
   if (__glibc_unlikely (v_any_u32 (special)))
     return special_case (x, x, v_u32 (-1));
 #endif
-
   /* Argument reduction:
-     y := arctan(x) for x < 1
-     y := pi/2 + arctan(-1/x) for x > 1
-     Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0));
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x);
+     y := arctan(x) for |x| < 1
+     y := arctan(-1/x) + pi/2 for x > +1
+     y := arctan(-1/x) - pi/2 for x < -1
+     Hence, use z=-1/x if |x|>|-1|, otherwise z=x.  */
+  uint32x4_t red = vcagtq_f32 (x, d->neg_one);
+
+  float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x);
+
+  /* Shift is calculated as +-pi/2 or 0, depending on the argument case.  */
   float32x4_t shift = vreinterpretq_f32_u32 (
-      vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2)));
-  /* Use absolute value only when needed (odd powers of z).  */
-  float32x4_t az = vbslq_f32 (
-      SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z);
+      vandq_u32 (red, veorq_u32 (d->pi_over_2, sign)));
+
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z, z2);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+#if WANT_SIMD_EXCEPT
 
   /* Calculate the polynomial approximation.
      Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
      a standard implementation using z8 creates spurious underflow
      in the very last fma (when z^8 is small enough).
-     Therefore, we split the last fma into a mul and an fma.
-     Horner and single-level Estrin have higher errors that exceed
-     threshold.  */
-  float32x4_t z2 = vmulq_f32 (z, z);
-  float32x4_t z4 = vmulq_f32 (z2, z2);
-
+     Therefore, we split the last fma into a mul and an fma.  */
   float32x4_t y = vfmaq_f32 (
       v_pairwise_poly_3_f32 (z2, z4, d->poly), z4,
       vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4)));
 
-  /* y = shift + z * P(z^2).  */
-  y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift);
+#else
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  /* Uses an Estrin scheme for polynomial approximation.  */
+  float32x4_t odd_coeffs = vld1q_f32 (&d->c1);
+
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, odd_coeffs, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, odd_coeffs, 1);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, odd_coeffs, 2);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, odd_coeffs, 3);
 
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign));
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p47 = vfmaq_f32 (p45, z4, p67);
 
-  return y;
+  float32x4_t y = vfmaq_f32 (p03, z8, p47);
+#endif
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  return vfmaq_f32 (vaddq_f32 (shift, z), z3, y);
 }
 libmvec_hidden_def (V_NAME_F1 (atan))
 HALF_WIDTH_ALIAS_F1 (atan)
diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c
index 3a98d70..6558223 100644
--- a/sysdeps/aarch64/fpu/atanf_sve.c
+++ b/sysdeps/aarch64/fpu/atanf_sve.c
@@ -18,18 +18,26 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 static const struct data
 {
-  float32_t poly[8];
-  float32_t pi_over_2;
+  float32_t c1, c3, c5, c7;
+  float32_t c0, c2, c4, c6;
+  float32_t shift_val, neg_one;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
     [2**-128, 1.0].  */
-  .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
-  .pi_over_2 = 0x1.921fb6p+0f,
+  .c0 = -0x1.5554dcp-2,
+  .c1 = 0x1.9978ecp-3,
+  .c2 = -0x1.230a94p-3,
+  .c3 = 0x1.b4debp-4,
+  .c4 = -0x1.3550dap-4,
+  .c5 = 0x1.61eebp-5,
+  .c6 = -0x1.0c17d4p-6,
+  .c7 = 0x1.7ea694p-9,
+  /*  pi/2, used as a shift value after reduction.  */
+  .shift_val = 0x1.921fb54442d18p+0,
+  .neg_one = -1.0f,
 };
 
 #define SignMask (0x80000000)
@@ -37,43 +45,49 @@ static const struct data
 /* Fast implementation of SVE atanf based on
    atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
    z=-1/x and shift = pi/2.
-   Largest observed error is 2.9 ULP, close to +/-1.0:
-   _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1
-				 want -0x1.967fp-1.  */
+   Largest observed error is 2.12 ULP:
+   _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
+				 want 0x1.95ed36p-1.  */
 svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
 
   /* No need to trigger special case. Small cases, infs and nans
      are supported by our approximation technique.  */
   svuint32_t ix = svreinterpret_u32 (x);
-  svuint32_t sign = svand_x (pg, ix, SignMask);
+  svuint32_t sign = svand_x (ptrue, ix, SignMask);
 
   /* Argument reduction:
      y := arctan(x) for x < 1
-     y := pi/2 + arctan(-1/x) for x > 1
-     Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  svbool_t red = svacgt (pg, x, 1.0f);
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x);
-  /* Use absolute value only when needed (odd powers of z).  */
-  svfloat32_t az = svabs_x (pg, z);
-  az = svneg_m (az, red, az);
-
-  /* Use split Estrin scheme for P(z^2) with deg(P)=7.  */
-  svfloat32_t z2 = svmul_x (pg, z, z);
-  svfloat32_t z4 = svmul_x (pg, z2, z2);
-  svfloat32_t z8 = svmul_x (pg, z4, z4);
-
-  svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly);
-
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat32_t z3 = svmul_x (pg, z2, az);
-  y = svmla_x (pg, az, z3, y);
-
-  /* Apply shift as indicated by 'red' predicate.  */
-  y = svadd_m (red, y, sv_f32 (d->pi_over_2));
-
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+     y := arctan(-1/x) + pi/2 for x > +1
+     y := arctan(-1/x) - pi/2 for x < -1
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (d->neg_one), x), x);
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  svfloat32_t shift = svreinterpret_f32 (
+      sveor_x (red, svreinterpret_u32 (sv_f32 (d->shift_val)), sign));
+
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z3 = svmul_x (ptrue, z2, z);
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1);
+
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
+  svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3);
+
+  svfloat32_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat32_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat32_t y = svmla_x (pg, p03, z8, p47);
+
+  /* shift + z + z^3 * P(z^2).  */
+  shift = svadd_m (red, z, shift);
+  return svmla_x (pg, shift, z3, y);
 }
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
index 16a7cf6..958d69a 100644
--- a/sysdeps/aarch64/fpu/atanh_sve.c
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -30,7 +30,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 }
 
 /* SVE approximation for double-precision atanh, based on log1p.
-   The greatest observed error is 2.81 ULP:
+   The greatest observed error is 3.3 ULP:
    _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
 				      want 0x1.ffd8ff31b501cp-6.  */
 svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
@@ -42,7 +42,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
   svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
 
   /* It is special if iax >= 1.  */
-//   svbool_t special = svcmpge (pg, iax, One);
   svbool_t special = svacge (pg, x, 1.0);
 
   /* Computation is performed based on the following sequence of equality:
diff --git a/sysdeps/aarch64/fpu/atanpi_advsimd.c b/sysdeps/aarch64/fpu/atanpi_advsimd.c
new file mode 100644
index 0000000..9101419
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpi_advsimd.c
@@ -0,0 +1,117 @@
+/* Double-Precision vector (Advanced SIMD) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  double c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64x2_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+	      [2^-1022, 1.0].  */
+  .c0 = V2 (0x1.45f306dc9c883p-2), .c1 = V2 (-0x1.b2995e7b7ba4ap-4),
+  .c2 = 0x1.04c26be3d2c1p-4,	   .c3 = V2 (-0x1.7483759c17ea1p-5),
+  .c4 = 0x1.21bb95c315d57p-5,	   .c5 = V2 (-0x1.da1bdc3d453f3p-6),
+  .c6 = 0x1.912d20459b4bfp-6,	   .c7 = V2 (-0x1.5bbd4545cad1fp-6),
+  .c8 = 0x1.331b83bec30a1p-6,	   .c9 = V2 (-0x1.13d6457f44de3p-6),
+  .c10 = 0x1.f8e802974db94p-7,	   .c11 = V2 (-0x1.d7e173ab04a1ap-7),
+  .c12 = 0x1.bdfa47d6a4f28p-7,	   .c13 = V2 (-0x1.9ba78f3232ceep-7),
+  .c14 = 0x1.5e6044590ab4fp-7,	   .c15 = V2 (-0x1.01ccfdeb9f77fp-7),
+  .c16 = 0x1.345cf0d4eb1c1p-8,	   .c17 = V2 (-0x1.19e5f00f67e3ap-9),
+  .c18 = 0x1.6d3035ac7625bp-11,	   .c19 = V2 (-0x1.286bb9ae4ed79p-13),
+  .c20 = 0x1.c37ec36da0e1ap-17,
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+
+/* Fast implementation of vector atanpi.
+   atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using
+   z=-1/x and shift = +-1/2. Maximum observed error is 2.76 ulps:
+   _ZGVnN2v_atanpi(0x1.fa2d6912cd64fp-1) got 0x1.fc45a51bd497fp-3
+					want 0x1.fc45a51bd497cp-3.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (atanpi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t sign = vandq_u64 (ix, SignMask);
+
+  /* Argument Reduction:
+     y := arctanpi(x) for |x| < 1
+     y := arctanpi(-1/x) + 1/2 for x > 1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0));
+  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x);
+
+  /* Shift is calculated as +1/2 or 0, depending on the argument case.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (red, vreinterpretq_u64_f64 (v_f64 (0.5))));
+
+  /* Reinsert sign bit from argument into the shift value.  */
+  shift = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (shift), sign));
+
+  /* Calculate polynomial approximation P(z^2) with deg(P)=20.  */
+  float64x2_t z2 = vmulq_f64 (z, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  float64x2_t c24 = vld1q_f64 (&d->c2);
+  float64x2_t c68 = vld1q_f64 (&d->c6);
+
+  /* Order-7 Estrin.  */
+  float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0);
+  float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1);
+  float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0);
+  float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1);
+
+  float64x2_t p14 = vfmaq_f64 (p12, z4, p34);
+  float64x2_t p58 = vfmaq_f64 (p56, z4, p78);
+  float64x2_t p18 = vfmaq_f64 (p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c1012 = vld1q_f64 (&d->c10);
+  float64x2_t c1416 = vld1q_f64 (&d->c14);
+  float64x2_t c1820 = vld1q_f64 (&d->c18);
+
+  float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0);
+  float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1);
+  float64x2_t p912 = vfmaq_f64 (p910, z4, p1112);
+
+  float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0);
+  float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1);
+  float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516);
+
+  float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0);
+  float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1);
+  float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920);
+
+  float64x2_t p916 = vfmaq_f64 (p912, z8, p1316);
+  float64x2_t p920 = vfmaq_f64 (p916, z16, p1720);
+
+  float64x2_t y = vfmaq_f64 (p18, p920, z16);
+
+  y = vfmaq_f64 (d->c0, z2, y);
+
+  /* y = shift + z * p(z^2).  */
+  return vfmaq_f64 (shift, z, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanpi_sve.c b/sysdeps/aarch64/fpu/atanpi_sve.c
new file mode 100644
index 0000000..3f8f277
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpi_sve.c
@@ -0,0 +1,127 @@
+/* Double-Precision vector (SVE) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val, neg_one;
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+     [2^-1022, 1.0].  */
+  .c0 = 0x1.45f306dc9c883p-2,
+  .c1 = -0x1.b2995e7b7ba4ap-4,
+  .c2 = 0x1.04c26be3d2c1p-4,
+  .c3 = -0x1.7483759c17ea1p-5,
+  .c4 = 0x1.21bb95c315d57p-5,
+  .c5 = -0x1.da1bdc3d453f3p-6,
+  .c6 = 0x1.912d20459b4bfp-6,
+  .c7 = -0x1.5bbd4545cad1fp-6,
+  .c8 = 0x1.331b83bec30a1p-6,
+  .c9 = -0x1.13d6457f44de3p-6,
+  .c10 = 0x1.f8e802974db94p-7,
+  .c11 = -0x1.d7e173ab04a1ap-7,
+  .c12 = 0x1.bdfa47d6a4f28p-7,
+  .c13 = -0x1.9ba78f3232ceep-7,
+  .c14 = 0x1.5e6044590ab4fp-7,
+  .c15 = -0x1.01ccfdeb9f77fp-7,
+  .c16 = 0x1.345cf0d4eb1c1p-8,
+  .c17 = -0x1.19e5f00f67e3ap-9,
+  .c18 = 0x1.6d3035ac7625bp-11,
+  .c19 = -0x1.286bb9ae4ed79p-13,
+  .c20 = 0x1.c37ec36da0e1ap-17,
+  .shift_val = 1.5,
+  .neg_one = -1,
+};
+
+/* Fast implementation of SVE atanpi.
+   Based on atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using
+   z=-1/x and shift = +-1/2. Largest errors are close to 1. The maximum
+   observed error is 2.80 ulps:
+   _ZGVsMxv_atanpi(0x1.f19587d63c76fp-1) got 0x1.f6b1304817d02p-3
+					want 0x1.f6b1304817d05p-3.  */
+svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svbool_t ptrue = svptrue_b64 ();
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t sign = svand_x (pg, ix, 0x8000000000000000);
+
+  /* Argument reduction:
+     y := arctanpi(x) for |x| < 1
+     y := arctanpi(-1/x) +- 1/2 for |x| > 1 (sign taken from x)
+     Hence, use z=-1/x if |x|>1, otherwise z=x.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x);
+
+  /* Reuse -1.0 to reduce constant loads: the required shift of 1/2 is
+     created via -1 + (1 + 1/2), with shift_val holding 1 + 1/2.  */
+  svfloat64_t shift
+      = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign));
+
+  /* Use split Estrin scheme for P(z^2) with deg(P)=20.  */
+  svfloat64_t z2 = svmul_x (ptrue, z, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+  svfloat64_t z16 = svmul_x (ptrue, z8, z8);
+
+  /* Order-7 Estrin.  */
+  svfloat64_t c24 = svld1rq (ptrue, &d->c2);
+  svfloat64_t c68 = svld1rq (ptrue, &d->c6);
+
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1);
+  svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0);
+  svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1);
+
+  svfloat64_t p14 = svmla_x (pg, p12, z4, p34);
+  svfloat64_t p58 = svmla_x (pg, p56, z4, p78);
+  svfloat64_t p18 = svmla_x (pg, p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c1012 = svld1rq (ptrue, &d->c10);
+  svfloat64_t c1416 = svld1rq (ptrue, &d->c14);
+  svfloat64_t c1820 = svld1rq (ptrue, &d->c18);
+
+  svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0);
+  svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1);
+  svfloat64_t p912 = svmla_x (pg, p910, z4, p1112);
+
+  svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0);
+  svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1);
+  svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516);
+
+  svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0);
+  svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1);
+  svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920);
+
+  svfloat64_t p916 = svmla_x (pg, p912, z8, p1316);
+  svfloat64_t p920 = svmla_x (pg, p916, z16, p1720);
+
+  svfloat64_t y = svmla_x (pg, p18, z16, p920);
+
+  y = svmla_x (pg, sv_f64 (d->c0), z2, y);
+  return svmla_x (pg, shift, z, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanpif_advsimd.c b/sysdeps/aarch64/fpu/atanpif_advsimd.c
new file mode 100644
index 0000000..9295156
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpif_advsimd.c
@@ -0,0 +1,92 @@
+/* Single-Precision vector (Advanced SIMD) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  uint32x4_t half;
+  float32x4_t neg_one;
+  float32x4_t c0, c1, c3, c5, c7;
+  float c2, c4, c6, c8;
+} data = {
+  /* Polynomial coefficients generated using Remez algorithm, see
+     atanpi.sollya.  Even coefficients c2-c8 share one vector load.  */
+  .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4),
+  .c2 = 0x1.0490e4p-4,	    .c3 = V4 (-0x1.70c272p-5),
+  .c4 = 0x1.0eef52p-5,	    .c5 = V4 (-0x1.6abbbap-6),
+  .c6 = 0x1.78157p-7,	    .c7 = V4 (-0x1.f0b406p-9),
+  .c8 = 0x1.2ae7fep-11,	    .half = V4 (0x3f000000),
+  .neg_one = V4 (-1.0f),
+};
+
+#define SignMask v_u32 (0x80000000)
+
+/* Fast implementation of vector atanpif based on
+   atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1]
+   using z=-1/x and shift = +-1/2.
+   Maximum observed error is 2.59ulps:
+   _ZGVnN4v_atanpif (0x1.f2a89cp-1) got 0x1.f76524p-3
+				   want 0x1.f7651ep-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanpi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t sign = vandq_u32 (ix, SignMask);
+
+  /* Argument Reduction:
+     y := arctanpi(x) for |x| <= 1
+     y := arctanpi(-1/x) + 1/2 for x > 1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/x if |x|>|-1|, otherwise z=x.  */
+  uint32x4_t red = vcagtq_f32 (x, d->neg_one);
+
+  float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x);
+
+  /* Shift is calculated as +1/2 or 0, depending on the argument case.  */
+  float32x4_t shift = vreinterpretq_f32_u32 (vandq_u32 (red, d->half));
+
+  /* Reinsert sign bit from argument into the shift value.  */
+  shift = vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (shift), sign));
+
+  /* Uses an Estrin scheme for polynomial approximation.  */
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  float32x4_t even_coeffs = vld1q_f32 (&d->c2);
+
+  float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, even_coeffs, 0);
+  float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, even_coeffs, 1);
+  float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, even_coeffs, 2);
+  float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, even_coeffs, 3);
+
+  float32x4_t p14 = vfmaq_f32 (p12, z4, p34);
+  float32x4_t p58 = vfmaq_f32 (p56, z4, p78);
+
+  float32x4_t y = vfmaq_f32 (p14, z8, p58);
+  y = vfmaq_f32 (d->c0, z2, y);
+
+  /* y = shift + z * P(z^2).  */
+  return vfmaq_f32 (shift, z, y);
+}
+libmvec_hidden_def (V_NAME_F1 (atanpi))
+HALF_WIDTH_ALIAS_F1 (atanpi)
diff --git a/sysdeps/aarch64/fpu/atanpif_sve.c b/sysdeps/aarch64/fpu/atanpif_sve.c
new file mode 100644
index 0000000..2abd788
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpif_sve.c
@@ -0,0 +1,89 @@
+/* Single-Precision vector (SVE) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c2, c4, c6, c8;
+  float32_t c0, c1, c3, c5, c7;
+  float32_t shift_val, neg_one;
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+     [2**-128, 1.0].  */
+  .c0 = 0x1.45f306p-2,	.c1 = -0x1.b2975ep-4, .c2 = 0x1.0490e4p-4,
+  .c3 = -0x1.70c272p-5, .c4 = 0x1.0eef52p-5,  .c5 = -0x1.6abbbap-6,
+  .c6 = 0x1.78157p-7,	.c7 = -0x1.f0b406p-9, .c8 = 0x1.2ae7fep-11,
+  .shift_val = 1.5f,	.neg_one = -1.0f,
+};
+
+#define SignMask (0x80000000)
+
+/* Fast implementation of SVE atanpif based on
+   atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using
+   z=-1/x and shift = +-1/2.
+   Largest observed error is 2.59 ULP, close to +/-1.0:
+   _ZGVsMxv_atanpif(0x1.f2a89cp-1) got 0x1.f76524p-3
+				  want 0x1.f7651ep-3.  */
+svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  /* No need to trigger special case. Small cases, infs and nans
+     are supported by our approximation technique.  */
+  svuint32_t ix = svreinterpret_u32 (x);
+  svuint32_t sign = svand_x (pg, ix, SignMask);
+
+  /* Argument reduction:
+     y := arctanpi(x) for |x| <= 1
+     y := arctanpi(-1/x) + 1/2 for x > +1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/x if |x|>|-1|, otherwise z=x.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat32_t z = svsel (red, svdiv_x (ptrue, sv_f32 (d->neg_one), x), x);
+
+  /* Reuse of -1.0f to reduce constant loads.
+     We need a shift value of 1/2, which is created via -1 + (1 + 1/2).  */
+  svfloat32_t shift
+      = svadd_z (red, sv_f32 (d->neg_one), sv_f32 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (shift), sign));
+
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2);
+
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1);
+  svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2);
+  svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3);
+
+  svfloat32_t p14 = svmad_x (pg, z4, p34, p12);
+  svfloat32_t p58 = svmad_x (pg, z4, p78, p56);
+
+  svfloat32_t p18 = svmad_x (pg, z8, p58, p14);
+  svfloat32_t y = svmad_x (pg, z2, p18, d->c0);
+
+  /* shift + z * P(z^2).  */
+  return svmad_x (pg, y, z, shift);
+}
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 5152c0d..77ae10d 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -37,6 +37,10 @@
 # define __DECL_SIMD_acosh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_acoshf
 # define __DECL_SIMD_acoshf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acospi
+# define __DECL_SIMD_acospi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acospif
+# define __DECL_SIMD_acospif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asin
 # define __DECL_SIMD_asin __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asinf
@@ -45,6 +49,10 @@
 # define __DECL_SIMD_asinh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asinhf
 # define __DECL_SIMD_asinhf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinpi
+# define __DECL_SIMD_asinpi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinpif
+# define __DECL_SIMD_asinpif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan
 # define __DECL_SIMD_atan __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atanf
@@ -53,10 +61,18 @@
 # define __DECL_SIMD_atanh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atanhf
 # define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanpi
+# define __DECL_SIMD_atanpi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanpif
+# define __DECL_SIMD_atanpif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2
 # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2f
 # define __DECL_SIMD_atan2f __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atan2pi
+# define __DECL_SIMD_atan2pi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atan2pif
+# define __DECL_SIMD_atan2pif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_cbrt
 # define __DECL_SIMD_cbrt __DECL_SIMD_aarch64
 # undef __DECL_SIMD_cbrtf
@@ -176,12 +192,16 @@ typedef __SVBool_t __sv_bool_t;
 #  define __vpcs __attribute__ ((__aarch64_vector_pcs__))
 
 __vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_atan2pif (__f32x4_t, __f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_acospif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_asinpif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_atanpif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
@@ -207,12 +227,16 @@ __vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_tanpif (__f32x4_t);
 
 __vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_atan2pi (__f64x2_t, __f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_acospi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_asinpi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_atanpi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
@@ -243,12 +267,16 @@ __vpcs __f64x2_t _ZGVnN2v_tanpi (__f64x2_t);
 #ifdef __SVE_VEC_MATH_SUPPORTED
 
 __sv_f32_t _ZGVsMxvv_atan2f (__sv_f32_t, __sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxvv_atan2pif (__sv_f32_t, __sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_acospif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_asinpif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_atanpif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
@@ -274,12 +302,16 @@ __sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_tanpif (__sv_f32_t, __sv_bool_t);
 
 __sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxvv_atan2pi (__sv_f64_t, __sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_acospi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_asinpi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_atanpi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
index 77e58e1..f5a163b 100644
--- a/sysdeps/aarch64/fpu/cosh_sve.c
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -21,71 +21,99 @@
 
 static const struct data
 {
-  float64_t poly[3];
-  float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
+  double c0, c2;
+  double c1, c3;
+  float64_t inv_ln2, ln2_hi, ln2_lo, shift;
   uint64_t special_bound;
 } data = {
-  .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
-	    0x1.5555576a59599p-5, },
-
-  .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2.  */
-  /* -ln2/N.  */
-  .ln2_hi = -0x1.62e42fefa39efp-9,
-  .ln2_lo = -0x1.abc9e3b39803f3p-64,
-  .shift = 0x1.8p+52,
-  .thres = 704.0,
-
-  /* 0x1.6p9, above which exp overflows.  */
-  .special_bound = 0x4086000000000000,
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1.fffffffffdbcdp-2,
+  .c1 = 0x1.555555555444cp-3,
+  .c2 = 0x1.555573c6a9f7dp-5,
+  .c3 = 0x1.1111266d28935p-7,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  /* 1/ln2.  */
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ff80p+46, /* 1.5*2^46+1022.  */
+
+  /* asuint(ln(2^(1024 - 1/128))), the value above which exp overflows.  */
+  .special_bound = 0x40862e37e7d8ba72,
 };
 
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special)
-{
-  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
-  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
-  svfloat64_t y = svadd_x (pg, half_t, half_over_t);
-  return sv_call_f64 (cosh, x, y, special);
-}
-
-/* Helper for approximating exp(x). Copied from sv_exp_tail, with no
-   special-case handling or tail.  */
+/* Helper for approximating exp(x)/2.
+   Functionally identical to FEXPA exp(x), but with an adjustment in
+   the shift value which reduces the exponent of scale by 1,
+   thus halving the result at no cost.  */
 static inline svfloat64_t
-exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
   /* Calculate exp(x).  */
   svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
   svfloat64_t n = svsub_x (pg, z, d->shift);
 
-  svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi);
-  r = svmla_x (pg, r, n, d->ln2_lo);
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
 
-  svuint64_t u = svreinterpret_u64 (z);
-  svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
-  svuint64_t i = svand_x (svptrue_b64 (), u, 0xff);
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
 
-  svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
-  y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
-  y = svmla_x (pg, sv_f64 (1.0), r, y);
-  y = svmul_x (svptrue_b64 (), r, y);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
+  svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
+  svfloat64_t p = svmla_x (pg, r, p04, r2);
 
-  /* s = 2^(n/N).  */
-  u = svld1_gather_index (pg, __v_exp_tail_data, i);
-  svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e));
+  svfloat64_t scale = svexpa (u);
 
-  return svmla_x (pg, s, s, y);
+  return svmla_x (pg, scale, scale, p);
+}
+
+/* Vectorised special case for lanes where exp_over_two_inline overflows.
+   Halves the input and uses the identity exp(x) = exp(x/2)^2 to double the
+   valid range of inputs, and returns inf for anything past that.  */
+static svfloat64_t NOINLINE
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax, svfloat64_t t,
+	      const struct data *d)
+{
+  /* Finish fast path for non-special lanes: t + 0.25 / t, t = exp(|x|)/2.  */
+  svfloat64_t inv_twoexp = svdivr_x (pg, t, 0.25);
+  svfloat64_t y = svadd_x (pg, t, inv_twoexp);
+
+  /* Halves input value, and then check if any cases
+     are still going to overflow.  */
+  ax = svmul_x (special, ax, 0.5);
+  svbool_t is_safe
+      = svcmplt (special, svreinterpret_u64 (ax), d->special_bound);
+
+  /* Computes exp(x/2)/2, and sets any overflowing lanes to inf.  */
+  svfloat64_t half_exp = exp_over_two_inline (special, ax, d);
+  half_exp = svsel (is_safe, half_exp, sv_f64 (INFINITY));
+
+  /* Construct special case cosh(x) = (exp(x/2)^2)/2.  */
+  svfloat64_t exp = svmul_x (svptrue_b64 (), half_exp, 2);
+  svfloat64_t special_y = svmul_x (special, exp, half_exp);
+
+  /* Select correct return values for special and non-special cases.  */
+  special_y = svsel (special, special_y, y);
+
+  /* Ensure an input of nan is correctly propagated.  */
+  svbool_t is_nan
+      = svcmpgt (special, svreinterpret_u64 (ax), sv_u64 (0x7ff0000000000000));
+  return svsel (is_nan, ax, special_y);
+}
 
 /* Approximation for SVE double-precision cosh(x) using exp_inline.
    cosh(x) = (exp(x) + exp(-x)) / 2.
-   The greatest observed error is in the scalar fall-back region, so is the
-   same as the scalar routine, 1.93 ULP:
-   _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021
-				       want 0x1.fd774e958236fp+1021.
-
-   The greatest observed error in the non-special region is 1.54 ULP:
-   _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8
-				       want 0x1.f5e2bb8d5c991p+8.  */
+   The greatest observed error in special case region is 2.66 + 0.5 ULP:
+   _ZGVsMxv_cosh (0x1.633b532ffbc1ap+9) got 0x1.f9b2d3d22399ep+1023
+				       want 0x1.f9b2d3d22399bp+1023
+
+  The greatest observed error in the non-special region is 1.01 + 0.5 ULP:
+  _ZGVsMxv_cosh (0x1.998ecbb3c1f81p+1) got 0x1.890b225657f84p+3
+				      want 0x1.890b225657f82p+3.  */
 svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
@@ -94,14 +122,13 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
   svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound);
 
   /* Up to the point that exp overflows, we can use it to calculate cosh by
-     exp(|x|) / 2 + 1 / (2 * exp(|x|)).  */
-  svfloat64_t t = exp_inline (ax, pg, d);
+     exp(|x|)/2 + 1 / (2 * exp(|x|)).  */
+  svfloat64_t half_exp = exp_over_two_inline (pg, ax, d);
 
-  /* Fall back to scalar for any special cases.  */
+  /* Falls back to entirely standalone vectorized special case.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, pg, t, special);
+    return special_case (pg, special, ax, half_exp, d);
 
-  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
-  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
-  return svadd_x (pg, half_t, half_over_t);
+  svfloat64_t inv_twoexp = svdivr_x (pg, half_exp, 0.25);
+  return svadd_x (pg, half_exp, inv_twoexp);
 }
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index 1a74db2..f3e7f8b 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -19,26 +19,19 @@
 
 #include "sv_math.h"
 
-/* For x < -Thres, the result is subnormal and not handled correctly by
-   FEXPA.  */
-#define Thres 37.9
+/* For x < -Thres (-log10(2^126)), the result is subnormal and not handled
+   correctly by FEXPA.  */
+#define Thres 0x1.2f702p+5
 
 static const struct data
 {
-  float log2_10_lo, c0, c2, c4;
-  float c1, c3, log10_2;
-  float shift, log2_10_hi, thres;
+  float log10_2, log2_10_hi, log2_10_lo, c1;
+  float c0, shift, thres;
 } data = {
   /* Coefficients generated using Remez algorithm with minimisation of relative
-     error.
-     rel error: 0x1.89dafa3p-24
-     abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
-     maxerr: 0.52 +0.5 ulp.  */
-  .c0 = 0x1.26bb16p+1f,
-  .c1 = 0x1.5350d2p+1f,
-  .c2 = 0x1.04744ap+1f,
-  .c3 = 0x1.2d8176p+0f,
-  .c4 = 0x1.12b41ap-1f,
+     error.  */
+  .c0 = 0x1.26bb62p1,
+  .c1 = 0x1.53524cp1,
   /* 1.5*2^17 + 127, a shift value suitable for FEXPA.  */
   .shift = 0x1.803f8p17f,
   .log10_2 = 0x1.a934fp+1,
@@ -53,28 +46,23 @@ sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
   /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
      with poly(r) in [1/sqrt(2), sqrt(2)] and
      x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N].  */
-
-  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log10_2);
 
   /* n = round(x/(log10(2)/N)).  */
   svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
-  svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
+  svfloat32_t z = svmla_lane (shift, x, lane_consts, 0);
+  svfloat32_t n = svsub_x (pg, z, shift);
 
   /* r = x - n*log10(2)/N.  */
-  svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
-  r = svmls_lane (r, n, lane_consts, 0);
+  svfloat32_t r = x;
+  r = svmls_lane (r, n, lane_consts, 1);
+  r = svmls_lane (r, n, lane_consts, 2);
 
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
   /* Polynomial evaluation: poly(r) ~ exp10(r)-1.  */
-  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
-  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
-  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
-  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
+  svfloat32_t poly = svmla_lane (sv_f32 (d->c0), r, lane_consts, 3);
+  poly = svmul_x (pg, poly, r);
   return svmla_x (pg, scale, scale, poly);
 }
 
@@ -85,11 +73,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
 		      special);
 }
 
-/* Single-precision SVE exp10f routine. Implements the same algorithm
-   as AdvSIMD exp10f.
-   Worst case error is 1.02 ULPs.
-   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
-				  want 0x1.ba5f9cp-1.  */
+/* Single-precision SVE exp10f routine. Based on the FEXPA instruction.
+   Worst case error is 1.10 ULP.
+   _ZGVsMxv_exp10f (0x1.cc76dep+3) got 0x1.be0172p+47
+				  want 0x1.be017p+47.  */
 svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c
index 6db8526..c135852 100644
--- a/sysdeps/aarch64/fpu/exp2_sve.c
+++ b/sysdeps/aarch64/fpu/exp2_sve.c
@@ -19,23 +19,21 @@
 
 #include "sv_math.h"
 
-#define N (1 << V_EXP_TABLE_BITS)
-
 #define BigBound 1022
 #define UOFlowBound 1280
 
 static const struct data
 {
-  double c0, c2;
-  double c1, c3;
+  double c2, c4;
+  double c0, c1, c3;
   double shift, big_bound, uoflow_bound;
 } data = {
   /* Coefficients are computed using Remez algorithm with
      minimisation of the absolute error.  */
-  .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3,
-  .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7,
-  .shift = 0x1.8p52 / N,      .uoflow_bound = UOFlowBound,
-  .big_bound = BigBound,
+  .c0 = 0x1.62e42fefa39efp-1,  .c1 = 0x1.ebfbdff82a31bp-3,
+  .c2 = 0x1.c6b08d706c8a5p-5,  .c3 = 0x1.3b2ad2ff7d2f3p-7,
+  .c4 = 0x1.5d8761184beb3p-10, .shift = 0x1.800000000ffc0p+46,
+  .uoflow_bound = UOFlowBound, .big_bound = BigBound,
 };
 
 #define SpecialOffset 0x6000000000000000 /* 0x1p513.  */
@@ -64,50 +62,52 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
       svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
 
   /* |n| > 1280 => 2^(n) overflows.  */
-  svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
+  svbool_t p_cmp = svacle (pg, n, d->uoflow_bound);
 
   svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
   svfloat64_t r2 = svmla_x (pg, s2, s2, y);
   svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
 
-  return svsel (p_cmp, r1, r0);
+  return svsel (p_cmp, r0, r1);
 }
 
 /* Fast vector implementation of exp2.
-   Maximum measured error is 1.65 ulp.
-   _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
-				       want 0x1.f8db0d4df721dp-1.  */
+   Maximum measured error is 0.52 + 0.5 ulp.
+   _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0
+				       want 0x1.8861641b49e07p+0.  */
 svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
-  svbool_t no_big_scale = svacle (pg, x, d->big_bound);
-  svbool_t special = svnot_z (pg, no_big_scale);
-
-  /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N].  */
-  svfloat64_t shift = sv_f64 (d->shift);
-  svfloat64_t kd = svadd_x (pg, x, shift);
-  svuint64_t ki = svreinterpret_u64 (kd);
-  /* kd = k/N.  */
-  kd = svsub_x (pg, kd, shift);
-  svfloat64_t r = svsub_x (pg, x, kd);
-
-  /* scale ~= 2^(k/N).  */
-  svuint64_t idx = svand_x (pg, ki, N - 1);
-  svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx);
-  /* This is only a valid scale when -1023*N < k < 1024*N.  */
-  svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
-  svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
-
-  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
-  /* Approximate exp2(r) using polynomial.  */
-  /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4.  */
+  svbool_t special = svacge (pg, x, d->big_bound);
+
+  svfloat64_t z = svadd_x (svptrue_b64 (), x, d->shift);
+  svfloat64_t n = svsub_x (svptrue_b64 (), z, d->shift);
+  svfloat64_t r = svsub_x (svptrue_b64 (), x, n);
+
+  svfloat64_t scale = svexpa (svreinterpret_u64 (z));
+
   svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
-  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
-  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
-  svfloat64_t p = svmla_x (pg, p01, p23, r2);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  /* Approximate exp2(r) using polynomial.  */
+  /* y = exp2(r) - 1 ~= r * (C0 + C1 r + C2 r^2 + C3 r^3 + C4 r^4).  */
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  svfloat64_t p = svmla_x (pg, p12, p34, r2);
+  p = svmad_x (pg, p, r, d->c0);
   svfloat64_t y = svmul_x (svptrue_b64 (), r, p);
+
   /* Assemble exp2(x) = exp2(r) * scale.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (pg, scale, y, kd, d);
+    {
+      /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+         special case function so needs to be copied.
+         e = sign bit of asuint (z) << 46.  */
+      svuint64_t e = svand_x (pg, svlsl_x (pg, svreinterpret_u64 (z), 46),
+            0x8000000000000000);
+      scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+      return special_case (pg, scale, y, n, d);
+    }
+
   return svmla_x (pg, scale, scale, y);
 }
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index fcd7830..989cefb 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -18,21 +18,17 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 #define Thres 0x1.5d5e2ap+6f
 
 static const struct data
 {
-  float c0, c2, c4, c1, c3;
-  float shift, thres;
+  float c0, c1, shift, thres;
 } data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant.  */
-  .c0 = 0x1.62e422p-1f,
-  .c1 = 0x1.ebf9bcp-3f,
-  .c2 = 0x1.c6bd32p-5f,
-  .c3 = 0x1.3ce9e4p-7f,
-  .c4 = 0x1.59977ap-10f,
+  /* Coefficients generated using Remez algorithm with minimisation of relative
+     error.  */
+  .c0 = 0x1.62e485p-1,
+  .c1 = 0x1.ebfbe0p-3,
   /* 1.5*2^17 + 127.  */
   .shift = 0x1.803f8p17f,
   /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
@@ -51,16 +47,8 @@ sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
-  /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
-     Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
-     coefficients 1 to 4, and apply most significant coefficient directly.  */
-  svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
-  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
-  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
-  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
-  svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
-  svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+  svfloat32_t poly = svmla_x (pg, sv_f32 (d->c0), r, sv_f32 (d->c1));
+  poly = svmul_x (svptrue_b32 (), poly, r);
 
   return svmla_x (pg, scale, scale, poly);
 }
@@ -72,11 +60,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
 		      special);
 }
 
-/* Single-precision SVE exp2f routine. Implements the same algorithm
-   as AdvSIMD exp2f.
-   Worst case error is 1.04 ULPs.
-   _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
-				 want 0x1.ba6a64p-1.  */
+/* Single-precision SVE exp2f routine, based on the FEXPA instruction.
+   Worst case error is 1.09 ULPs.
+   _ZGVsMxv_exp2f (0x1.9a2a94p-1) got 0x1.be1054p+0
+				 want 0x1.be1052p+0.  */
 svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c
index d4ba8cc..b1d940b 100644
--- a/sysdeps/aarch64/fpu/expm1_sve.c
+++ b/sysdeps/aarch64/fpu/expm1_sve.c
@@ -18,82 +18,164 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
-#define SpecialBound 0x1.62b7d369a5aa9p+9
-#define ExponentBias 0x3ff0000000000000
+#define FexpaBound 0x1.4cb5ecef28adap-3 /* 15*ln2/64.  */
+#define SpecialBound 0x1.628c2855bfaddp+9 /* ln(2^(1023 + 1/128)).  */
 
 static const struct data
 {
-  double poly[11];
-  double shift, inv_ln2, special_bound;
-  /* To be loaded in one quad-word.  */
+  double c2, c4;
+  double inv_ln2;
   double ln2_hi, ln2_lo;
+  double c0, c1, c3;
+  double shift, thres;
+  uint64_t expm1_data[32];
 } data = {
-  /* Generated using fpminimax.  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-            0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13,
-            0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-            0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .special_bound = SpecialBound,
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .ln2_hi = 0x1.62e42fefa39efp-1,
-  .ln2_lo = 0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
+  /* Table emulating FEXPA - 1, for values of FEXPA close to 1.
+  The table holds values of 2^(i/64) - 1, computed in arbitrary precision.
+  The first half of the table stores values associated to i from 0 to 15.
+  The second half of the table stores values associated to i from 0 to -15.  */
+  .expm1_data = {
+      0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+      0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+      0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+		  0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+      0x0000000000000000, 0xbfc331751ec3a814, 0xbfc20224341286e4, 0xbfc0cf85bed0f8b7,
+      0xbfbf332113d56b1f, 0xbfbcc0768d4175a6, 0xbfba46f918837cb7, 0xbfb7c695afc3b424,
+		  0xbfb53f391822dbc7, 0xbfb2b0cfe1266bd4, 0xbfb01b466423250a, 0xbfaafd11874c009e,
+      0xbfa5b505d5b6f268, 0xbfa05e4119ea5d89, 0xbf95f134923757f3, 0xbf860f9f985bc9f4,
+    },
+
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023.  */
+  .thres = SpecialBound,
 };
 
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t pg)
+#define SpecialOffset 0x6000000000000000 /* 0x1p513.  */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0).  */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769.  */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254.  */
+
+static NOINLINE svfloat64_t
+special_case (svbool_t pg, svfloat64_t y, svfloat64_t s, svfloat64_t p,
+	      svfloat64_t n)
 {
-  return sv_call_f64 (expm1, x, y, pg);
+  /* s=2^n may overflow, break it up into s=s1*s2,
+     such that exp = s + s*y can be computed as s1*(s2+s2*y)
+     and s1*s1 overflows only if n>0.  */
+
+  /* If n<=0 then set b to SpecialOffset (0x60...0), 0 otherwise.  */
+  svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0.  */
+  svuint64_t b
+      = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0.  */
+
+  /* Set s1 to generate overflow depending on sign of exponent n,
+     ie. s1 = 0x70...0 - b.  */
+  svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+  /* Offset s to avoid overflow in final result if n is below threshold.
+     ie. s2 = as_u64 (s) - 0x3010...0 + b.  */
+  svfloat64_t s2 = svreinterpret_f64 (
+      svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
+
+  /* |n| > 1280 => 2^(n) overflows.  */
+  svbool_t p_cmp = svacgt (pg, n, 1280.0);
+
+  svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
+  svfloat64_t r2 = svmla_x (pg, s2, s2, p);
+  svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
+
+  svbool_t is_safe = svacle (pg, n, 1023); /* Only correct special lanes.  */
+  return svsel (is_safe, y, svsub_x (pg, svsel (p_cmp, r1, r0), 1.0));
 }
 
-/* Double-precision vector exp(x) - 1 function.
-   The maximum error observed error is 2.18 ULP:
-   _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
-				       want 0x1.a8b9ea8d66e2p-2.  */
+/* FEXPA based SVE expm1 algorithm.
+   Maximum measured error is 2.81 + 0.5 ULP:
+   _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3
+				       want 0x1.c290e5858bb5p-3.  */
 svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Large, Nan/Inf.  */
-  svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
-
-  /* Reduce argument to smaller range:
-     Let i = round(x / ln2)
-     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where 2^i is exact because i is an integer.  */
-  svfloat64_t shift = sv_f64 (d->shift);
-  svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift);
-  svint64_t i = svcvt_s64_x (pg, n);
-  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
-  svfloat64_t f = svmls_lane (x, n, ln2, 0);
-  f = svmls_lane (f, n, ln2, 1);
-
-  /* Approximate expm1(f) using polynomial.
-     Taylor expansion for expm1(x) has the form:
-	 x + ax^2 + bx^3 + cx^4 ....
-     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
-     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t f8 = svmul_x (pg, f4, f4);
-  svfloat64_t p
-      = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
-
-  /* Assemble the result.
-   expm1(x) ~= 2^i * (p + 1) - 1
-   Let t = 2^i.  */
-  svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias);
-  svfloat64_t t = svreinterpret_f64 (u);
-
-  /* expm1(x) ~= p * t + (t - 1).  */
-  svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t);
+  svbool_t special = svacgt (pg, x, d->thres);
 
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
 
+  /* r = x - n * ln2, r is in [-ln2/128, ln2/128].  */
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+  svfloat64_t scalem1 = svsub_x (pg, scale, sv_f64 (1.0));
+
+  /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.
+
+     This bound is based upon the table size:
+	   Bound = (TableSize-1)/64 * ln2.
+     The current bound is based upon a table size of 16.  */
+  svbool_t is_small = svaclt (pg, x, FexpaBound);
+
+  if (svptest_any (pg, is_small))
+    {
+      /* Index via the input of FEXPA, but we only care about the lower 4 bits.
+       */
+      svuint64_t base_idx = svand_x (pg, u, 0xf);
+
+      /* We can use the sign of x as a fifth bit to account for the asymmetry
+	 of e^x around 0.  */
+      svuint64_t signBit
+	  = svlsl_x (pg, svlsr_x (pg, svreinterpret_u64 (x), 63), 4);
+      svuint64_t idx = svorr_x (pg, base_idx, signBit);
+
+      /* Lookup values for scale - 1 for small x.  */
+      svfloat64_t lookup = svreinterpret_f64 (
+	  svld1_gather_index (is_small, d->expm1_data, idx));
+
+      /* Select the appropriate scale - 1 value based on x.  */
+      scalem1 = svsel (is_small, lookup, scalem1);
+    }
+
+  svfloat64_t y = svmla_x (pg, scalem1, scale, p);
+
+  /* FEXPA returns nan for large inputs so we special case those.  */
+  if (__glibc_unlikely (svptest_any (pg, special)))
+    {
+      /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+          special case function so needs to be copied.
+          e = sign bit of u << 46.  */
+      svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000);
+      /* Copy sign to scale.  */
+      scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+      return special_case (pg, y, scale, p, n);
+    }
+
+  /* return expm1 = (scale - 1) + (scale * poly).  */
   return y;
 }
diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c
index 862c13f..821c078 100644
--- a/sysdeps/aarch64/fpu/log1p_sve.c
+++ b/sysdeps/aarch64/fpu/log1p_sve.c
@@ -22,19 +22,33 @@
 
 static const struct data
 {
-  double poly[19];
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
   double ln2_hi, ln2_lo;
   uint64_t hfrt2_top, onemhfrt2_top, inf, mone;
 } data = {
   /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20
-     polynomial, however first 2 coefficients are 0 and 1 so are not stored.  */
-  .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
-	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
-	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
-	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
-	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
-	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
-	    -0x1.cfa7385bdb37ep-6, },
+     polynomial, however first 2 coefficients are 0 and 1 so are not
+     stored.  */
+  .c0 = -0x1.ffffffffffffbp-2,
+  .c1 = 0x1.55555555551a9p-2,
+  .c2 = -0x1.00000000008e3p-2,
+  .c3 = 0x1.9999999a32797p-3,
+  .c4 = -0x1.555555552fecfp-3,
+  .c5 = 0x1.249248e071e5ap-3,
+  .c6 = -0x1.ffffff8bf8482p-4,
+  .c7 = 0x1.c71c8f07da57ap-4,
+  .c8 = -0x1.9999ca4ccb617p-4,
+  .c9 = 0x1.7459ad2e1dfa3p-4,
+  .c10 = -0x1.554d2680a3ff2p-4,
+  .c11 = 0x1.3b4c54d487455p-4,
+  .c12 = -0x1.2548a9ffe80e6p-4,
+  .c13 = 0x1.0f389a24b2e07p-4,
+  .c14 = -0x1.eee4db15db335p-5,
+  .c15 = 0x1.e95b494d4a5ddp-5,
+  .c16 = -0x1.15fdf07cb7c73p-4,
+  .c17 = 0x1.0310b70800fcfp-4,
+  .c18 = -0x1.cfa7385bdb37ep-6,
   .ln2_hi = 0x1.62e42fefa3800p-1,
   .ln2_lo = 0x1.ef35793c76730p-45,
   /* top32(asuint64(sqrt(2)/2)) << 32.  */
@@ -49,7 +63,7 @@ static const struct data
 #define BottomMask 0xffffffff
 
 static svfloat64_t NOINLINE
-special_case (svbool_t special, svfloat64_t x, svfloat64_t y)
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 {
   return sv_call_f64 (log1p, x, y, special);
 }
@@ -91,8 +105,9 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
   /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
   svuint64_t utop
       = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top);
-  svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask));
-  svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1);
+  svuint64_t u_red
+      = svorr_x (pg, utop, svand_x (svptrue_b64 (), mi, BottomMask));
+  svfloat64_t f = svsub_x (svptrue_b64 (), svreinterpret_f64 (u_red), 1);
 
   /* Correction term c/m.  */
   svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m);
@@ -103,18 +118,49 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
      Hence approximation has the form f + f^2 * P(f)
      where P(x) = C0 + C1*x + C2x^2 + ...
      Assembling this all correctly is dealt with at the final step.  */
-  svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2),
-	      f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8);
-  svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly);
+  svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f),
+	      f4 = svmul_x (svptrue_b64 (), f2, f2),
+	      f8 = svmul_x (svptrue_b64 (), f4, f4),
+	      f16 = svmul_x (svptrue_b64 (), f8, f8);
+
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+  svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17);
+
+  /* Order-18 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, f2, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, f2, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, f4, p47);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1);
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1);
+
+  svfloat64_t p811 = svmla_x (pg, p89, f2, p1011);
+  svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415);
+  svfloat64_t p815 = svmla_x (pg, p811, f4, p1215);
+
+  svfloat64_t p015 = svmla_x (pg, p07, f8, p815);
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0);
+  svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1);
+  svfloat64_t p = svmla_x (pg, p015, f16, p1618);
 
   svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo);
   svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi);
-  svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (special, x, y);
-
-  return y;
+    return special_case (
+	x, svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p),
+	special);
+  return svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p);
 }
 
 strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1))
diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
index 963453f..072ba8f 100644
--- a/sysdeps/aarch64/fpu/sinh_sve.c
+++ b/sysdeps/aarch64/fpu/sinh_sve.c
@@ -18,90 +18,153 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[11];
-  float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift;
   uint64_t halff;
-  int64_t onef;
-  uint64_t large_bound;
+  double c2, c4;
+  double inv_ln2;
+  double ln2_hi, ln2_lo;
+  double c0, c1, c3;
+  double shift, special_bound, bound;
+  uint64_t expm1_data[20];
 } data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-	    0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
-	    0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
-	    0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-	    0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .m_ln2_hi = -0x1.62e42fefa39efp-1,
-  .m_ln2_lo = -0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
-
+  /* Table lookup of 2^(i/64) - 1, for values of i from 0..19.  */
+  .expm1_data = {
+    0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+    0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+    0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+    0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+    0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+  },
+
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023.  */
   .halff = 0x3fe0000000000000,
-  .onef = 0x3ff0000000000000,
-  /* 2^9. expm1 helper overflows for large input.  */
-  .large_bound = 0x4080000000000000,
+  .special_bound = 0x1.62e37e7d8ba72p+9,	/* ln(2^(1024 - 1/128)).  */
+  .bound = 0x1.a56ef8ec924ccp-3 /* 19*ln2/64.  */
 };
 
+/* A specialised FEXPA expm1 that is only valid for positive inputs and
+   has no special cases.  Based on the full FEXPA expm1 implemented for
+   _ZGVsMxv_expm1, slightly modified to keep sinh under 3.5ULP.  */
 static inline svfloat64_t
-expm1_inline (svfloat64_t x, svbool_t pg)
+expm1_inline (svbool_t pg, svfloat64_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Reduce argument:
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where i = round(x / ln2)
-     and   f = x - i * ln2 (f in [-ln2/2, ln2/2]).  */
-  svfloat64_t j
-      = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
-  svint64_t i = svcvt_s64_x (pg, j);
-  svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi);
-  f = svmla_x (pg, f, j, d->m_ln2_lo);
-  /* Approximate expm1(f) using polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t f8 = svmul_x (pg, f4, f4);
-  svfloat64_t p
-      = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
-  /* t = 2^i.  */
-  svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return svmla_x (pg, svsub_x (pg, t, 1.0), p, t);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
+
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+
+  /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.  */
+  svbool_t is_small = svaclt (pg, x, d->bound);
+
+  /* Index via the input of FEXPA, but we only care about the lower 5 bits.  */
+  svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+  /* Compute scale - 1 from FEXPA, and lookup values where this fails.  */
+  svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+  svuint64_t scalem1_lookup
+      = svld1_gather_index (is_small, d->expm1_data, base_idx);
+
+  /* Select the appropriate scale - 1 value based on x.  */
+  svfloat64_t scalem1
+      = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+
+  /* return expm1 = scale - 1 + (scale * poly).  */
+  return svmla_x (pg, scalem1, scale, p);
 }
 
+/* Vectorised special case to handle values past where expm1_inline overflows.
+   Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double
+   the valid range of inputs, and returns inf for anything past that.  */
 static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg)
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax,
+	      svfloat64_t halfsign, const struct data *d)
 {
-  return sv_call_f64 (sinh, x, x, pg);
+  /* Halves input value, and then check if any cases
+     are still going to overflow.  */
+  ax = svmul_x (special, ax, 0.5);
+  svbool_t is_safe = svaclt (special, ax, d->special_bound);
+
+  svfloat64_t t = expm1_inline (pg, ax);
+
+  /* Finish fast path to compute values for non-special cases.  */
+  svfloat64_t y = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+  y = svmul_x (pg, y, halfsign);
+
+  /* Computes special lane, and set remaining overflow lanes to inf.  */
+  svfloat64_t half_special_y = svmul_x (svptrue_b64 (), t, halfsign);
+  svfloat64_t special_y = svmul_x (svptrue_b64 (), half_special_y, t);
+
+  svuint64_t signed_inf
+      = svorr_x (svptrue_b64 (), svreinterpret_u64 (halfsign),
+		 sv_u64 (0x7ff0000000000000));
+  special_y = svsel (is_safe, special_y, svreinterpret_f64 (signed_inf));
+
+  /* Join resulting vectors together and return.  */
+  return svsel (special, special_y, y);
 }
 
-/* Approximation for SVE double-precision sinh(x) using expm1.
-   sinh(x) = (exp(x) - exp(-x)) / 2.
-   The greatest observed error is 2.57 ULP:
-   _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2
-				       want 0x1.ab929fc64bd63p-2.  */
+/* Approximation for SVE double-precision sinh(x) using FEXPA expm1.
+   Uses sinh(x) = (e^(2x) - 1) / (2e^x), rewritten for accuracy.
+   The greatest observed error in the non-special region is 2.63 + 0.5 ULP:
+   _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2
+				       want 0x1.c3587faf97b09p-2
+
+   The greatest observed error in the special region is 2.65 + 0.5 ULP:
+   _ZGVsMxv_sinh (0x1.633ce847dab1ap+9) got 0x1.fffd30eea0066p+1023
+				       want 0x1.fffd30eea0063p+1023.  */
 svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
+  svbool_t special = svacge (pg, x, d->special_bound);
   svfloat64_t ax = svabs_x (pg, x);
   svuint64_t sign
       = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
   svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff));
 
-  svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound);
-
   /* Fall back to scalar variant for all lanes if any are special.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, pg);
+    return special_case (pg, special, ax, halfsign, d);
 
   /* Up to the point that expm1 overflows, we can use it to calculate sinh
      using a slight rearrangement of the definition of sinh. This allows us to
      retain acceptable accuracy for very small inputs.  */
-  svfloat64_t t = expm1_inline (ax, pg);
+  svfloat64_t t = expm1_inline (pg, ax);
   t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
   return svmul_x (pg, t, halfsign);
 }
diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h
index 71f88e0..c2b196f 100644
--- a/sysdeps/aarch64/fpu/sv_log1p_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h
@@ -21,11 +21,12 @@
 #define AARCH64_FPU_SV_LOG1P_INLINE_H
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct sv_log1p_data
 {
-  double poly[19], ln2[2];
+  double c0, c2, c4, c6, c8, c10, c12, c14, c16;
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
+  double ln2_lo, ln2_hi;
   uint64_t hf_rt2_top;
   uint64_t one_m_hf_rt2_top;
   uint32_t bottom_mask;
@@ -33,15 +34,30 @@ static const struct sv_log1p_data
 } sv_log1p_data = {
   /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].
    */
-  .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
-	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
-	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
-	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
-	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
-	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
-	    -0x1.cfa7385bdb37ep-6 },
-  .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 },
+  .c0 = -0x1.ffffffffffffbp-2,
+  .c1 = 0x1.55555555551a9p-2,
+  .c2 = -0x1.00000000008e3p-2,
+  .c3 = 0x1.9999999a32797p-3,
+  .c4 = -0x1.555555552fecfp-3,
+  .c5 = 0x1.249248e071e5ap-3,
+  .c6 = -0x1.ffffff8bf8482p-4,
+  .c7 = 0x1.c71c8f07da57ap-4,
+  .c8 = -0x1.9999ca4ccb617p-4,
+  .c9 = 0x1.7459ad2e1dfa3p-4,
+  .c10 = -0x1.554d2680a3ff2p-4,
+  .c11 = 0x1.3b4c54d487455p-4,
+  .c12 = -0x1.2548a9ffe80e6p-4,
+  .c13 = 0x1.0f389a24b2e07p-4,
+  .c14 = -0x1.eee4db15db335p-5,
+  .c15 = 0x1.e95b494d4a5ddp-5,
+  .c16 = -0x1.15fdf07cb7c73p-4,
+  .c17 = 0x1.0310b70800fcfp-4,
+  .c18 = -0x1.cfa7385bdb37ep-6,
+  .ln2_lo = 0x1.62e42fefa3800p-1,
+  .ln2_hi = 0x1.ef35793c76730p-45,
+  /* top32(asuint64(sqrt(2)/2)) << 32.  */
   .hf_rt2_top = 0x3fe6a09e00000000,
+  /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32.  */
   .one_m_hf_rt2_top = 0x00095f6200000000,
   .bottom_mask = 0xffffffff,
   .one_top = 0x3ff
@@ -51,14 +67,14 @@ static inline svfloat64_t
 sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 {
   /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
-     differs from v_log1p_2u5.c by:
+     differs from advsimd/log1p.c by:
      - No special-case handling - this should be dealt with by the caller.
      - Pairwise Horner polynomial evaluation for improved accuracy.
      - Optionally simulate the shortcut for k=0, used in the scalar routine,
        using svsel, for improved accuracy when the argument to log1p is close
      to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1
      in the source of the caller before including this file.
-     See sv_log1p_2u1.c for details of the algorithm.  */
+     See sve/log1p.c for details of the algorithm.  */
   const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data);
   svfloat64_t m = svadd_x (pg, x, 1);
   svuint64_t mi = svreinterpret_u64 (m);
@@ -79,7 +95,7 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
   svfloat64_t cm;
 
 #ifndef WANT_SV_LOG1P_K0_SHORTCUT
-#error                                                                         \
+#error                                                                       \
   "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
 #elif WANT_SV_LOG1P_K0_SHORTCUT
   /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
@@ -96,14 +112,46 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 #endif
 
   /* Approximate log1p(f) on the reduced input using a polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly);
+  svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f),
+	      f4 = svmul_x (svptrue_b64 (), f2, f2),
+	      f8 = svmul_x (svptrue_b64 (), f4, f4),
+	      f16 = svmul_x (svptrue_b64 (), f8, f8);
+
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+  svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17);
+
+  /* Order-18 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, f2, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, f2, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, f4, p47);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1);
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1);
+
+  svfloat64_t p811 = svmla_x (pg, p89, f2, p1011);
+  svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415);
+  svfloat64_t p815 = svmla_x (pg, p811, f4, p1215);
+
+  svfloat64_t p015 = svmla_x (pg, p07, f8, p815);
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0);
+  svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1);
+  svfloat64_t p = svmla_x (pg, p015, f16, p1618);
 
   /* Assemble log1p(x) = k * log2 + log1p(f) + c/m.  */
-  svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]);
-  svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]);
+  svfloat64_t ln2_lo_hi = svld1rq (svptrue_b64 (), &d->ln2_lo);
+  svfloat64_t ylo = svmla_lane (cm, k, ln2_lo_hi, 0);
+  svfloat64_t yhi = svmla_lane (f, k, ln2_lo_hi, 1);
 
-  return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+  return svmad_x (pg, p, f2, svadd_x (pg, ylo, yhi));
 }
-
 #endif
diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
index 789cc68..5869419 100644
--- a/sysdeps/aarch64/fpu/tanh_sve.c
+++ b/sysdeps/aarch64/fpu/tanh_sve.c
@@ -18,83 +18,117 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[11];
-  float64_t inv_ln2, ln2_hi, ln2_lo, shift;
-  uint64_t thresh, tiny_bound;
+  double ln2_hi, ln2_lo;
+  double c2, c4;
+  double c0, c1, c3;
+  double two_over_ln2, shift;
+  uint64_t tiny_bound;
+  double large_bound, fexpa_bound;
+  uint64_t e2xm1_data[20];
 } data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-	    0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
-	    0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
-	    0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-	    0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .ln2_hi = -0x1.62e42fefa39efp-1,
-  .ln2_lo = -0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
-
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .two_over_ln2 = 0x1.71547652b82fep+1,
+  .shift = 0x1.800000000ffc0p+46,   /* 1.5*2^46+1023.  */
   .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27).  */
-  /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound).  */
-  .thresh = 0x01f241bf835f9d5f,
+  .large_bound = 0x1.30fc1931f09cap+4, /* arctanh(1 - 2^-54).  */
+  .fexpa_bound = 0x1.a56ef8ec924ccp-4,	  /* 19/64 * ln2/2.  */
+  /* Table lookup of 2^(i/64) - 1, for values of i from 0..19.  */
+  .e2xm1_data = {
+    0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+    0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+    0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+    0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+    0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+  },
 };
 
+/* An expm1-inspired, FEXPA-based helper function that returns an
+   accurate estimate for e^(2x) - 1.  It has no special-case handling
+   and does not support negative inputs of x.  */
 static inline svfloat64_t
-expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
-{
-  /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
-     the scalar variant of tanh.  */
-
-  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
-  svfloat64_t j
-      = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
-  svint64_t i = svcvt_s64_x (pg, j);
-  svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi);
-  f = svmla_x (pg, f, j, d->ln2_lo);
-
-  /* Approximate expm1(f) using polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t p = svmla_x (
-      pg, f, f2,
-      sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly));
-
-  /* t = 2 ^ i.  */
-  svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
-  /* expm1(x) = p * t + (t - 1).  */
-  return svmla_x (pg, svsub_x (pg, t, 1), p, t);
-}
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
-  return sv_call_f64 (tanh, x, y, special);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
+
+  /* r = 2x - n * ln2, r is in [-ln2/(2N), ln2/(2N)].  */
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t r = svadd_x (pg, x, x);
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+
+  /* We want to construct e2xm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.  */
+  svbool_t is_small = svaclt (pg, x, d->fexpa_bound);
+
+  /* Index via the input of FEXPA, but we only care about the lower 5 bits.  */
+  svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+  /* Compute scale - 1 from FEXPA, and lookup values where this fails.  */
+  svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+  svuint64_t scalem1_lookup
+      = svld1_gather_index (is_small, d->e2xm1_data, base_idx);
+
+  /* Select the appropriate scale - 1 value based on x.  */
+  svfloat64_t scalem1
+      = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+  return svmla_x (pg, scalem1, scale, p);
 }
 
-/* SVE approximation for double-precision tanh(x), using a simplified
-   version of expm1. The greatest observed error is 2.77 ULP:
-   _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
-				       want -0x1.bd6a21a163624p-3.  */
+/* SVE approximation for double-precision tanh(x), using a modified version of
+   FEXPA expm1 to calculate e^2x - 1.
+   The greatest observed error is 2.79 + 0.5 ULP:
+   _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9
+				       want 0x1.fff7be486cae9p-9.  */
 svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x));
+  svbool_t large = svacge (pg, x, d->large_bound);
 
-  /* Trigger special-cases for tiny, boring and infinity/NaN.  */
-  svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh);
+  /* We can use tanh(x) = (e^2x - 1) / (e^2x + 1) to approximate tanh.
+  As an additional optimisation, we can ensure more accurate values of e^x
+  by only using positive inputs. So we calculate tanh(|x|), and restore the
+  sign of the input before returning.  */
+  svfloat64_t ax = svabs_x (pg, x);
+  svuint64_t sign_bit
+      = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
 
-  svfloat64_t u = svadd_x (pg, x, x);
+  svfloat64_t p = e2xm1_inline (pg, ax, d);
+  svfloat64_t q = svadd_x (pg, p, 2);
 
-  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
-  svfloat64_t q = expm1_inline (u, pg, d);
-  svfloat64_t qp2 = svadd_x (pg, q, 2);
+  /* For sufficiently high inputs, the result of tanh(|x|) is 1 when correctly
+     rounded, so we can return 1 directly, with sign correction.
+     This will also act as a guard against our approximation overflowing.  */
+  svfloat64_t y = svsel (large, sv_f64 (1.0), svdiv_x (pg, p, q));
 
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svdiv_x (pg, q, qp2), special);
-  return svdiv_x (pg, q, qp2);
+  return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y)));
 }
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 07133eb..a3fef22 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -25,11 +25,15 @@
 
 VPCS_VECTOR_WRAPPER (acos_advsimd, _ZGVnN2v_acos)
 VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
+VPCS_VECTOR_WRAPPER (acospi_advsimd, _ZGVnN2v_acospi)
 VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
 VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
+VPCS_VECTOR_WRAPPER (asinpi_advsimd, _ZGVnN2v_asinpi)
 VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
 VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
+VPCS_VECTOR_WRAPPER (atanpi_advsimd, _ZGVnN2v_atanpi)
 VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
+VPCS_VECTOR_WRAPPER_ff (atan2pi_advsimd, _ZGVnN2vv_atan2pi)
 VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt)
 VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
 VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 02953cb..f4a5ae8 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -44,11 +44,15 @@
 
 SVE_VECTOR_WRAPPER (acos_sve, _ZGVsMxv_acos)
 SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
+SVE_VECTOR_WRAPPER (acospi_sve, _ZGVsMxv_acospi)
 SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
 SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
+SVE_VECTOR_WRAPPER (asinpi_sve, _ZGVsMxv_asinpi)
 SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
 SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
+SVE_VECTOR_WRAPPER (atanpi_sve, _ZGVsMxv_atanpi)
 SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
+SVE_VECTOR_WRAPPER_ff (atan2pi_sve, _ZGVsMxvv_atan2pi)
 SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt)
 SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
 SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 118bbb0..bc22956 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -25,11 +25,15 @@
 
 VPCS_VECTOR_WRAPPER (acosf_advsimd, _ZGVnN4v_acosf)
 VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
+VPCS_VECTOR_WRAPPER (acospif_advsimd, _ZGVnN4v_acospif)
 VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
 VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
+VPCS_VECTOR_WRAPPER (asinpif_advsimd, _ZGVnN4v_asinpif)
 VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
 VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
+VPCS_VECTOR_WRAPPER (atanpif_advsimd, _ZGVnN4v_atanpif)
 VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
+VPCS_VECTOR_WRAPPER_ff (atan2pif_advsimd, _ZGVnN4vv_atan2pif)
 VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf)
 VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
 VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index f5e7c8c..ad0d6ad 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -44,11 +44,15 @@
 
 SVE_VECTOR_WRAPPER (acosf_sve, _ZGVsMxv_acosf)
 SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
+SVE_VECTOR_WRAPPER (acospif_sve, _ZGVsMxv_acospif)
 SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
 SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
+SVE_VECTOR_WRAPPER (asinpif_sve, _ZGVsMxv_asinpif)
 SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
 SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
+SVE_VECTOR_WRAPPER (atanpif_sve, _ZGVsMxv_atanpif)
 SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
+SVE_VECTOR_WRAPPER_ff (atan2pif_sve, _ZGVsMxvv_atan2pif)
 SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf)
 SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
 SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
diff --git a/sysdeps/aarch64/machine-gmon.h b/sysdeps/aarch64/machine-gmon.h
index eba7c24..05323c9 100644
--- a/sysdeps/aarch64/machine-gmon.h
+++ b/sysdeps/aarch64/machine-gmon.h
@@ -27,9 +27,8 @@ static void mcount_internal (u_long frompc, u_long selfpc);
 #define _MCOUNT_DECL(frompc, selfpc) \
 static inline void mcount_internal (u_long frompc, u_long selfpc)
 
-/* Note: strip_pac is needed for frompc because of gcc PR target/94791.  */
 #define MCOUNT                                                    \
 void __mcount (void *frompc)                                      \
 {                                                                 \
-  mcount_internal ((u_long) strip_pac (frompc), (u_long) RETURN_ADDRESS (0)); \
+  mcount_internal ((u_long) frompc, (u_long) RETURN_ADDRESS (0)); \
 }
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 8dc314b..0e26171 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -36,18 +36,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/aarch64/multiarch/memcpy.c, memmove.c and memset.c.  */
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_oryon1)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memcpy, sve && !bti, __memcpy_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, mops, __memcpy_mops)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
   IFUNC_IMPL (i, name, memmove,
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_oryon1)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memmove, sve && !bti, __memmove_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
   IFUNC_IMPL (i, name, memset,
@@ -55,10 +51,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_oryon1)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
   IFUNC_IMPL (i, name, memchr,
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index 63c24e7..75b3e08 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -31,7 +31,7 @@
   unsigned __attribute__((unused)) zva_size =				      \
     GLRO(dl_aarch64_cpu_features).zva_size;				      \
   bool __attribute__((unused)) bti =					      \
-    HAVE_AARCH64_BTI && GLRO(dl_aarch64_cpu_features).bti;		      \
+    GLRO(dl_aarch64_cpu_features).bti;					      \
   bool __attribute__((unused)) mte =					      \
     MTE_ENABLED ();							      \
   bool __attribute__((unused)) sve =					      \
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 0e33d19..894dabe 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -43,7 +43,7 @@ select_memcpy_ifunc (void)
   if (mops)
     return __memcpy_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr))
 	return __memcpy_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
index ed18682..acad6e8 100644
--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -19,9 +19,6 @@
 
 #include <sysdep.h>
 
-#undef BTI_C
-#define BTI_C
-
 /* Assumptions:
  *
  * ARMv8.2-a, AArch64, unaligned accesses, sve
@@ -38,8 +35,6 @@
 #define vlen	x7
 #define vlen8	x8
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 	.macro ld1b_unroll8
@@ -91,9 +86,6 @@
 	st1b	z7.b, p0, [dst, 7, mul vl]
 	.endm
 
-#undef BTI_C
-#define BTI_C
-
 ENTRY (__memcpy_a64fx)
 
 	cntb	vlen
@@ -296,4 +288,3 @@ L(full_overlap):
 	b	L(last_bytes)
 
 END (__memmove_a64fx)
-#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
index 26d4890..0ba6358 100644
--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
+++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -56,8 +56,6 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 ENTRY (__memcpy_sve)
@@ -199,4 +197,3 @@ L(return):
 	ret
 
 END (__memmove_sve)
-#endif
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index 47b7268..6b0d0ce 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -41,7 +41,7 @@ select_memmove_ifunc (void)
   if (mops)
     return __memmove_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr))
 	return __memmove_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index 872f39f..2b0a58b 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -46,7 +46,7 @@ select_memset_ifunc (void)
   if (mops)
     return __memset_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr) && zva_size == 256)
 	return __memset_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index ea60b78..e921240 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -31,8 +31,6 @@
 #define PF_DIST_L1	(CACHE_LINE_SIZE * 16)	// Prefetch distance L1
 #define vector_length	x9
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 #define dstin   x0
@@ -50,10 +48,6 @@
 	.endif
 	.endm
 
-
-#undef BTI_C
-#define BTI_C
-
 ENTRY (__memset_a64fx)
 
 	cntb	vector_length
@@ -170,5 +164,3 @@ L(L2):
 	b	L(last)
 
 END (__memset_a64fx)
-
-#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
index 7fb40fd..c385e1a 100644
--- a/sysdeps/aarch64/multiarch/memset_sve_zva64.S
+++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
@@ -25,8 +25,6 @@
  * ZVA size is 64.
  */
 
-#if HAVE_AARCH64_SVE_ASM
-
 .arch armv8.2-a+sve
 
 #define dstin	x0
@@ -120,4 +118,3 @@ L(no_zva_loop):
 	ret
 
 END (__memset_sve_zva64)
-#endif
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
index 19657b6..e1b772c 100644
--- a/sysdeps/aarch64/preconfigure
+++ b/sysdeps/aarch64/preconfigure
@@ -3,5 +3,6 @@ aarch64*)
 	base_machine=aarch64
 	machine=aarch64
 	mtls_descriptor=desc
+	mtls_traditional=trad
 	;;
 esac
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index d82d62c..53c5e7d 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -35,6 +35,20 @@ libc_hidden_def (_setjmp)
 
 ENTRY_ALIGN (__sigsetjmp, 2)
 1:
+
+#if IS_IN(libc)
+	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.
+	   The calling convention of __libc_arm_za_disable lets us keep the
+	   return address in x13 across the call, so nothing is saved to or
+	   read from the stack.  As a result we also don't need to sign the
+	   return address and check it after returning.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
+	bl	__libc_arm_za_disable
+	mov	x30, x13
+	cfi_register (x13, x30)
+#endif
+
 	stp	x19, x20, [x0, #JB_X19<<3]
 	stp	x21, x22, [x0, #JB_X21<<3]
 	stp	x23, x24, [x0, #JB_X23<<3]
@@ -73,7 +87,7 @@ L(gcs_done):
 #if IS_IN (rtld)
 	/* In ld.so we never save the signal mask */
 	mov	w0, #0
-	RET
+	ret
 #else
 	b	C_SYMBOL_NAME(__sigjmp_save)
 #endif
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
index 544e397..694c338 100644
--- a/sysdeps/aarch64/start.S
+++ b/sysdeps/aarch64/start.S
@@ -108,7 +108,7 @@ ENTRY(_start)
 	   because crt1.o and rcrt1.o share code and the later must avoid the
 	   use of GOT relocations before __libc_start_main is called.  */
 __wrap_main:
-	BTI_C
+	bti	c
 	b	main
 #endif
 END(_start)
diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h
index 7781b37..a3322a9 100644
--- a/sysdeps/aarch64/sys/ifunc.h
+++ b/sysdeps/aarch64/sys/ifunc.h
@@ -19,24 +19,77 @@
 #ifndef _SYS_IFUNC_H
 #define _SYS_IFUNC_H
 
+#include <sys/cdefs.h>
+
 /* A second argument is passed to the ifunc resolver.  */
 #define _IFUNC_ARG_HWCAP	(1ULL << 62)
 
-/* The prototype of a gnu indirect function resolver on AArch64 is
+/* Maximum number of HWCAP elements that are currently supported.  */
+#define _IFUNC_HWCAP_MAX	4
+
+/* The prototype of a GNU indirect function resolver on AArch64 is
+
+     ElfW(Addr) ifunc_resolver (uint64_t, const uint64_t *);
+
+   The following prototype is also compatible:
 
      ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *);
 
-   the first argument should have the _IFUNC_ARG_HWCAP bit set and
-   the remaining bits should match the AT_HWCAP settings.  */
+   The first argument might have the _IFUNC_ARG_HWCAP bit set and
+   the remaining bits should match the AT_HWCAP settings.
+
+   If the _IFUNC_ARG_HWCAP bit is set in the first argument, then
+   the second argument is passed to the resolver function.  In
+   this case, the second argument is a const pointer to a buffer
+   that gives access to all available HWCAP elements.
+
+   This buffer has its size in bytes at offset 0.  The HWCAP elements
+   are available at offsets 8, 16, 24, 32... respectively for AT_HWCAP,
+   AT_HWCAP2, AT_HWCAP3, AT_HWCAP4...  (these offsets are multiples of
+   sizeof (unsigned long)).
+
+   Indirect function resolvers must use the size of the buffer to
+   check at runtime that a HWCAP element is available before
+   accessing it.  */
 
-/* Second argument to an ifunc resolver.  */
 struct __ifunc_arg_t
 {
-  unsigned long _size; /* Size of the struct, so it can grow.  */
+  unsigned long _size;    /* Size of the struct, so it can grow.  */
   unsigned long _hwcap;
-  unsigned long _hwcap2;
+  unsigned long _hwcap2;  /* End of 1st published struct.  */
+  unsigned long _hwcap3;
+  unsigned long _hwcap4;  /* End of 2nd published struct.  */
 };
 
 typedef struct __ifunc_arg_t __ifunc_arg_t;
 
+/* Constants for IDs of HWCAP elements to be used with the
+   __ifunc_hwcap function below.  */
+enum
+{
+  _IFUNC_ARG_AT_HWCAP = 1,
+  _IFUNC_ARG_AT_HWCAP2 = 2,
+  _IFUNC_ARG_AT_HWCAP3 = 3,
+  _IFUNC_ARG_AT_HWCAP4 = 4,
+};
+
+/* A helper function to obtain HWCAP element by its ID from the
+   parameters ARG0 and ARG1 passed to the ifunc resolver.  Note that
+   ID 1 corresponds to AT_HWCAP, ID 2 corresponds to AT_HWCAP2, etc.
+   If there is no element available for the requested ID then 0 is
+   returned.  If ID doesn't match any supported AT_HWCAP{,2,...} value,
+   then 0 is also returned.  */
+static __inline unsigned long __attribute__ ((unused, always_inline))
+__ifunc_hwcap (unsigned long __id,
+	       unsigned long __arg0, const unsigned long *__arg1)
+{
+  if (__glibc_likely (__arg0 & _IFUNC_ARG_HWCAP))
+    {
+      const unsigned long size = __arg1[0];
+      const unsigned long offset = __id * sizeof (unsigned long);
+      return offset < size && __id > 0 ? __arg1[__id] : 0;
+    }
+  return __id == 1 ? __arg0 : 0;
+}
+
 #endif
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index 9424115..f5e28cb 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -21,43 +21,15 @@
 
 #include <sysdeps/generic/sysdep.h>
 
-#ifndef __ASSEMBLER__
-/* Strip pointer authentication code from pointer p.  */
-static inline void *
-strip_pac (void *p)
-{
-  register void *ra asm ("x30") = (p);
-  asm ("hint 7 // xpaclri" : "+r"(ra));
-  return ra;
-}
-
-/* This is needed when glibc is built with -mbranch-protection=pac-ret
-   with a gcc that is affected by PR target/94891.  */
-# if HAVE_AARCH64_PAC_RET
-#  undef RETURN_ADDRESS
-#  define RETURN_ADDRESS(n) strip_pac (__builtin_return_address (n))
-# endif
-#endif
-
 #ifdef	__ASSEMBLER__
 
+/* CFI directive for return address.  */
+#define cfi_negate_ra_state	.cfi_negate_ra_state
+
 /* Syntactic details of assembler.  */
 
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
 
-/* Branch Target Identitication support.  */
-#if HAVE_AARCH64_BTI
-# define BTI_C		hint	34
-# define BTI_J		hint	36
-#else
-# define BTI_C		nop
-# define BTI_J		nop
-#endif
-
-/* Return address signing support (pac-ret).  */
-#define PACIASP		hint	25
-#define AUTIASP		hint	29
-
 /* Guarded Control Stack support.  */
 #define CHKFEAT_X16	hint	40
 #define MRS_GCSPR(x)	mrs	x, s3_3_c2_c5_1
@@ -87,11 +59,7 @@ strip_pac (void *p)
 
 /* Add GNU property note with the supported features to all asm code
    where sysdep.h is included.  */
-#if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET
 GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS)
-#elif HAVE_AARCH64_BTI
-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
-#endif
 
 /* Define an entry point visible from C.  */
 #define ENTRY(name)						\
@@ -100,7 +68,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .p2align 6;							\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 /* Define an entry point visible from C.  */
@@ -110,7 +78,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .p2align align;						\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 /* Define an entry point visible from C with a specified alignment and
@@ -127,7 +95,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .endr;							\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 #undef	END
diff --git a/sysdeps/aarch64/tst-ifunc-arg-1.c b/sysdeps/aarch64/tst-ifunc-arg-1.c
index b90c836..292c5ae 100644
--- a/sysdeps/aarch64/tst-ifunc-arg-1.c
+++ b/sysdeps/aarch64/tst-ifunc-arg-1.c
@@ -57,6 +57,21 @@ do_test (void)
   TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t));
   TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP));
   TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2));
+  TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3));
+  TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4));
+
+  const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2;
+
+  TEST_COMPARE (__ifunc_hwcap (1, saved_arg1, saved_arg2_ptr),
+		getauxval (AT_HWCAP));
+  TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP2));
+  TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP3));
+  TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP4));
+
+
   return 0;
 }
 
diff --git a/sysdeps/aarch64/tst-ifunc-arg-2.c b/sysdeps/aarch64/tst-ifunc-arg-2.c
index dac144d..c05129a 100644
--- a/sysdeps/aarch64/tst-ifunc-arg-2.c
+++ b/sysdeps/aarch64/tst-ifunc-arg-2.c
@@ -60,6 +60,20 @@ do_test (void)
   TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t));
   TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP));
   TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2));
+  TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3));
+  TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4));
+
+  const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2;
+
+  TEST_COMPARE (__ifunc_hwcap (1, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP));
+  TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP2));
+  TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP3));
+  TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP4));
+
   return 0;
 }
 
diff --git a/sysdeps/aarch64/tst-ifunc-arg-3.c b/sysdeps/aarch64/tst-ifunc-arg-3.c
new file mode 100644
index 0000000..49d8866
--- /dev/null
+++ b/sysdeps/aarch64/tst-ifunc-arg-3.c
@@ -0,0 +1,97 @@
+/* Tests for __ifunc_hwcap helper function.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include <sys/ifunc.h>
+#include <support/check.h>
+
+#define CHECK_VALUES_WITH_ARG(p1, p2, p3, p4) \
+  ({ \
+    TEST_COMPARE (__ifunc_hwcap (0, _IFUNC_ARG_HWCAP, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, _IFUNC_ARG_HWCAP, arg), p1); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, _IFUNC_ARG_HWCAP, arg), p2); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, _IFUNC_ARG_HWCAP, arg), p3); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, _IFUNC_ARG_HWCAP, arg), p4); \
+    TEST_COMPARE (__ifunc_hwcap (5, _IFUNC_ARG_HWCAP, arg), 0); \
+  })
+
+#define CHECK_VALUES_WITHOUT_ARG(p1) \
+  ({ \
+    TEST_COMPARE (__ifunc_hwcap (0, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, p1, arg), p1); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (5, p1, arg), 0); \
+  })
+
+static void
+test_one (const unsigned long *arg)
+{
+  uint64_t size = arg[0] / sizeof (uint64_t);
+
+  switch (size)
+    {
+      case 1:
+	CHECK_VALUES_WITH_ARG (0, 0, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (0);
+	break;
+      case 2:
+	CHECK_VALUES_WITH_ARG (1, 0, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 3:
+	CHECK_VALUES_WITH_ARG (1, 2, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 4:
+	CHECK_VALUES_WITH_ARG (1, 2, 3, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 5:
+	CHECK_VALUES_WITH_ARG (1, 2, 3, 4);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      default:
+	TEST_VERIFY (0); // unexpected size
+	break;
+    }
+}
+
+static int
+do_test (void)
+{
+  uint64_t arg[_IFUNC_HWCAP_MAX + 1] = {
+    0, /* Placeholder for size */
+    _IFUNC_ARG_AT_HWCAP, /* AT_HWCAP */
+    _IFUNC_ARG_AT_HWCAP2, /* AT_HWCAP2 */
+    _IFUNC_ARG_AT_HWCAP3, /* AT_HWCAP3 */
+    _IFUNC_ARG_AT_HWCAP4, /* AT_HWCAP4 */
+  };
+
+  for (int k = 0; k <= _IFUNC_HWCAP_MAX; k++)
+    {
+      /* Update size */
+      arg[0] = (k + 1) * sizeof (uint64_t);
+      test_one (arg);
+    }
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-ifunc-arg-4.c b/sysdeps/aarch64/tst-ifunc-arg-4.c
new file mode 100644
index 0000000..c95ef9e
--- /dev/null
+++ b/sysdeps/aarch64/tst-ifunc-arg-4.c
@@ -0,0 +1,67 @@
+/* Test for ifunc resolver that uses __ifunc_hwcap helper function.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include <sys/ifunc.h>
+#include <support/check.h>
+
+static int
+one (void)
+{
+  return 1;
+}
+
+static int
+two (void)
+{
+  return 2;
+}
+
+/* Resolver: pick `one' when HWCAP2_POE is set in hwcap2, else `two'.  */
+static void *
+resolver (uint64_t arg0, const uint64_t arg1[])
+{
+  uint64_t hwcap2 = __ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, arg0, arg1);
+  if (hwcap2 & HWCAP2_POE)
+    return (void *)one;
+  else
+    return (void *)two;
+}
+
+/* An extern visible ifunc symbol.  */
+int fun (void) __attribute__((ifunc ("resolver")));
+
+static int
+do_test (void)
+{
+  if (getauxval (AT_HWCAP2) & HWCAP2_POE)
+    {
+      printf ("using 1st implementation\n");
+      TEST_VERIFY (fun () == 1);
+    }
+  else
+    {
+      printf ("using 2nd implementation\n");
+      TEST_VERIFY (fun () == 2);
+    }
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h
new file mode 100644
index 0000000..f049416
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-helper.h
@@ -0,0 +1,97 @@
+/* Utility functions for SME tests.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Streaming SVE vector register size.  */
+static unsigned long svl;
+
+struct blk {
+  void *za_save_buffer;
+  uint16_t num_za_save_slices;
+  char __reserved[6];
+};
+
+/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
+static unsigned long
+get_svcr (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
+    : "=r" (x0));
+  return x0;
+}
+
+/* Returns tpidr2.  */
+static void *
+get_tpidr2 (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
+    : "=r"(x0) :: "memory");
+  return (void *) x0;
+}
+
+/* Obtains current streaming SVE vector register size.  */
+static unsigned long
+get_svl (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
+    : "=r" (x0));
+  return x0;
+}
+
+/* PSTATE.ZA = 1, set ZA state to active.  */
+static void
+start_za (void)
+{
+  asm volatile (
+    ".inst   0xd503457f  /* smstart za  */");
+}
+
+/* Load ZA from P, one SVL-byte ZA array vector per loop iteration.  */
+static void __attribute__ ((noinline))
+load_za (const void *p)
+{
+  register unsigned long x15 asm ("x15") = 0;
+  register unsigned long x16 asm ("x16") = (unsigned long)p;
+  register unsigned long x17 asm ("x17") = svl;
+
+  asm volatile (
+    ".inst   0xd503437f  /* smstart sm  */\n"
+    ".L_ldr_loop:\n"
+    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
+    "add     w15, w15, 1\n"
+    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
+    "cmp     w15, w17\n"
+    "bne     .L_ldr_loop\n"
+    ".inst   0xd503427f  /* smstop  sm  */\n"
+    : "+r"(x15), "+r"(x16), "+r"(x17) :: "cc", "memory");
+}
+
+/* Set tpidr2 to BLK.  */
+static void
+set_tpidr2 (struct blk *blk)
+{
+  register unsigned long x0 asm ("x0") = (unsigned long)blk;
+  asm volatile (
+    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
+    :: "r"(x0) : "memory");
+}
diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c
index 62c419f..103897a 100644
--- a/sysdeps/aarch64/tst-sme-jmp.c
+++ b/sysdeps/aarch64/tst-sme-jmp.c
@@ -27,87 +27,12 @@
 #include <support/support.h>
 #include <support/test-driver.h>
 
-struct blk {
-  void *za_save_buffer;
-  uint16_t num_za_save_slices;
-  char __reserved[6];
-};
+#include "tst-sme-helper.h"
 
-static unsigned long svl;
 static uint8_t *za_orig;
 static uint8_t *za_dump;
 static uint8_t *za_save;
 
-static unsigned long
-get_svl (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
-    : "=r" (x0));
-  return x0;
-}
-
-/* PSTATE.ZA = 1, set ZA state to active.  */
-static void
-start_za (void)
-{
-  asm volatile (
-    ".inst   0xd503457f  /* smstart za  */");
-}
-
-/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
-static unsigned long
-get_svcr (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
-    : "=r" (x0));
-  return x0;
-}
-
-/* Load data into ZA byte by byte from p.  */
-static void __attribute__ ((noinline))
-load_za (const void *p)
-{
-  register unsigned long x15 asm ("x15") = 0;
-  register unsigned long x16 asm ("x16") = (unsigned long)p;
-  register unsigned long x17 asm ("x17") = svl;
-
-  asm volatile (
-    ".inst   0xd503437f  /* smstart sm  */\n"
-    ".L_ldr_loop:\n"
-    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
-    "add     w15, w15, 1\n"
-    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
-    "cmp     w15, w17\n"
-    "bne     .L_ldr_loop\n"
-    ".inst   0xd503427f  /* smstop  sm  */\n"
-    : "+r"(x15), "+r"(x16), "+r"(x17));
-}
-
-/* Set tpidr2 to BLK.  */
-static void
-set_tpidr2 (struct blk *blk)
-{
-  register unsigned long x0 asm ("x0") = (unsigned long)blk;
-  asm volatile (
-    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
-    :: "r"(x0) : "memory");
-}
-
-/* Returns tpidr2.  */
-static void *
-get_tpidr2 (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
-    : "=r"(x0) :: "memory");
-  return (void *) x0;
-}
-
 static void
 print_data(const char *msg, void *p)
 {
@@ -168,8 +93,8 @@ longjmp_test (void)
     {
       p = get_tpidr2 ();
       printf ("before longjmp: tp2 = %p\n", p);
-      if (p != &blk)
-	FAIL_EXIT1 ("tpidr2 is clobbered");
+      if (p != NULL)
+	FAIL_EXIT1 ("tpidr2 has not been reset to null");
       do_longjmp (env);
       FAIL_EXIT1 ("longjmp returned");
     }
diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c
new file mode 100644
index 0000000..63f6eeb
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-za-state.c
@@ -0,0 +1,119 @@
+/* Test for SME ZA state being cleared on setjmp and longjmp.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+
+#include "tst-sme-helper.h"
+
+static uint8_t *state;
+
+static void
+enable_sme_za_state (struct blk *ptr)
+{
+  set_tpidr2 (ptr);
+  start_za ();
+  load_za (state);
+}
+
+static void
+check_sme_za_state (const char msg[], bool clear)
+{
+  unsigned long svcr = get_svcr ();
+  void *tpidr2 = get_tpidr2 ();
+  printf ("[%s]\n", msg);
+  printf ("svcr = %016lx\n", svcr);
+  printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
+  if (clear)
+    {
+      TEST_VERIFY (svcr == 0);
+      TEST_VERIFY (tpidr2 == NULL);
+    }
+  else
+    {
+      TEST_VERIFY (svcr != 0);
+      TEST_VERIFY (tpidr2 != NULL);
+    }
+}
+
+static void
+run (struct blk *ptr)
+{
+  jmp_buf buf;
+  int ret;
+
+  check_sme_za_state ("initial state", /* Clear.  */ true);
+
+  /* Enable ZA state so that the effect of disabling it is observable.  */
+  enable_sme_za_state (ptr);
+  check_sme_za_state ("before setjmp", /* Clear.  */ false);
+
+  if ((ret = setjmp (buf)) == 0)
+    {
+      check_sme_za_state ("after setjmp", /* Clear.  */ true);
+
+      /* Enable ZA state so that the effect of disabling it is observable.  */
+      enable_sme_za_state (ptr);
+      check_sme_za_state ("before longjmp", /* Clear.  */ false);
+
+      longjmp (buf, 42);
+
+      /* Unreachable.  */
+      TEST_VERIFY (false);
+      __builtin_unreachable ();
+    }
+
+  TEST_COMPARE (ret, 42);
+  check_sme_za_state ("after longjmp", /* Clear.  */ true);
+}
+
+static int
+do_test (void)
+{
+  unsigned long hwcap2 = getauxval (AT_HWCAP2);
+  if ((hwcap2 & HWCAP2_SME) == 0)
+    return EXIT_UNSUPPORTED;
+
+  /* Get current streaming SVE vector register size.  */
+  svl = get_svl ();
+  printf ("svl: %lu\n", svl);
+  TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
+
+  /* Initialise buffer for ZA state of SME.  */
+  state = xmalloc (svl * svl);
+  memset (state, 1, svl * svl);
+  struct blk blk = {
+    .za_save_buffer = state,
+    .num_za_save_slices = svl,
+    .__reserved = {0},
+  };
+
+  run (&blk);
+
+  free (state);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/generic/getrandom-internal.h b/sysdeps/generic/getrandom-internal.h
index 7c54194..4872598 100644
--- a/sysdeps/generic/getrandom-internal.h
+++ b/sysdeps/generic/getrandom-internal.h
@@ -19,7 +19,7 @@
 #ifndef _GETRANDOM_INTERNAL_H
 #define _GETRANDOM_INTERNAL_H
 
-static inline void __getrandom_early_init (_Bool)
+static inline void __getrandom_early_init (_Bool initial)
 {
 }
 
diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h
deleted file mode 100644
index b95e409..0000000
--- a/sysdeps/generic/libc-tsd.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* libc-internal interface for thread-specific data.  Stub or TLS version.
-   Copyright (C) 1998-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _GENERIC_LIBC_TSD_H
-#define _GENERIC_LIBC_TSD_H 1
-
-/* This file defines the following macros for accessing a small fixed
-   set of thread-specific `void *' data used only internally by libc.
-
-   __libc_tsd_define(CLASS, TYPE, KEY)	-- Define or declare a datum with TYPE
-					   for KEY.  CLASS can be `static' for
-					   keys used in only one source file,
-					   empty for global definitions, or
-					   `extern' for global declarations.
-   __libc_tsd_address(TYPE, KEY)	-- Return the `TYPE *' pointing to
-					   the current thread's datum for KEY.
-   __libc_tsd_get(TYPE, KEY)		-- Return the `TYPE' datum for KEY.
-   __libc_tsd_set(TYPE, KEY, VALUE)	-- Set the datum for KEY to VALUE.
-
-   The set of available KEY's will usually be provided as an enum,
-   and contains (at least):
-		_LIBC_TSD_KEY_MALLOC
-		_LIBC_TSD_KEY_DL_ERROR
-		_LIBC_TSD_KEY_RPC_VARS
-   All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros.
-   Some implementations may not provide any enum at all and instead
-   using string pasting in the macros.  */
-
-#include <tls.h>
-
-/* When full support for __thread variables is available, this interface is
-   just a trivial wrapper for it.  Without TLS, this is the generic/stub
-   implementation for wholly single-threaded systems.
-
-   We don't define an enum for the possible key values, because the KEYs
-   translate directly into variables by macro magic.  */
-
-#define __libc_tsd_define(CLASS, TYPE, KEY)	\
-  CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie;
-
-#define __libc_tsd_address(TYPE, KEY)		(&__libc_tsd_##KEY)
-#define __libc_tsd_get(TYPE, KEY)		(__libc_tsd_##KEY)
-#define __libc_tsd_set(TYPE, KEY, VALUE)	(__libc_tsd_##KEY = (VALUE))
-
-#endif	/* libc-tsd.h */
diff --git a/sysdeps/generic/sysdep.h b/sysdeps/generic/sysdep.h
index 4c0dda4..ef5eba2 100644
--- a/sysdeps/generic/sysdep.h
+++ b/sysdeps/generic/sysdep.h
@@ -45,6 +45,7 @@
 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
 # define cfi_offset(reg, off)		.cfi_offset reg, off
 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+# define cfi_val_offset(reg, off)	.cfi_val_offset reg, off
 # define cfi_register(r1, r2)		.cfi_register r1, r2
 # define cfi_return_column(reg)	.cfi_return_column reg
 # define cfi_restore(reg)		.cfi_restore reg
@@ -74,6 +75,8 @@
    ".cfi_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
 # define CFI_REL_OFFSET(reg, off) \
    ".cfi_rel_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
+# define CFI_VAL_OFFSET(reg, off) \
+   ".cfi_val_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
 # define CFI_REGISTER(r1, r2) \
    ".cfi_register " CFI_STRINGIFY(r1) "," CFI_STRINGIFY(r2)
 # define CFI_RETURN_COLUMN(reg) \
diff --git a/sysdeps/gnu/netinet/tcp.h b/sysdeps/gnu/netinet/tcp.h
index b2acbb4..7a3500b 100644
--- a/sysdeps/gnu/netinet/tcp.h
+++ b/sysdeps/gnu/netinet/tcp.h
@@ -212,6 +212,9 @@ enum
 # define TCPI_OPT_ECN		8  /* ECN was negotiated at TCP session init */
 # define TCPI_OPT_ECN_SEEN	16 /* we received at least one packet with ECT */
 # define TCPI_OPT_SYN_DATA	32 /* SYN-ACK acked data in SYN sent or rcvd */
+# define TCPI_OPT_USEC_TS	64 /* usec timestamps */
+# define TCPI_OPT_TFO_CHILD	128 /* child from a Fast Open option on SYN */
+
 
 /* Values for tcpi_state.  */
 enum tcp_ca_state
diff --git a/sysdeps/htl/libc-lockP.h b/sysdeps/htl/libc-lockP.h
index 092eb35..e9977e4 100644
--- a/sysdeps/htl/libc-lockP.h
+++ b/sysdeps/htl/libc-lockP.h
@@ -126,15 +126,9 @@ libc_hidden_proto (__pthread_setcancelstate)
    single-threaded processes.  */
 #if !defined(__NO_WEAK_PTHREAD_ALIASES) && !IS_IN (libpthread)
 # ifdef weak_extern
-weak_extern (__pthread_key_create)
-weak_extern (__pthread_setspecific)
-weak_extern (__pthread_getspecific)
 weak_extern (__pthread_initialize)
 weak_extern (__pthread_atfork)
 # else
-#  pragma weak __pthread_key_create
-#  pragma weak __pthread_setspecific
-#  pragma weak __pthread_getspecific
 #  pragma weak __pthread_initialize
 #  pragma weak __pthread_atfork
 # endif
diff --git a/sysdeps/htl/pt-destroy-specific.c b/sysdeps/htl/pt-destroy-specific.c
index e63b807..b5eb0ba 100644
--- a/sysdeps/htl/pt-destroy-specific.c
+++ b/sysdeps/htl/pt-destroy-specific.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 
 #include <pt-internal.h>
+#include <string.h>
 
 void
 __pthread_destroy_specific (struct __pthread *thread)
@@ -100,3 +101,4 @@ __pthread_destroy_specific (struct __pthread *thread)
   memset (&thread->static_thread_specifics, 0,
 	  sizeof (thread->static_thread_specifics));
 }
+libc_hidden_def (__pthread_destroy_specific)
diff --git a/sysdeps/htl/pt-getspecific.c b/sysdeps/htl/pt-getspecific.c
index 0052ce8..d3ebb31 100644
--- a/sysdeps/htl/pt-getspecific.c
+++ b/sysdeps/htl/pt-getspecific.c
@@ -19,6 +19,7 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
 
 void *
 __pthread_getspecific (pthread_key_t key)
@@ -42,5 +43,9 @@ __pthread_getspecific (pthread_key_t key)
 
   return self->thread_specifics[key];
 }
-weak_alias (__pthread_getspecific, pthread_getspecific);
-hidden_def (__pthread_getspecific)
+libc_hidden_def (__pthread_getspecific)
+versioned_symbol (libc, __pthread_getspecific, pthread_getspecific, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_getspecific, pthread_getspecific, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-key-create.c b/sysdeps/htl/pt-key-create.c
index cf8a8d1..92a9db8 100644
--- a/sysdeps/htl/pt-key-create.c
+++ b/sysdeps/htl/pt-key-create.c
@@ -22,6 +22,9 @@
 
 #include <pt-internal.h>
 #include <pthreadP.h>
+#include <shlib-compat.h>
+#include <ldsodefs.h>
+
 
 pthread_mutex_t __pthread_key_lock;
 pthread_once_t __pthread_key_once = PTHREAD_ONCE_INIT;
@@ -116,5 +119,9 @@ do_search:
   __pthread_mutex_unlock (&__pthread_key_lock);
   return 0;
 }
-weak_alias (__pthread_key_create, pthread_key_create)
-hidden_def (__pthread_key_create)
+libc_hidden_def (__pthread_key_create)
+versioned_symbol (libc, __pthread_key_create, pthread_key_create, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_key_create, pthread_key_create, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-key-delete.c b/sysdeps/htl/pt-key-delete.c
index 79879e9..666314f 100644
--- a/sysdeps/htl/pt-key-delete.c
+++ b/sysdeps/htl/pt-key-delete.c
@@ -19,6 +19,8 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
+#include <ldsodefs.h>
 
 int
 __pthread_key_delete (pthread_key_t key)
@@ -69,4 +71,9 @@ __pthread_key_delete (pthread_key_t key)
 
   return err;
 }
-weak_alias (__pthread_key_delete, pthread_key_delete)
+libc_hidden_def (__pthread_key_delete)
+versioned_symbol (libc, __pthread_key_delete, pthread_key_delete, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_key_delete, pthread_key_delete, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-setspecific.c b/sysdeps/htl/pt-setspecific.c
index dfd55b6..0535225 100644
--- a/sysdeps/htl/pt-setspecific.c
+++ b/sysdeps/htl/pt-setspecific.c
@@ -19,6 +19,8 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
+#include <string.h>
 
 int
 __pthread_setspecific (pthread_key_t key, const void *value)
@@ -68,5 +70,9 @@ __pthread_setspecific (pthread_key_t key, const void *value)
   self->thread_specifics[key] = (void *) value;
   return 0;
 }
-weak_alias (__pthread_setspecific, pthread_setspecific);
-hidden_def (__pthread_setspecific)
+libc_hidden_def (__pthread_setspecific)
+versioned_symbol (libc, __pthread_setspecific, pthread_setspecific, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_setspecific, pthread_setspecific, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pthread-functions.h b/sysdeps/htl/pthread-functions.h
index 467d031..31d85cc 100644
--- a/sysdeps/htl/pthread-functions.h
+++ b/sysdeps/htl/pthread-functions.h
@@ -24,9 +24,6 @@
 void __pthread_exit (void *) __attribute__ ((__noreturn__));
 struct __pthread_cancelation_handler **__pthread_get_cleanup_stack (void);
 int __pthread_once (pthread_once_t *, void (*) (void));
-int __pthread_key_create (pthread_key_t *, void (*) (void *));
-void *__pthread_getspecific (pthread_key_t);
-int __pthread_setspecific (pthread_key_t, const void *);
 
 void _cthreads_flockfile (FILE *);
 void _cthreads_funlockfile (FILE *);
@@ -40,9 +37,6 @@ struct pthread_functions
   void (*ptr___pthread_exit) (void *) __attribute__ ((__noreturn__));
   struct __pthread_cancelation_handler **(*ptr___pthread_get_cleanup_stack) (void);
   int (*ptr_pthread_once) (pthread_once_t *, void (*) (void));
-  int (*ptr___pthread_key_create) (pthread_key_t *, void (*) (void *));
-  void *(*ptr___pthread_getspecific) (pthread_key_t);
-  int (*ptr___pthread_setspecific) (pthread_key_t, const void *);
   void (*ptr__IO_flockfile) (FILE *);
   void (*ptr__IO_funlockfile) (FILE *);
   int (*ptr__IO_ftrylockfile) (FILE *);
diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h
index 535deeb..64db024 100644
--- a/sysdeps/htl/pthreadP.h
+++ b/sysdeps/htl/pthreadP.h
@@ -182,9 +182,13 @@ int __cthread_keycreate (__cthread_key_t *);
 int __cthread_getspecific (__cthread_key_t, void **);
 int __cthread_setspecific (__cthread_key_t, void *);
 int __pthread_key_create (pthread_key_t *key, void (*destr) (void *));
+libc_hidden_proto (__pthread_key_create)
 void *__pthread_getspecific (pthread_key_t key);
+libc_hidden_proto (__pthread_getspecific)
 int __pthread_setspecific (pthread_key_t key, const void *value);
+libc_hidden_proto (__pthread_setspecific)
 int __pthread_key_delete (pthread_key_t key);
+libc_hidden_proto (__pthread_key_delete)
 int __pthread_once (pthread_once_t *once_control, void (*init_routine) (void));
 
 int __pthread_getattr_np (pthread_t, pthread_attr_t *);
@@ -216,9 +220,6 @@ libc_hidden_proto (__pthread_condattr_init)
 #if IS_IN (libpthread)
 hidden_proto (__pthread_create)
 hidden_proto (__pthread_detach)
-hidden_proto (__pthread_key_create)
-hidden_proto (__pthread_getspecific)
-hidden_proto (__pthread_setspecific)
 hidden_proto (__pthread_get_cleanup_stack)
 #endif
 
diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile
index a2e8c0b..ee6470d 100644
--- a/sysdeps/i386/Makefile
+++ b/sysdeps/i386/Makefile
@@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double
 endif
 
 ifeq ($(subdir),elf)
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += \
+  dl-tls-get-addr \
+# sysdep-dl-routines
 
 tests += tst-audit3
 modules-names += tst-auditmod3a tst-auditmod3b
diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c
new file mode 100644
index 0000000..c97e5c5
--- /dev/null
+++ b/sysdeps/i386/dl-tls-get-addr.c
@@ -0,0 +1,68 @@
+/* Ifunc selector for ___tls_get_addr.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+# define ___tls_get_addr __redirect____tls_get_addr
+# include <dl-tls.h>
+# undef ___tls_get_addr
+# undef __tls_get_addr
+
+# define SYMBOL_NAME ___tls_get_addr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (cpu_features->xsave_state_size != 0)
+    {
+      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
+	return OPTIMIZE (xsavec);
+      else
+	return OPTIMIZE (xsave);
+    }
+  else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
+    return OPTIMIZE (fxsave);
+  return OPTIMIZE (fnsave);
+}
+
+libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr,
+		       IFUNC_SELECTOR ());
+
+/* The special thing about the x86 TLS ABI is that we have two
+   variants of the __tls_get_addr function with different calling
+   conventions.  The GNU version, which we are mostly concerned here,
+   takes the parameter in a register.  The name is changed by adding
+   an additional underscore at the beginning.  The Sun version uses
+   the normal calling convention.  */
+
+rtld_hidden_proto (___tls_get_addr)
+rtld_hidden_def (___tls_get_addr)
+
+void *
+__tls_get_addr (tls_index *ti)
+{
+  return ___tls_get_addr (ti);
+}
+#endif
diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h
index f453931..ef605c5 100644
--- a/sysdeps/i386/dl-tls.h
+++ b/sysdeps/i386/dl-tls.h
@@ -37,34 +37,14 @@ typedef struct dl_tls_index
 /* This is the prototype for the GNU version.  */
 extern void *___tls_get_addr (tls_index *ti)
      __attribute__ ((__regparm__ (1)));
-extern void *___tls_get_addr_internal (tls_index *ti)
-     __attribute__ ((__regparm__ (1))) attribute_hidden;
-
 # if IS_IN (rtld)
-/* The special thing about the x86 TLS ABI is that we have two
-   variants of the __tls_get_addr function with different calling
-   conventions.  The GNU version, which we are mostly concerned here,
-   takes the parameter in a register.  The name is changed by adding
-   an additional underscore at the beginning.  The Sun version uses
-   the normal calling convention.  */
-void *
-__tls_get_addr (tls_index *ti)
-{
-  return ___tls_get_addr_internal (ti);
-}
-
-
 /* Prepare using the definition of __tls_get_addr in the generic
    version of this file.  */
-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
-strong_alias (___tls_get_addr, ___tls_get_addr_internal)
-rtld_hidden_proto (___tls_get_addr)
-rtld_hidden_def (___tls_get_addr)
-#else
-
+# define __tls_get_addr \
+    __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal
+# else
 /* Users should get the better interface.  */
-# define __tls_get_addr ___tls_get_addr
-
+#  define __tls_get_addr ___tls_get_addr
 # endif
 #endif
 
diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
index 6aec06d..be9ecd6 100644
--- a/sysdeps/i386/dl-tlsdesc-dynamic.h
+++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
@@ -16,34 +16,6 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#undef REGISTER_SAVE_AREA
-
-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
-# error STATE_SAVE_ALIGNMENT must be multiple of 16
-#endif
-
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-# ifdef USE_FNSAVE
-#  error USE_FNSAVE shouldn't be defined
-# endif
-# ifdef USE_FXSAVE
-/* Use fxsave to save all registers.  */
-#  define REGISTER_SAVE_AREA	512
-# endif
-#else
-# ifdef USE_FNSAVE
-/* Use fnsave to save x87 FPU stack registers.  */
-#  define REGISTER_SAVE_AREA	108
-# else
-#  ifndef USE_FXSAVE
-#   error USE_FXSAVE must be defined
-#  endif
-/* Use fxsave to save all registers.  Add 12 bytes to align the stack
-   to 16 bytes.  */
-#  define REGISTER_SAVE_AREA	(512 + 12)
-# endif
-#endif
-
 	.hidden _dl_tlsdesc_dynamic
 	.global	_dl_tlsdesc_dynamic
 	.type	_dl_tlsdesc_dynamic,@function
@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic:
 	ret
 	.p2align 4,,7
 2:
-	cfi_adjust_cfa_offset (32)
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-	movl	%ebx, -28(%esp)
-	movl	%esp, %ebx
-	cfi_def_cfa_register(%ebx)
-	and	$-STATE_SAVE_ALIGNMENT, %esp
-#endif
-#ifdef REGISTER_SAVE_AREA
-	subl	$REGISTER_SAVE_AREA, %esp
-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
-	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
-# endif
-#else
-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
-#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
-# endif
-	/* Allocate stack space of the required size to save the state.  */
-	LOAD_PIC_REG (cx)
-	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
-#endif
-#ifdef USE_FNSAVE
-	fnsave	(%esp)
-#elif defined USE_FXSAVE
-	fxsave	(%esp)
-#else
-	/* Save the argument for ___tls_get_addr in EAX.  */
-	movl	%eax, %ecx
-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
-	xorl	%edx, %edx
-	/* Clear the XSAVE Header.  */
-# ifdef USE_XSAVE
-	movl	%edx, (512)(%esp)
-	movl	%edx, (512 + 4 * 1)(%esp)
-	movl	%edx, (512 + 4 * 2)(%esp)
-	movl	%edx, (512 + 4 * 3)(%esp)
-# endif
-	movl	%edx, (512 + 4 * 4)(%esp)
-	movl	%edx, (512 + 4 * 5)(%esp)
-	movl	%edx, (512 + 4 * 6)(%esp)
-	movl	%edx, (512 + 4 * 7)(%esp)
-	movl	%edx, (512 + 4 * 8)(%esp)
-	movl	%edx, (512 + 4 * 9)(%esp)
-	movl	%edx, (512 + 4 * 10)(%esp)
-	movl	%edx, (512 + 4 * 11)(%esp)
-	movl	%edx, (512 + 4 * 12)(%esp)
-	movl	%edx, (512 + 4 * 13)(%esp)
-	movl	%edx, (512 + 4 * 14)(%esp)
-	movl	%edx, (512 + 4 * 15)(%esp)
-# ifdef USE_XSAVE
-	xsave	(%esp)
-# else
-	xsavec	(%esp)
-# endif
-	/* Restore the argument for ___tls_get_addr in EAX.  */
-	movl	%ecx, %eax
-#endif
-	call	HIDDEN_JUMPTARGET (___tls_get_addr)
-	/* Get register content back.  */
-#ifdef USE_FNSAVE
-	frstor	(%esp)
-#elif defined USE_FXSAVE
-	fxrstor	(%esp)
-#else
-	/* Save and retore ___tls_get_addr return value stored in EAX.  */
-	movl	%eax, %ecx
-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
-	xorl	%edx, %edx
-	xrstor	(%esp)
-	movl	%ecx, %eax
-#endif
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-	mov	%ebx, %esp
-	cfi_def_cfa_register(%esp)
-	movl	-28(%esp), %ebx
-	cfi_restore(%ebx)
-#else
-	addl	$REGISTER_SAVE_AREA, %esp
-	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
-#endif
+#include "tls-get-addr-wrapper.h"
 	jmp	1b
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
index c080993..c914ca4 100644
--- a/sysdeps/i386/dl-tlsdesc.S
+++ b/sysdeps/i386/dl-tlsdesc.S
@@ -22,23 +22,6 @@
 #include <features-offsets.h>
 #include "tlsdesc.h"
 
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
-
-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-
-   __tls_get_addr may be called with 4-byte stack alignment.  Although
-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
-   that stack will be always aligned at 16 bytes.  */
-# define DL_STACK_ALIGNMENT 4
-#endif
-
-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
-   stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr.  */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
-   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
-
 	.text
 
      /* This function is used to compute the TP offset for symbols in
diff --git a/sysdeps/i386/fpu/e_ilogb.S b/sysdeps/i386/fpu/e_ilogb.S
deleted file mode 100644
index f4b792c..0000000
--- a/sysdeps/i386/fpu/e_ilogb.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $")
-
-ENTRY(__ieee754_ilogb)
-	fldl	4(%esp)
-/* I added the following ugly construct because ilogb(+-Inf) is
-   required to return INT_MAX in ISO C99.
-   -- jakub@redhat.com.  */
-	fxam			/* Is NaN or +-Inf?  */
-	fstsw   %ax
-	movb    $0x45, %dh
-	andb    %ah, %dh
-	cmpb    $0x05, %dh
-	je      1f		/* Is +-Inf, jump.  */
-	cmpb    $0x40, %dh
-	je      2f		/* Is +-0, jump.  */
-
-	fxtract
-	pushl	%eax
-	cfi_adjust_cfa_offset (4)
-	fstp	%st
-
-	fistpl	(%esp)
-	fwait
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-
-	ret
-
-1:	fstp	%st
-	movl	$0x7fffffff, %eax
-	ret
-2:	fstp	%st
-	movl	$0x80000000, %eax	/* FP_ILOGB0  */
-	ret
-END (__ieee754_ilogb)
diff --git a/sysdeps/i386/fpu/e_ilogbf.S b/sysdeps/i386/fpu/e_ilogbf.S
deleted file mode 100644
index 37298b9..0000000
--- a/sysdeps/i386/fpu/e_ilogbf.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $")
-
-ENTRY(__ieee754_ilogbf)
-	flds	4(%esp)
-/* I added the following ugly construct because ilogb(+-Inf) is
-   required to return INT_MAX in ISO C99.
-   -- jakub@redhat.com.  */
-	fxam			/* Is NaN or +-Inf?  */
-	fstsw   %ax
-	movb    $0x45, %dh
-	andb    %ah, %dh
-	cmpb    $0x05, %dh
-	je      1f		/* Is +-Inf, jump.  */
-	cmpb    $0x40, %dh
-	je      2f		/* Is +-0, jump.  */
-
-	fxtract
-	pushl	%eax
-	cfi_adjust_cfa_offset (4)
-	fstp	%st
-
-	fistpl	(%esp)
-	fwait
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-
-	ret
-
-1:	fstp	%st
-	movl	$0x7fffffff, %eax
-	ret
-2:	fstp	%st
-	movl	$0x80000000, %eax	/* FP_ILOGB0  */
-	ret
-END (__ieee754_ilogbf)
diff --git a/sysdeps/i386/fpu/math_err.c b/sysdeps/i386/fpu/math_err.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/math_err.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h
new file mode 100644
index 0000000..0708e5a
--- /dev/null
+++ b/sysdeps/i386/tls-get-addr-wrapper.h
@@ -0,0 +1,127 @@
+/* Wrapper of i386 ___tls_get_addr to save and restore vector registers.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#undef REGISTER_SAVE_AREA
+
+#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multiple of 16
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# ifdef USE_FNSAVE
+#  error USE_FNSAVE shouldn't be defined
+# endif
+# ifdef USE_FXSAVE
+/* Use fxsave to save all registers.  */
+#  define REGISTER_SAVE_AREA	512
+# endif
+#else
+# ifdef USE_FNSAVE
+/* Use fnsave to save x87 FPU stack registers.  */
+#  define REGISTER_SAVE_AREA	108
+# else
+#  ifndef USE_FXSAVE
+#   error USE_FXSAVE must be defined
+#  endif
+/* Use fxsave to save all registers.  Add 12 bytes to align the stack
+   to 16 bytes.  */
+#  define REGISTER_SAVE_AREA	(512 + 12)
+# endif
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	movl	%ebx, 28(%esp)
+	movl	%esp, %ebx
+	cfi_def_cfa_register(%ebx)
+	and	$-STATE_SAVE_ALIGNMENT, %esp
+#endif
+#ifdef REGISTER_SAVE_AREA
+	subl	$REGISTER_SAVE_AREA, %esp
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+#else
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
+# endif
+	/* Allocate stack space of the required size to save the state.  */
+	LOAD_PIC_REG (cx)
+	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \
+		+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
+#endif
+#ifdef USE_FNSAVE
+	fnsave	(%esp)
+#elif defined USE_FXSAVE
+	fxsave	(%esp)
+#else
+	/* Save the argument for ___tls_get_addr in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	/* Clear the XSAVE Header.  */
+# ifdef USE_XSAVE
+	movl	%edx, (512)(%esp)
+	movl	%edx, (512 + 4 * 1)(%esp)
+	movl	%edx, (512 + 4 * 2)(%esp)
+	movl	%edx, (512 + 4 * 3)(%esp)
+# endif
+	movl	%edx, (512 + 4 * 4)(%esp)
+	movl	%edx, (512 + 4 * 5)(%esp)
+	movl	%edx, (512 + 4 * 6)(%esp)
+	movl	%edx, (512 + 4 * 7)(%esp)
+	movl	%edx, (512 + 4 * 8)(%esp)
+	movl	%edx, (512 + 4 * 9)(%esp)
+	movl	%edx, (512 + 4 * 10)(%esp)
+	movl	%edx, (512 + 4 * 11)(%esp)
+	movl	%edx, (512 + 4 * 12)(%esp)
+	movl	%edx, (512 + 4 * 13)(%esp)
+	movl	%edx, (512 + 4 * 14)(%esp)
+	movl	%edx, (512 + 4 * 15)(%esp)
+# ifdef USE_XSAVE
+	xsave	(%esp)
+# else
+	xsavec	(%esp)
+# endif
+	/* Restore the argument for ___tls_get_addr in EAX.  */
+	movl	%ecx, %eax
+#endif
+	call	___tls_get_addr_internal
+	/* Get register content back.  */
+#ifdef USE_FNSAVE
+	frstor	(%esp)
+#elif defined USE_FXSAVE
+	fxrstor	(%esp)
+#else
+	/* Save and restore ___tls_get_addr return value stored in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	xrstor	(%esp)
+	movl	%ecx, %eax
+#endif
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	mov	%ebx, %esp
+	cfi_def_cfa_register(%esp)
+	movl	28(%esp), %ebx
+	cfi_restore(%ebx)
+#else
+	addl	$REGISTER_SAVE_AREA, %esp
+	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+#endif
+
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S
new file mode 100644
index 0000000..7d143d8
--- /dev/null
+++ b/sysdeps/i386/tls_get_addr.S
@@ -0,0 +1,57 @@
+/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include <cpu-features-offsets.h>
+#include <features-offsets.h>
+
+	.text
+#ifdef SHARED
+# define USE_FNSAVE
+# define MINIMUM_ALIGNMENT	4
+# define STATE_SAVE_ALIGNMENT	4
+# define ___tls_get_addr	_____tls_get_addr_fnsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef MINIMUM_ALIGNMENT
+# undef USE_FNSAVE
+
+# define MINIMUM_ALIGNMENT	16
+
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT	16
+# define ___tls_get_addr	_____tls_get_addr_fxsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_FXSAVE
+
+# define USE_XSAVE
+# define STATE_SAVE_ALIGNMENT	64
+# define ___tls_get_addr	_____tls_get_addr_xsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_XSAVE
+
+# define USE_XSAVEC
+# define STATE_SAVE_ALIGNMENT	64
+# define ___tls_get_addr	_____tls_get_addr_xsavec
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_XSAVEC
+#endif /* SHARED */
diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h
new file mode 100644
index 0000000..1825798
--- /dev/null
+++ b/sysdeps/i386/tls_get_addr.h
@@ -0,0 +1,42 @@
+/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+	.hidden ___tls_get_addr
+	.global	___tls_get_addr
+	.type	___tls_get_addr,@function
+
+	/* This function is a wrapper of ___tls_get_addr_internal that
+	   preserves the caller-saved x87/vector register state.  */
+
+	cfi_startproc
+	.align 16
+___tls_get_addr:
+	/* Preserve call-clobbered ECX and EDX.  tls-get-addr-wrapper.h may
+	   save EBX at 28(%esp) in this frame when it realigns the stack.  */
+	subl	$32, %esp
+	cfi_adjust_cfa_offset (32)
+	movl	%ecx, 20(%esp)
+	movl	%edx, 24(%esp)
+#include "tls-get-addr-wrapper.h"
+	movl	20(%esp), %ecx
+	movl	24(%esp), %edx
+	addl	$32, %esp
+	cfi_adjust_cfa_offset (-32)
+	ret
+	cfi_endproc
+	.size	___tls_get_addr, .-___tls_get_addr
diff --git a/sysdeps/ieee754/dbl-64/e_ilogb.c b/sysdeps/ieee754/dbl-64/e_ilogb.c
index 1e338a5..1ea2f23 100644
--- a/sysdeps/ieee754/dbl-64/e_ilogb.c
+++ b/sysdeps/ieee754/dbl-64/e_ilogb.c
@@ -1,63 +1 @@
-/* @(#)s_ilogb.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ilogb.c,v 1.9 1995/05/10 20:47:28 jtc Exp $";
-#endif
-
-/* ilogb(double x)
- * return the binary exponent of non-zero x
- * ilogb(0) = FP_ILOGB0
- * ilogb(NaN) = FP_ILOGBNAN (no signal is raised)
- * ilogb(+-Inf) = INT_MAX (no signal is raised)
- */
-
-#include <limits.h>
-#include <math.h>
-#include <math_private.h>
-
-int
-__ieee754_ilogb (double x)
-{
-  int32_t hx, lx, ix;
-
-  GET_HIGH_WORD (hx, x);
-  hx &= 0x7fffffff;
-  if (hx < 0x00100000)
-    {
-      GET_LOW_WORD (lx, x);
-      if ((hx | lx) == 0)
-	return FP_ILOGB0;               /* ilogb(0) = FP_ILOGB0 */
-      else                              /* subnormal x */
-      if (hx == 0)
-	{
-	  for (ix = -1043; lx > 0; lx <<= 1)
-	    ix -= 1;
-	}
-      else
-	{
-	  for (ix = -1022, hx <<= 11; hx > 0; hx <<= 1)
-	    ix -= 1;
-	}
-      return ix;
-    }
-  else if (hx < 0x7ff00000)
-    return (hx >> 20) - 1023;
-  else if (FP_ILOGBNAN != INT_MAX)
-    {
-      /* ISO C99 requires ilogb(+-Inf) == INT_MAX.  */
-      GET_LOW_WORD (lx, x);
-      if (((hx ^ 0x7ff00000) | lx) == 0)
-	return INT_MAX;
-    }
-  return FP_ILOGBNAN;
-}
+/* ilogb is implemented at w_ilogb.c  */
diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
index 3382e38..d9288c4 100644
--- a/sysdeps/ieee754/dbl-64/math_config.h
+++ b/sysdeps/ieee754/dbl-64/math_config.h
@@ -109,6 +109,7 @@ issignaling_inline (double x)
 #define BIT_WIDTH       64
 #define MANTISSA_WIDTH  52
 #define EXPONENT_WIDTH  11
+#define EXPONENT_BIAS   1023
 #define MANTISSA_MASK   UINT64_C(0x000fffffffffffff)
 #define EXPONENT_MASK   UINT64_C(0x7ff0000000000000)
 #define EXP_MANT_MASK   UINT64_C(0x7fffffffffffffff)
@@ -121,12 +122,24 @@ is_nan (uint64_t x)
   return (x & EXP_MANT_MASK) > EXPONENT_MASK;
 }
 
+static inline bool
+is_inf (uint64_t x)
+{
+  return (x << 1) == (EXPONENT_MASK << 1);
+}
+
 static inline uint64_t
 get_mantissa (uint64_t x)
 {
   return x & MANTISSA_MASK;
 }
 
+static inline int
+get_exponent (uint64_t x)
+{
+  return (int) (x >> MANTISSA_WIDTH & 0x7ff) - EXPONENT_BIAS;
+}
+
 /* Convert integer number X, unbiased exponent EP, and sign S to double:
 
    result = X * 2^(EP+1 - exponent_bias)
@@ -164,6 +177,8 @@ attribute_hidden double __math_divzero (uint32_t);
 
 /* Invalid input unless it is a quiet NaN.  */
 attribute_hidden double __math_invalid (double);
+attribute_hidden int __math_invalid_i (int);
+attribute_hidden long int __math_invalid_li (long int);
 
 /* Error handling using output checking, only for errno setting.  */
 
diff --git a/sysdeps/ieee754/dbl-64/math_err.c b/sysdeps/ieee754/dbl-64/math_err.c
index 4a07fd5..b8c645a 100644
--- a/sysdeps/ieee754/dbl-64/math_err.c
+++ b/sysdeps/ieee754/dbl-64/math_err.c
@@ -29,8 +29,24 @@ with_errno (double y, int e)
   errno = e;
   return y;
 }
+
+NOINLINE static int
+with_errno_i (int y, int e)
+{
+  errno = e;
+  return y;
+}
+
+NOINLINE static long int
+with_errno_li (long int y, int e)
+{
+  errno = e;
+  return y;
+}
 #else
 #define with_errno(x, e) (x)
+#define with_errno_i(x, e) (x)
+#define with_errno_li(x, e) (x)
 #endif
 
 attribute_hidden double
@@ -83,6 +99,22 @@ __math_invalid (double x)
   return isnan (x) ? y : with_errno (y, EDOM);
 }
 
+attribute_hidden int
+__math_invalid_i (int r)
+{
+  double y = 0.0 / 0.0;
+  math_force_eval (y);
+  return with_errno_i (r, EDOM);
+}
+
+attribute_hidden long int
+__math_invalid_li (long int r)
+{
+  double y = 0.0 / 0.0;
+  math_force_eval (y);
+  return with_errno_li (r, EDOM);
+}
+
 /* Check result and set errno if necessary.  */
 
 attribute_hidden double
diff --git a/sysdeps/ieee754/dbl-64/s_modf.c b/sysdeps/ieee754/dbl-64/s_modf.c
index 0de2084..90cd8e8 100644
--- a/sysdeps/ieee754/dbl-64/s_modf.c
+++ b/sysdeps/ieee754/dbl-64/s_modf.c
@@ -1,63 +1,68 @@
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+/* Extract signed integral and fractional values.
+   Copyright (C) 1993-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-/*
- * modf(double x, double *iptr)
- * return fraction part of x, and return x's integral part in *iptr.
- * Method:
- *	Bit twiddling.
- *
- * Exception:
- *	No exception.
- */
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
 #include <libm-alias-double.h>
-#include <stdint.h>
-
-static const double one = 1.0;
+#include "math_config.h"
+#include <math-use-builtins-trunc.h>
 
 double
-__modf(double x, double *iptr)
+__modf (double x, double *iptr)
 {
-	int64_t i0;
-	int32_t j0;
-	EXTRACT_WORDS64(i0,x);
-	j0 = ((i0>>52)&0x7ff)-0x3ff;	/* exponent of x */
-	if(j0<52) {			/* integer part in x */
-	    if(j0<0) {			/* |x|<1 */
-		/* *iptr = +-0 */
-		INSERT_WORDS64(*iptr,i0&UINT64_C(0x8000000000000000));
-		return x;
-	    } else {
-		uint64_t i = UINT64_C(0x000fffffffffffff)>>j0;
-		if((i0&i)==0) {		/* x is integral */
-		    *iptr = x;
-		    /* return +-0 */
-		    INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000));
-		    return x;
-		} else {
-		    INSERT_WORDS64(*iptr,i0&(~i));
-		    return x - *iptr;
-		}
-	    }
-	} else { /* no fraction part */
-	    *iptr = x*one;
-	    /* We must handle NaNs separately.  */
-	    if (j0 == 0x400 && (i0 & UINT64_C(0xfffffffffffff)))
-	      return x*one;
-	    INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000));	/* return +-0 */
-	    return x;
+  uint64_t t = asuint64 (x);
+#if USE_TRUNC_BUILTIN
+  if (is_inf (t))
+    {
+      *iptr = x;
+      return copysign (0.0, x);
+    }
+  *iptr = trunc (x);
+  return copysign (x - *iptr, x);
+#else
+  int e = get_exponent (t);
+  /* |x| < 2^MANTISSA_WIDTH: x may have a fraction part.  */
+  if (e < MANTISSA_WIDTH)
+    {
+      if (e < 0)
+	{
+	  /* |x|<1 -> *iptr = +-0 */
+	  *iptr = asdouble (t & SIGN_MASK);
+	  return x;
+	}
+
+      uint64_t i = MANTISSA_MASK >> e;
+      if ((t & i) == 0)
+	{
+	  /* x is integral, return +-0  */
+	  *iptr = x;
+	  return asdouble (t & SIGN_MASK);
 	}
+
+      *iptr = asdouble (t & ~i);
+      return x - *iptr;
+    }
+
+  /* No fraction part.  Set invalid operation for sNaN.  */
+  *iptr = x * 1.0;
+  if ((e == 0x400) && (t & MANTISSA_MASK))
+    return *iptr;
+  return asdouble (t & SIGN_MASK);
+#endif
 }
 #ifndef __modf
 libm_alias_double (__modf, modf)
diff --git a/sysdeps/ieee754/dbl-64/w_ilogb-impl.h b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h
new file mode 100644
index 0000000..c919735
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h
@@ -0,0 +1,37 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+static inline RET_TYPE
+IMPL_NAME (double x)
+{
+  uint64_t ux = asuint64 (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (__glibc_unlikely (ex == 0))
+    {
+      /* Zero or subnormal.  Clear sign and exponent.  */
+      ux <<= 1 + EXPONENT_WIDTH;
+      if (ux == 0)
+	return RET_INVALID (RET_LOGB0);
+      /* Subnormal: each leading zero of the mantissa lowers the exponent.  */
+      return -(RET_TYPE) EXPONENT_BIAS - (RET_TYPE) stdc_leading_zeros (ux);
+    }
+  if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH))
+    /* NaN (mantissa bits set) or Inf.  */
+    return RET_INVALID (ux << (1 + EXPONENT_WIDTH) ? RET_LOGBNAN : RET_LOGMAX);
+  return ex - EXPONENT_BIAS;
+}
diff --git a/sysdeps/ieee754/dbl-64/w_ilogb.c b/sysdeps/ieee754/dbl-64/w_ilogb.c
new file mode 100644
index 0000000..e460f14
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_ilogb.c
@@ -0,0 +1,52 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include <libm-alias-double.h>
+#include "math_config.h"
+
+#ifdef DEF_AS_LLOGB
+# define DECL_NAME   __llogb
+# define FUNC_NAME   llogb
+# define RET_TYPE    long int
+# define RET_LOGB0   FP_LLOGB0
+# define RET_LOGBNAN FP_LLOGBNAN
+# define RET_LOGMAX  LONG_MAX
+# define RET_INVALID __math_invalid_li
+#else
+# define DECL_NAME   __ilogb
+# define FUNC_NAME   ilogb
+# define RET_TYPE    int
+# define RET_LOGB0   FP_ILOGB0
+# define RET_LOGBNAN FP_ILOGBNAN
+# define RET_LOGMAX  INT_MAX
+# define RET_INVALID __math_invalid_i
+#endif
+#define __IMPL_NAME(x,y) x ## _ ## y
+#define _IMPL_NAME(x,y)  __IMPL_NAME(x,y)
+#define IMPL_NAME        _IMPL_NAME(FUNC_NAME, impl)
+#include <w_ilogb-impl.h>
+
+RET_TYPE
+DECL_NAME (double x)
+{
+  return IMPL_NAME (x);
+}
+libm_alias_double (DECL_NAME, FUNC_NAME)
diff --git a/sysdeps/ieee754/dbl-64/w_llogb.c b/sysdeps/ieee754/dbl-64/w_llogb.c
new file mode 100644
index 0000000..c984cd15
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_llogb.c
@@ -0,0 +1,2 @@
+#define DEF_AS_LLOGB
+#include "w_ilogb.c"
diff --git a/sysdeps/ieee754/flt-32/e_ilogbf.c b/sysdeps/ieee754/flt-32/e_ilogbf.c
index db24012..a27fb94 100644
--- a/sysdeps/ieee754/flt-32/e_ilogbf.c
+++ b/sysdeps/ieee754/flt-32/e_ilogbf.c
@@ -1,43 +1 @@
-/* s_ilogbf.c -- float version of s_ilogb.c.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ilogbf.c,v 1.4 1995/05/10 20:47:31 jtc Exp $";
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <math_private.h>
-
-int __ieee754_ilogbf(float x)
-{
-	int32_t hx,ix;
-
-	GET_FLOAT_WORD(hx,x);
-	hx &= 0x7fffffff;
-	if(hx<0x00800000) {
-	    if(hx==0)
-		return FP_ILOGB0;	/* ilogb(0) = FP_ILOGB0 */
-	    else			/* subnormal x */
-	        for (ix = -126,hx<<=8; hx>0; hx<<=1) ix -=1;
-	    return ix;
-	}
-	else if (hx<0x7f800000) return (hx>>23)-127;
-	else if (FP_ILOGBNAN != INT_MAX) {
-	    /* ISO C99 requires ilogbf(+-Inf) == INT_MAX.  */
-	    if (hx==0x7f800000)
-		return INT_MAX;
-	}
-	return FP_ILOGBNAN;
-}
+/* ilogbf is implemented at w_ilogbf.c  */
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 8d9c8ee..33ea631 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -165,6 +165,7 @@ issignalingf_inline (float x)
 #define BIT_WIDTH       32
 #define MANTISSA_WIDTH  23
 #define EXPONENT_WIDTH  8
+#define EXPONENT_BIAS   127
 #define MANTISSA_MASK   0x007fffff
 #define EXPONENT_MASK   0x7f800000
 #define EXP_MANT_MASK   0x7fffffff
@@ -177,12 +178,24 @@ is_nan (uint32_t x)
   return (x & EXP_MANT_MASK) > EXPONENT_MASK;
 }
 
+static inline bool
+is_inf (uint32_t x)
+{
+  return (x << 1) == ((uint32_t) EXPONENT_MASK << 1);
+}
+
 static inline uint32_t
 get_mantissa (uint32_t x)
 {
   return x & MANTISSA_MASK;
 }
 
+static inline int
+get_exponent (uint32_t x)
+{
+  return (int) (x >> MANTISSA_WIDTH & 0xff) - EXPONENT_BIAS;
+}
+
 /* Convert integer number X, unbiased exponent EP, and sign S to double:
 
    result = X * 2^(EP+1 - exponent_bias)
@@ -208,6 +221,8 @@ attribute_hidden float __math_uflowf (uint32_t);
 attribute_hidden float __math_may_uflowf (uint32_t);
 attribute_hidden float __math_divzerof (uint32_t);
 attribute_hidden float __math_invalidf (float);
+attribute_hidden int __math_invalidf_i (int);
+attribute_hidden long int __math_invalidf_li (long int);
 attribute_hidden float __math_edomf (float x);
 
 /* Shared between expf, exp2f, exp10f, and powf.  */
diff --git a/sysdeps/ieee754/flt-32/math_errf.c b/sysdeps/ieee754/flt-32/math_errf.c
index edcc4c0..244e38a 100644
--- a/sysdeps/ieee754/flt-32/math_errf.c
+++ b/sysdeps/ieee754/flt-32/math_errf.c
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <math-barriers.h>
 #include "math_config.h"
 
 #if WANT_ERRNO
@@ -27,8 +28,24 @@ with_errnof (float y, int e)
   errno = e;
   return y;
 }
+
+NOINLINE static int
+with_errnof_i (int y, int e)
+{
+  errno = e;
+  return y;
+}
+
+NOINLINE static long int
+with_errnof_li (long int y, int e)
+{
+  errno = e;
+  return y;
+}
 #else
 # define with_errnof(x, e) (x)
+# define with_errnof_i(x, e) (x)
+# define with_errnof_li(x, e) (x)
 #endif
 
 attribute_hidden float
@@ -80,3 +97,19 @@ __math_invalidf (float x)
   float y = (x - x) / (x - x);
   return isnan (x) ? y : with_errnof (y, EDOM);
 }
+
+attribute_hidden int
+__math_invalidf_i (int x)
+{
+  float y = 0.0f / 0.0f;
+  math_force_eval (y);
+  return with_errnof_i (x, EDOM);
+}
+
+attribute_hidden long int
+__math_invalidf_li (long int x)
+{
+  float y = 0.0f / 0.0f;
+  math_force_eval (y);
+  return with_errnof_li (x, EDOM);
+}
diff --git a/sysdeps/ieee754/flt-32/s_modff.c b/sysdeps/ieee754/flt-32/s_modff.c
index ad2e91d..965136b 100644
--- a/sysdeps/ieee754/flt-32/s_modff.c
+++ b/sysdeps/ieee754/flt-32/s_modff.c
@@ -1,54 +1,69 @@
-/* s_modff.c -- float version of s_modf.c.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+/* Extract signed integral and fractional values.
+   Copyright (C) 1993-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
 #include <libm-alias-float.h>
-
-static const float one = 1.0;
+#include "math_config.h"
+#include <math-use-builtins-trunc.h>
 
 float
-__modff(float x, float *iptr)
+__modff (float x, float *iptr)
 {
-	int32_t i0,j0;
-	uint32_t i;
-	GET_FLOAT_WORD(i0,x);
-	j0 = ((i0>>23)&0xff)-0x7f;	/* exponent of x */
-	if(__builtin_expect(j0<23, 1)) {		/* integer part in x */
-	    if(j0<0) {			/* |x|<1 */
-		SET_FLOAT_WORD(*iptr,i0&0x80000000);	/* *iptr = +-0 */
-		return x;
-	    } else {
-		i = (0x007fffff)>>j0;
-		if((i0&i)==0) {			/* x is integral */
-		    uint32_t ix;
-		    *iptr = x;
-		    GET_FLOAT_WORD(ix,x);
-		    SET_FLOAT_WORD(x,ix&0x80000000);	/* return +-0 */
-		    return x;
-		} else {
-		    SET_FLOAT_WORD(*iptr,i0&(~i));
-		    return x - *iptr;
-		}
-	    }
-	} else {			/* no fraction part */
-	    *iptr = x*one;
-	    /* We must handle NaNs separately.  */
-	    if (j0 == 0x80 && (i0 & 0x7fffff))
-	      return x*one;
-	    SET_FLOAT_WORD(x,i0&0x80000000);	/* return +-0 */
-	    return x;
+  uint32_t t = asuint (x);
+#if USE_TRUNCF_BUILTIN
+  if (is_inf (t))
+    {
+      *iptr = x;
+      return copysignf (0.0, x);
+    }
+  *iptr = truncf (x);
+  return copysignf (x - *iptr, x);
+#else
+  int e = get_exponent (t);
+  /* |x| < 2^MANTISSA_WIDTH: x may have a fraction part.  */
+  if (e < MANTISSA_WIDTH)
+    {
+      if (e < 0)
+	{
+	  /* |x|<1 -> *iptr = +-0 */
+	  *iptr = asfloat (t & SIGN_MASK);
+	  return x;
 	}
+
+      uint32_t i = MANTISSA_MASK >> e;
+      if ((t & i) == 0)
+	{
+	  /* x is integral, return +-0  */
+	  *iptr = x;
+	  return asfloat (t & SIGN_MASK);
+	}
+
+      *iptr = asfloat (t & ~i);
+      return x - *iptr;
+    }
+
+  /* No fraction part.  Set invalid operation for sNaN.  */
+  *iptr = x * 1.0f;
+  if ((e == 0x80) && (t & MANTISSA_MASK))
+    return *iptr;
+  return asfloat (t & SIGN_MASK);
+#endif
 }
+#ifndef __modff
 libm_alias_float (__modf, modf)
+#endif
diff --git a/sysdeps/ieee754/flt-32/w_ilogbf-impl.h b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h
new file mode 100644
index 0000000..5aa8bf0
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h
@@ -0,0 +1,38 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+static inline RET_TYPE
+IMPL_NAME (float x)
+{
+  uint32_t ux = asuint (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (__glibc_unlikely (ex == 0))
+    {
+      /* Zero or subnormal.
+         Clear sign and exponent.  */
+      ux <<= 1 + EXPONENT_WIDTH;
+      if (ux == 0)
+	return RET_INVALID (RET_LOGB0);
+      /* Subnormal: each leading zero of the mantissa lowers the exponent.  */
+      return -(RET_TYPE) EXPONENT_BIAS - (RET_TYPE) stdc_leading_zeros (ux);
+    }
+  if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH))
+    /* NaN (mantissa bits set) or Inf.  */
+    return RET_INVALID (ux << (1 + EXPONENT_WIDTH) ? RET_LOGBNAN : RET_LOGMAX);
+  return ex - EXPONENT_BIAS;
+}
diff --git a/sysdeps/ieee754/flt-32/w_ilogbf.c b/sysdeps/ieee754/flt-32/w_ilogbf.c
new file mode 100644
index 0000000..4e2a707
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_ilogbf.c
@@ -0,0 +1,53 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include <libm-alias-float.h>
+#include <math-type-macros-float.h>
+#include "math_config.h"
+
+#ifdef DEF_AS_LLOGBF
+# define DECL_NAME   __llogb
+# define FUNC_NAME   llogb
+# define RET_TYPE    long int
+# define RET_LOGB0   FP_LLOGB0
+# define RET_LOGBNAN FP_LLOGBNAN
+# define RET_LOGMAX  LONG_MAX
+# define RET_INVALID __math_invalidf_li
+#else
+# define DECL_NAME   __ilogb
+# define FUNC_NAME   ilogb
+# define RET_TYPE    int
+# define RET_LOGB0   FP_ILOGB0
+# define RET_LOGBNAN FP_ILOGBNAN
+# define RET_LOGMAX  INT_MAX
+# define RET_INVALID __math_invalidf_i
+#endif
+#define __IMPL_NAME(x,y) x ## _ ## y
+#define _IMPL_NAME(x,y)  __IMPL_NAME(x,y)
+#define IMPL_NAME        _IMPL_NAME(FUNC_NAME, impl)
+#include <w_ilogbf-impl.h>
+
+RET_TYPE
+M_DECL_FUNC (DECL_NAME) (float x)
+{
+  return IMPL_NAME (x);
+}
+libm_alias_float (DECL_NAME, FUNC_NAME)
diff --git a/sysdeps/ieee754/flt-32/w_llogbf.c b/sysdeps/ieee754/flt-32/w_llogbf.c
new file mode 100644
index 0000000..8676434
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_llogbf.c
@@ -0,0 +1,2 @@
+#define DEF_AS_LLOGBF
+#include "w_ilogbf.c"
diff --git a/sysdeps/ieee754/ldbl-128/Makefile b/sysdeps/ieee754/ldbl-128/Makefile
index 5476a55..e666bdc 100644
--- a/sysdeps/ieee754/ldbl-128/Makefile
+++ b/sysdeps/ieee754/ldbl-128/Makefile
@@ -83,7 +83,7 @@ CFLAGS-w_j1l.c += -fno-builtin-j1f64x -fno-builtin-j1f128
 CFLAGS-w_jnl.c += -fno-builtin-jnf64x -fno-builtin-jnf128
 CFLAGS-s_ldexpl.c += -fno-builtin-ldexpf64x -fno-builtin-ldexpf128
 CFLAGS-w_lgammal.c += -fno-builtin-lgammaf64x -fno-builtin-lgammaf128
-CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r -fno-builtin-lgammaf128_r
 CFLAGS-w_llogbl.c += -fno-builtin-llogbf64x -fno-builtin-llogbf128
 CFLAGS-s_llrintl.c += -fno-builtin-llrintf64x -fno-builtin-llrintf128
 CFLAGS-s_llroundl.c += -fno-builtin-llroundf64x -fno-builtin-llroundf128
diff --git a/sysdeps/ieee754/ldbl-128ibm-compat/Versions b/sysdeps/ieee754/ldbl-128ibm-compat/Versions
index cd39b6a..ae4bd5b 100644
--- a/sysdeps/ieee754/ldbl-128ibm-compat/Versions
+++ b/sysdeps/ieee754/ldbl-128ibm-compat/Versions
@@ -154,8 +154,10 @@ libm {
     __tanpiieee128;
   }
   GLIBC_2.42 {
+    __compoundnieee128;
     __pownieee128;
     __powrieee128;
+    __rootnieee128;
     __rsqrtieee128;
   }
 }
diff --git a/sysdeps/ieee754/ldbl-opt/Makefile b/sysdeps/ieee754/ldbl-opt/Makefile
index beaed61..ef7da1f 100644
--- a/sysdeps/ieee754/ldbl-opt/Makefile
+++ b/sysdeps/ieee754/ldbl-opt/Makefile
@@ -42,6 +42,7 @@ libnldbl-calls = \
   cimag \
   clog \
   clog10 \
+  compoundn \
   conj \
   copysign \
   cos \
@@ -180,6 +181,7 @@ libnldbl-calls = \
   remainder \
   remquo \
   rint \
+  rootn \
   round \
   roundeven \
   rsqrt \
@@ -264,7 +266,7 @@ extra-objs += $(addsuffix .oS, $(libnldbl-routines))
 
 CFLAGS-nldbl-acos.c = -fno-builtin-acosl
 CFLAGS-nldbl-acosh.c = -fno-builtin-acoshl
-CFLAGS-nldbl-acospi.c = -fno-builtin-acospi
+CFLAGS-nldbl-acospi.c = -fno-builtin-acospil
 CFLAGS-nldbl-asin.c = -fno-builtin-asinl
 CFLAGS-nldbl-asinh.c = -fno-builtin-asinhl
 CFLAGS-nldbl-asinpi.c = -fno-builtin-asinpil
@@ -290,11 +292,12 @@ CFLAGS-nldbl-cexp.c = -fno-builtin-cexpl
 CFLAGS-nldbl-cimag.c = -fno-builtin-cimagl
 CFLAGS-nldbl-clog.c = -fno-builtin-clogl
 CFLAGS-nldbl-clog10.c = -fno-builtin-clog10l
+CFLAGS-nldbl-compoundn.c = -fno-builtin-compoundnl
 CFLAGS-nldbl-conj.c = -fno-builtin-conjl
 CFLAGS-nldbl-copysign.c = -fno-builtin-copysignl
 CFLAGS-nldbl-cos.c = -fno-builtin-cosl
 CFLAGS-nldbl-cosh.c = -fno-builtin-coshl
-CFLAGS-nldbl-cospi.c = -fno-builtin-cospi
+CFLAGS-nldbl-cospi.c = -fno-builtin-cospil
 CFLAGS-nldbl-cpow.c = -fno-builtin-cpowl
 CFLAGS-nldbl-cproj.c = -fno-builtin-cprojl
 CFLAGS-nldbl-creal.c = -fno-builtin-creall
@@ -382,6 +385,7 @@ CFLAGS-nldbl-powr.c = -fno-builtin-powrl
 CFLAGS-nldbl-remainder.c = -fno-builtin-remainderl -fno-builtin-dreml
 CFLAGS-nldbl-remquo.c = -fno-builtin-remquol
 CFLAGS-nldbl-rint.c = -fno-builtin-rintl
+CFLAGS-nldbl-rootn.c = -fno-builtin-rootnl
 CFLAGS-nldbl-round.c = -fno-builtin-roundl
 CFLAGS-nldbl-roundeven.c = -fno-builtin-roundevenl
 CFLAGS-nldbl-rsqrt.c = -fno-builtin-rsqrtl
@@ -394,11 +398,11 @@ CFLAGS-nldbl-significand.c = -fno-builtin-significandl
 CFLAGS-nldbl-sin.c = -fno-builtin-sinl
 CFLAGS-nldbl-sincos.c = -fno-builtin-sincosl
 CFLAGS-nldbl-sinh.c = -fno-builtin-sinhl
-CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpi
+CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpil
 CFLAGS-nldbl-sqrt.c = -fno-builtin-sqrtl
 CFLAGS-nldbl-tan.c = -fno-builtin-tanl
 CFLAGS-nldbl-tanh.c = -fno-builtin-tanhl
-CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpi
+CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpil
 CFLAGS-nldbl-tgamma.c = -fno-builtin-tgammal
 CFLAGS-nldbl-totalorder.c = -fno-builtin-totalorderl
 CFLAGS-nldbl-totalordermag.c = -fno-builtin-totalordermagl
diff --git a/sysdeps/ieee754/ldbl-opt/nldbl-compoundn.c b/sysdeps/ieee754/ldbl-opt/nldbl-compoundn.c
new file mode 100644
index 0000000..43da519
--- /dev/null
+++ b/sysdeps/ieee754/ldbl-opt/nldbl-compoundn.c
@@ -0,0 +1,8 @@
+#include "nldbl-compat.h"
+
+double
+attribute_hidden
+compoundnl (double x, long long int y) /* 'l' name, but double argument and result.  */
+{
+  return compoundn (x, y); /* Forward unchanged to compoundn.  */
+}
diff --git a/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c
new file mode 100644
index 0000000..fb0d860
--- /dev/null
+++ b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c
@@ -0,0 +1,8 @@
+#include "nldbl-compat.h"
+
+double
+attribute_hidden
+rootnl (double x, long long int y) /* 'l' name, but double argument and result.  */
+{
+  return rootn (x, y); /* Forward unchanged to rootn.  */
+}
diff --git a/sysdeps/loongarch/fpu/e_ilogbf.c b/sysdeps/loongarch/fpu/e_ilogbf.c
index adced63..a27fb94 100644
--- a/sysdeps/loongarch/fpu/e_ilogbf.c
+++ b/sysdeps/loongarch/fpu/e_ilogbf.c
@@ -1,39 +1 @@
-/* __ieee754_ilogbf().  LoongArch version.
-   Copyright (C) 2022-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define NO_MATH_REDIRECT
-#include <math.h>
-#include <fpu_control.h>
-
-int
-__ieee754_ilogbf (float x)
-{
-  int x_cond;
-  asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x));
-
-  if (__glibc_unlikely (x_cond & _FCLASS_ZERO))
-      return FP_ILOGB0;
-  else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF)))
-      return FP_ILOGBNAN;
-  else
-    {
-      asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x));
-      asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x));
-      return x;
-    }
-}
+/* ilogbf is implemented in w_ilogbf.c.  */
diff --git a/sysdeps/loongarch/fpu/e_ilogb.c b/sysdeps/loongarch/fpu/w_ilogb-impl.h
index f21fa5c..1905373 100644
--- a/sysdeps/loongarch/fpu/e_ilogb.c
+++ b/sysdeps/loongarch/fpu/w_ilogb-impl.h
@@ -1,4 +1,4 @@
-/* __ieee754_ilogb().  LoongArch version.
+/* Get integer exponent of a floating-point value.  LoongArch version.
    Copyright (C) 2022-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,20 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <math.h>
 #include <fpu_control.h>
 
-int
-__ieee754_ilogb (double x)
+static inline RET_TYPE
+IMPL_NAME (double x)
 {
   int x_cond;
   asm volatile ("fclass.d \t%0, %1" : "=f" (x_cond) : "f" (x));
 
   if (__glibc_unlikely (x_cond & _FCLASS_ZERO))
-      return FP_ILOGB0;
+    return RET_INVALID (RET_LOGB0);
   else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF)))
-      return FP_ILOGBNAN;
+    return RET_INVALID (RET_LOGBNAN);
   else
     {
       asm volatile ("fabs.d \t%0, %1" : "=f" (x) : "f" (x));
diff --git a/sysdeps/loongarch/fpu/w_ilogbf-impl.h b/sysdeps/loongarch/fpu/w_ilogbf-impl.h
new file mode 100644
index 0000000..9cb4172
--- /dev/null
+++ b/sysdeps/loongarch/fpu/w_ilogbf-impl.h
@@ -0,0 +1,37 @@
+/* Get integer exponent of a floating-point value.  LoongArch version.
+   Copyright (C) 2022-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fpu_control.h>
+
+static inline RET_TYPE
+IMPL_NAME (float x) /* RET_TYPE, IMPL_NAME and RET_* come from the including file.  */
+{
+  int x_cond;
+  asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x)); /* x_cond = fclass.s result for X.  */
+
+  if (__glibc_unlikely (x_cond & _FCLASS_ZERO)) /* Zero class bit(s) set.  */
+    return RET_INVALID (RET_LOGB0);
+  else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF))) /* NaN or Inf class bit(s) set.  */
+    return RET_INVALID (RET_LOGBNAN);
+  else
+    {
+      asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x)); /* x = fabs.s (x).  */
+      asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x)); /* Presumably the base-2 exponent; see ISA manual.  */
+      return x; /* Float result is implicitly converted to RET_TYPE.  */
+    }
+}
diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure
index 0d1e9ed..6726ab8 100644
--- a/sysdeps/loongarch/preconfigure
+++ b/sysdeps/loongarch/preconfigure
@@ -44,6 +44,7 @@ loongarch*)
 
     base_machine=loongarch
     mtls_descriptor=desc
+    mtls_traditional=trad
     ;;
 esac
 
diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac
index df07dbf..5640226 100644
--- a/sysdeps/loongarch/preconfigure.ac
+++ b/sysdeps/loongarch/preconfigure.ac
@@ -42,6 +42,7 @@ loongarch*)
 
     base_machine=loongarch
     mtls_descriptor=desc
+    mtls_traditional=trad
     ;;
 esac
 
diff --git a/sysdeps/m68k/m680x0/fpu/math_err.c b/sysdeps/m68k/m680x0/fpu/math_err.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/m68k/m680x0/fpu/math_err.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/m68k/m680x0/w_ilogb.c b/sysdeps/m68k/m680x0/w_ilogb.c
new file mode 100644
index 0000000..9c26217
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_ilogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_ilogbf.c b/sysdeps/m68k/m680x0/w_ilogbf.c
new file mode 100644
index 0000000..047ad4b
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_ilogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_llogb.c b/sysdeps/m68k/m680x0/w_llogb.c
new file mode 100644
index 0000000..5e8891a
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_llogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_llogbf.c b/sysdeps/m68k/m680x0/w_llogbf.c
new file mode 100644
index 0000000..edb7e9a
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_llogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
index 994de00..c905949 100644
--- a/sysdeps/mach/hurd/Makefile
+++ b/sysdeps/mach/hurd/Makefile
@@ -311,9 +311,6 @@ endif
 ifeq ($(subdir),htl)
 tests-unsupported += tst-basic7
 endif
-ifeq ($(subdir),io)
-tests-unsupported += test-lfs
-endif
 ifeq ($(subdir),libio)
 tests-unsupported += tst-asprintf-null
 endif
diff --git a/sysdeps/mach/hurd/bits/ioctls.h b/sysdeps/mach/hurd/bits/ioctls.h
index f01316d..faf1373 100644
--- a/sysdeps/mach/hurd/bits/ioctls.h
+++ b/sysdeps/mach/hurd/bits/ioctls.h
@@ -324,15 +324,8 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 };
    From 4.4 <sys/ioctl_compat.h>.  */
 
 #ifdef __USE_MISC
-#ifdef USE_OLD_TTY
-# undef  TIOCGETD
-# define TIOCGETD	_IOR('t', 0, int)	/* get line discipline */
-# undef  TIOCSETD
-# define TIOCSETD	_IOW('t', 1, int)	/* set line discipline */
-#else
-# define OTIOCGETD	_IOR('t', 0, int)	/* get line discipline */
-# define OTIOCSETD	_IOW('t', 1, int)	/* set line discipline */
-#endif
+#define OTIOCGETD	_IOR('t', 0, int)	/* get line discipline */
+#define OTIOCSETD	_IOW('t', 1, int)	/* set line discipline */
 #define	TIOCHPCL	_IO('t', 2)		/* hang up on last close */
 #define	TIOCGETP	_IOR('t', 8,struct sgttyb)/* get parameters -- gtty */
 #define	TIOCSETP	_IOW('t', 9,struct sgttyb)/* set parameters -- stty */
@@ -411,26 +404,6 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 };
 #define	OTTYDISC	0
 #define	NETLDISC	1
 #define	NTTYDISC	2
-
-/* From 4.4 <sys/ttydev.h>.   */
-#ifdef USE_OLD_TTY
-# define B0	0
-# define B50	1
-# define B75	2
-# define B110	3
-# define B134	4
-# define B150	5
-# define B200	6
-# define B300	7
-# define B600	8
-# define B1200	9
-# define B1800	10
-# define B2400	11
-# define B4800	12
-# define B9600	13
-# define EXTA	14
-# define EXTB	15
-#endif /* USE_OLD_TTY */
 #endif
 
 #endif /* bits/ioctls.h */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/sysdeps/mach/hurd/getrandom-internal.h
index e9c9022..8bd718b 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c
+++ b/sysdeps/mach/hurd/getrandom-internal.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
+/* Internal definitions for Hurd getrandom implementation.
+   Copyright (C) 2024-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,16 +16,15 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <string.h>
+#ifndef _GETRANDOM_INTERNAL_H
+#define _GETRANDOM_INTERNAL_H
 
-#define __strncasecmp_l __strncasecmp_l_power7
+extern void __mach_init (void);
 
-#undef weak_alias
-#define weak_alias(a,b)
+static inline void __getrandom_early_init (_Bool initial)
+{
+  /* getrandom needs RPCs for time etc.; INITIAL is unused here.  */
+  __mach_init ();
+}
 
-#undef libc_hidden_def
-#define libc_hidden_def(name)
-
-extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden;
-
-#include <string/strncase_l.c>
+#endif
diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist
index 3e183f5..a0e686a 100644
--- a/sysdeps/mach/hurd/i386/libc.abilist
+++ b/sysdeps/mach/hurd/i386/libc.abilist
@@ -28,6 +28,7 @@ GLIBC_2.11 mkostemps F
 GLIBC_2.11 mkostemps64 F
 GLIBC_2.11 mkstemps F
 GLIBC_2.11 mkstemps64 F
+GLIBC_2.12 __pthread_key_create F
 GLIBC_2.12 __pthread_self F
 GLIBC_2.12 pthread_attr_destroy F
 GLIBC_2.12 pthread_attr_getdetachstate F
@@ -70,6 +71,9 @@ GLIBC_2.12 pthread_condattr_setclock F
 GLIBC_2.12 pthread_condattr_setpshared F
 GLIBC_2.12 pthread_equal F
 GLIBC_2.12 pthread_getschedparam F
+GLIBC_2.12 pthread_getspecific F
+GLIBC_2.12 pthread_key_create F
+GLIBC_2.12 pthread_key_delete F
 GLIBC_2.12 pthread_mutex_destroy F
 GLIBC_2.12 pthread_mutex_getprioceiling F
 GLIBC_2.12 pthread_mutex_init F
@@ -106,6 +110,7 @@ GLIBC_2.12 pthread_self F
 GLIBC_2.12 pthread_setcancelstate F
 GLIBC_2.12 pthread_setcanceltype F
 GLIBC_2.12 pthread_setschedparam F
+GLIBC_2.12 pthread_setspecific F
 GLIBC_2.12 pthread_sigmask F
 GLIBC_2.13 __fentry__ F
 GLIBC_2.14 syncfs F
@@ -2586,6 +2591,11 @@ GLIBC_2.41 pthread_mutexattr_settype F
 GLIBC_2.41 pthread_sigmask F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetobaud F
 GLIBC_2.42 pthread_barrier_destroy F
 GLIBC_2.42 pthread_barrier_init F
 GLIBC_2.42 pthread_barrier_wait F
@@ -2593,6 +2603,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F
 GLIBC_2.42 pthread_barrierattr_getpshared F
 GLIBC_2.42 pthread_barrierattr_init F
 GLIBC_2.42 pthread_barrierattr_setpshared F
+GLIBC_2.42 pthread_getspecific F
+GLIBC_2.42 pthread_key_create F
+GLIBC_2.42 pthread_key_delete F
 GLIBC_2.42 pthread_mutex_consistent F
 GLIBC_2.42 pthread_mutex_consistent_np F
 GLIBC_2.42 pthread_mutex_getprioceiling F
@@ -2614,6 +2627,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F
 GLIBC_2.42 pthread_rwlockattr_getpshared F
 GLIBC_2.42 pthread_rwlockattr_init F
 GLIBC_2.42 pthread_rwlockattr_setpshared F
+GLIBC_2.42 pthread_setspecific F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist
index 8d76dd8..47d215f 100644
--- a/sysdeps/mach/hurd/i386/libm.abilist
+++ b/sysdeps/mach/hurd/i386/libm.abilist
@@ -1277,6 +1277,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1293,6 +1301,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/mach/hurd/i386/libpthread.abilist b/sysdeps/mach/hurd/i386/libpthread.abilist
index b067d37..51535ac 100644
--- a/sysdeps/mach/hurd/i386/libpthread.abilist
+++ b/sysdeps/mach/hurd/i386/libpthread.abilist
@@ -1,7 +1,6 @@
 GLIBC_2.12 __mutex_lock_solid F
 GLIBC_2.12 __mutex_unlock_solid F
 GLIBC_2.12 __pthread_get_cleanup_stack F
-GLIBC_2.12 __pthread_key_create F
 GLIBC_2.12 __pthread_kill F
 GLIBC_2.12 __pthread_mutex_transfer_np F
 GLIBC_2.12 __pthread_spin_destroy F
@@ -29,15 +28,11 @@ GLIBC_2.12 pthread_exit F
 GLIBC_2.12 pthread_getattr_np F
 GLIBC_2.12 pthread_getconcurrency F
 GLIBC_2.12 pthread_getcpuclockid F
-GLIBC_2.12 pthread_getspecific F
 GLIBC_2.12 pthread_join F
-GLIBC_2.12 pthread_key_create F
-GLIBC_2.12 pthread_key_delete F
 GLIBC_2.12 pthread_kill F
 GLIBC_2.12 pthread_mutex_transfer_np F
 GLIBC_2.12 pthread_setconcurrency F
 GLIBC_2.12 pthread_setschedprio F
-GLIBC_2.12 pthread_setspecific F
 GLIBC_2.12 pthread_spin_destroy F
 GLIBC_2.12 pthread_spin_init F
 GLIBC_2.12 pthread_spin_lock F
diff --git a/sysdeps/mach/hurd/renameat2.c b/sysdeps/mach/hurd/renameat2.c
index 59a4e31..5b09fed 100644
--- a/sysdeps/mach/hurd/renameat2.c
+++ b/sysdeps/mach/hurd/renameat2.c
@@ -37,15 +37,28 @@ __renameat2 (int oldfd, const char *old, int newfd, const char *new,
   if (flags & RENAME_NOREPLACE)
     excl = 1;
 
-  olddir = __directory_name_split_at (oldfd, old, (char **) &oldname);
+  olddir = __file_name_split_at (oldfd, old, (char **) &oldname);
   if (olddir == MACH_PORT_NULL)
     return -1;
-  newdir = __directory_name_split_at (newfd, new, (char **) &newname);
+  if (!*oldname)
+    {
+      /* Trailing slash.  */
+      __mach_port_deallocate (__mach_task_self (), olddir);
+      return __hurd_fail (ENOTDIR);
+    }
+  newdir = __file_name_split_at (newfd, new, (char **) &newname);
   if (newdir == MACH_PORT_NULL)
     {
-       __mach_port_deallocate (__mach_task_self (), olddir);
+      __mach_port_deallocate (__mach_task_self (), olddir);
       return -1;
     }
+  if (!*newname)
+    {
+      /* Trailing slash.  */
+      __mach_port_deallocate (__mach_task_self (), olddir);
+      __mach_port_deallocate (__mach_task_self (), newdir);
+      return __hurd_fail (ENOTDIR);
+    }
 
   err = __dir_rename (olddir, oldname, newdir, newname, excl);
   __mach_port_deallocate (__mach_task_self (), olddir);
diff --git a/sysdeps/mach/hurd/x86_64/libc.abilist b/sysdeps/mach/hurd/x86_64/libc.abilist
index 688ee26..8f9d6aa 100644
--- a/sysdeps/mach/hurd/x86_64/libc.abilist
+++ b/sysdeps/mach/hurd/x86_64/libc.abilist
@@ -392,6 +392,7 @@ GLIBC_2.38 __profile_frequency F
 GLIBC_2.38 __progname D 0x8
 GLIBC_2.38 __progname_full D 0x8
 GLIBC_2.38 __pthread_get_cleanup_stack F
+GLIBC_2.38 __pthread_key_create F
 GLIBC_2.38 __pthread_self F
 GLIBC_2.38 __ptsname_r_chk F
 GLIBC_2.38 __pwrite64 F
@@ -1554,6 +1555,9 @@ GLIBC_2.38 pthread_condattr_setpshared F
 GLIBC_2.38 pthread_equal F
 GLIBC_2.38 pthread_exit F
 GLIBC_2.38 pthread_getschedparam F
+GLIBC_2.38 pthread_getspecific F
+GLIBC_2.38 pthread_key_create F
+GLIBC_2.38 pthread_key_delete F
 GLIBC_2.38 pthread_mutex_clocklock F
 GLIBC_2.38 pthread_mutex_consistent F
 GLIBC_2.38 pthread_mutex_consistent_np F
@@ -1599,6 +1603,7 @@ GLIBC_2.38 pthread_self F
 GLIBC_2.38 pthread_setcancelstate F
 GLIBC_2.38 pthread_setcanceltype F
 GLIBC_2.38 pthread_setschedparam F
+GLIBC_2.38 pthread_setspecific F
 GLIBC_2.38 pthread_sigmask F
 GLIBC_2.38 ptrace F
 GLIBC_2.38 ptsname F
@@ -2269,6 +2274,11 @@ GLIBC_2.41 pthread_mutexattr_settype F
 GLIBC_2.41 pthread_sigmask F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetobaud F
 GLIBC_2.42 pthread_barrier_destroy F
 GLIBC_2.42 pthread_barrier_init F
 GLIBC_2.42 pthread_barrier_wait F
@@ -2276,6 +2286,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F
 GLIBC_2.42 pthread_barrierattr_getpshared F
 GLIBC_2.42 pthread_barrierattr_init F
 GLIBC_2.42 pthread_barrierattr_setpshared F
+GLIBC_2.42 pthread_getspecific F
+GLIBC_2.42 pthread_key_create F
+GLIBC_2.42 pthread_key_delete F
 GLIBC_2.42 pthread_mutex_consistent F
 GLIBC_2.42 pthread_mutex_consistent_np F
 GLIBC_2.42 pthread_mutex_getprioceiling F
@@ -2297,6 +2310,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F
 GLIBC_2.42 pthread_rwlockattr_getpshared F
 GLIBC_2.42 pthread_rwlockattr_init F
 GLIBC_2.42 pthread_rwlockattr_setpshared F
+GLIBC_2.42 pthread_setspecific F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/mach/hurd/x86_64/libm.abilist b/sysdeps/mach/hurd/x86_64/libm.abilist
index 12ae364..52c9d56 100644
--- a/sysdeps/mach/hurd/x86_64/libm.abilist
+++ b/sysdeps/mach/hurd/x86_64/libm.abilist
@@ -1134,6 +1134,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1150,6 +1158,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/mach/hurd/x86_64/libpthread.abilist b/sysdeps/mach/hurd/x86_64/libpthread.abilist
index 6b8acec..a1ff462 100644
--- a/sysdeps/mach/hurd/x86_64/libpthread.abilist
+++ b/sysdeps/mach/hurd/x86_64/libpthread.abilist
@@ -6,7 +6,6 @@ GLIBC_2.38 __h_errno_location F
 GLIBC_2.38 __mutex_lock_solid F
 GLIBC_2.38 __mutex_unlock_solid F
 GLIBC_2.38 __pthread_get_cleanup_stack F
-GLIBC_2.38 __pthread_key_create F
 GLIBC_2.38 __pthread_kill F
 GLIBC_2.38 __pthread_mutex_transfer_np F
 GLIBC_2.38 __pthread_spin_destroy F
@@ -47,17 +46,13 @@ GLIBC_2.38 pthread_exit F
 GLIBC_2.38 pthread_getattr_np F
 GLIBC_2.38 pthread_getconcurrency F
 GLIBC_2.38 pthread_getcpuclockid F
-GLIBC_2.38 pthread_getspecific F
 GLIBC_2.38 pthread_hurd_cond_timedwait_np F
 GLIBC_2.38 pthread_hurd_cond_wait_np F
 GLIBC_2.38 pthread_join F
-GLIBC_2.38 pthread_key_create F
-GLIBC_2.38 pthread_key_delete F
 GLIBC_2.38 pthread_kill F
 GLIBC_2.38 pthread_mutex_transfer_np F
 GLIBC_2.38 pthread_setconcurrency F
 GLIBC_2.38 pthread_setschedprio F
-GLIBC_2.38 pthread_setspecific F
 GLIBC_2.38 pthread_spin_destroy F
 GLIBC_2.38 pthread_spin_init F
 GLIBC_2.38 pthread_spin_lock F
diff --git a/sysdeps/mach/sysdep.h b/sysdeps/mach/sysdep.h
index 8293c66..581bdcd 100644
--- a/sysdeps/mach/sysdep.h
+++ b/sysdeps/mach/sysdep.h
@@ -20,6 +20,11 @@
 /* Get the Mach definitions of ENTRY and kernel_trap.  */
 #include <mach/machine/syscall_sw.h>
 
+/* This macro is defined in Mach system headers, but string functions use it
+   with different definitions depending on whether they are being compiled
+   for wide characters or not.  */
+#undef P2ALIGN
+
 /* The Mach definitions assume underscores should be prepended to
    symbol names.  Redefine them to do so only when appropriate.  */
 #undef EXT
diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c
index d90cc6c..6f75197 100644
--- a/sysdeps/posix/libc_fatal.c
+++ b/sysdeps/posix/libc_fatal.c
@@ -16,23 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
-#include <errno.h>
-#include <fcntl.h>
+#include <assert.h>
 #include <ldsodefs.h>
-#include <libc-pointer-arith.h>
-#include <paths.h>
+#include <setvmaname.h>
 #include <stdarg.h>
-#include <stdbool.h>
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sysdep.h>
-#include <unistd.h>
-#include <sys/mman.h>
 #include <sys/uio.h>
-#include <not-cancel.h>
-#include <setvmaname.h>
+#include <unistd.h>
 
 #ifdef FATAL_PREPARE_INCLUDE
 #include FATAL_PREPARE_INCLUDE
@@ -47,6 +37,10 @@ writev_for_fatal (int fd, const struct iovec *iov, size_t niov, size_t total)
 }
 #endif
 
+/* At most a substring before each conversion specification and the
+   trailing substring (the plus one).  */
+#define IOVEC_MAX (LIBC_MESSAGE_MAX_ARGS * 2 + 1)
+
 /* Abort with an error message.  */
 void
 __libc_message_impl (const char *fmt, ...)
@@ -61,7 +55,7 @@ __libc_message_impl (const char *fmt, ...)
   if (fd == -1)
     fd = STDERR_FILENO;
 
-  struct iovec iov[LIBC_MESSAGE_MAX_ARGS * 2 - 1];
+  struct iovec iov[IOVEC_MAX];
   int iovcnt = 0;
   ssize_t total = 0;
 
@@ -99,6 +93,16 @@ __libc_message_impl (const char *fmt, ...)
       iov[iovcnt].iov_len = len;
       total += len;
       iovcnt++;
+
+      if (__glibc_unlikely (iovcnt > IOVEC_MAX))
+	{ /* NOTE(review): only true after a store past iov[IOVEC_MAX - 1] above; confirm the bound.  */
+	  len = IOVEC_MAX_ERR_MSG_LEN; /* Replace the whole message with the fixed error text.  */
+	  iov[0].iov_base = (char *) IOVEC_MAX_ERR_MSG;
+	  iov[0].iov_len = len;
+	  total = len;
+	  iovcnt = 1;
+	  break;
+	}
     }
   va_end (ap);
 
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
index 5e6cb07..5cdb64f 100644
--- a/sysdeps/powerpc/Makefile
+++ b/sysdeps/powerpc/Makefile
@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c
 $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
 
 $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
+
+# The test checks that __tls_get_addr does not clobber caller-saved
+# registers, so disable the powerpc-specific optimization to force a
+# __tls_get_addr call.
+LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize
 endif
 
 ifneq (no,$(multi-arch))
diff --git a/sysdeps/powerpc/fpu/math-use-builtins-trunc.h b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h
new file mode 100644
index 0000000..3e6a55d
--- /dev/null
+++ b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h
@@ -0,0 +1,9 @@
+#ifdef _ARCH_PWR5X
+# define USE_TRUNCF_BUILTIN 1
+# define USE_TRUNC_BUILTIN 1
+#else
+# define USE_TRUNCF_BUILTIN 0
+# define USE_TRUNC_BUILTIN 0
+#endif
+#define USE_TRUNCL_BUILTIN 0
+#define USE_TRUNCF128_BUILTIN 0
diff --git a/sysdeps/powerpc/fpu/s_modf.c b/sysdeps/powerpc/fpu/s_modf.c
deleted file mode 100644
index 831072b..0000000
--- a/sysdeps/powerpc/fpu/s_modf.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, see <https://www.gnu.org/licenses/>.  */
-
-/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make
-   generic implementation faster.  Also disables for old ISAs that do not
-   have ceil/floor instructions.  */
-#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X)
-# include <sysdeps/ieee754/ldbl-opt/s_modf.c>
-#else
-# include <math.h>
-# include <math_ldbl_opt.h>
-# include <libm-alias-double.h>
-
-double
-__modf (double x, double *iptr)
-{
-  if (__builtin_isinf (x))
-    {
-      *iptr = x;
-      return copysign (0.0, x);
-    }
-  else if (__builtin_isnan (x))
-    {
-      *iptr = NAN;
-      return NAN;
-    }
-
-  if (x >= 0.0)
-    {
-      *iptr = floor (x);
-      return copysign (x - *iptr, x);
-    }
-  else
-    {
-      *iptr = ceil (x);
-      return copysign (x - *iptr, x);
-    }
-}
-# ifndef __modf
-libm_alias_double (__modf, modf)
-#  if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
-compat_symbol (libc, __modf, modfl, GLIBC_2_0);
-#  endif
-# endif
-#endif
diff --git a/sysdeps/powerpc/fpu/s_modff.c b/sysdeps/powerpc/fpu/s_modff.c
deleted file mode 100644
index 79eeb7b..0000000
--- a/sysdeps/powerpc/fpu/s_modff.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, see <https://www.gnu.org/licenses/>.  */
-
-/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make
-   generic implementation faster.  Also disables for old ISAs that do not
-   have ceil/floor instructions.  */
-#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X)
-# include <sysdeps/ieee754/flt-32/s_modff.c>
-#else
-# include <math.h>
-# include <libm-alias-float.h>
-
-float
-__modff (float x, float *iptr)
-{
-  if (__builtin_isinff (x))
-    {
-      *iptr = x;
-      return copysignf (0.0, x);
-    }
-  else if (__builtin_isnanf (x))
-    {
-      *iptr = NAN;
-      return NAN;
-    }
-
-  if (x >= 0.0)
-    {
-      *iptr = floorf (x);
-      return copysignf (x - *iptr, x);
-    }
-  else
-    {
-      *iptr = ceilf (x);
-      return copysignf (x - *iptr, x);
-    }
-}
-# ifndef __modff
-libm_alias_float (__modf, modf)
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
index b8315c5..48f3a19 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modf __modf_power5plus
-#include <sysdeps/powerpc/fpu/s_modf.c>
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
index 69591da..15bfa0b 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modff __modff_power5plus
-#include <sysdeps/powerpc/fpu/s_modff.c>
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile
index 0a4e828..3a49b85 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile
@@ -6,12 +6,9 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   memchr-ppc32 memrchr-power7 memrchr-ppc32 rawmemchr-power7 \
 		   rawmemchr-ppc32 strlen-power7 strlen-ppc32 strnlen-power7 \
 		   strnlen-ppc32 \
-		   strcasecmp-power7 strcasecmp_l-power7 strncase-power7 \
-		   strncase_l-power7 strchrnul-power7 strchrnul-ppc32 \
+		   strcasecmp-power7 strcasecmp_l-power7 \
+		   strchrnul-power7 strchrnul-ppc32 \
 		   strchr-power7 strchr-ppc32 \
 		   wordcopy-power7 wordcopy-ppc32 \
 		   memmove-power7 memmove-ppc
-
-CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
-CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
 endif
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c
index 68a3f9d..7537f3a 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c
@@ -138,21 +138,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
 			      __strcasecmp_l_ppc))
 
-  /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c.  */
-  IFUNC_IMPL (i, name, strncasecmp,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      hwcap & PPC_FEATURE_HAS_VSX,
-			      __strncasecmp_power7)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc))
-
-  /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c.  */
-  IFUNC_IMPL (i, name, strncasecmp_l,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      hwcap & PPC_FEATURE_HAS_VSX,
-			      __strncasecmp_l_power7)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
-			      __strncasecmp_l_ppc))
-
   /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c.  */
   IFUNC_IMPL (i, name, strchrnul,
 	      IFUNC_IMPL_ADD (array, i, strchrnul,
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c
deleted file mode 100644
index 3cd6433..0000000
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* Multiple versions of strncasecmp.
-   Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <string.h>
-# define strncasecmp __strncasecmp_ppc
-extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
-#endif
-
-#include <string/strncase.c>
-#undef strncasecmp
-
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-extern __typeof (__strncasecmp) __libc_strncasecmp;
-libc_ifunc (__libc_strncasecmp,
-	     (hwcap & PPC_FEATURE_HAS_VSX)
-             ? __strncasecmp_power7
-             : __strncasecmp_ppc);
-weak_alias (__libc_strncasecmp, strncasecmp)
-#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c b/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c
deleted file mode 100644
index 388d482..0000000
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Multiple versions of strncasecmp_l.
-   Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <string.h>
-# define strncasecmp_l __strncasecmp_l_ppc
-extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden;
-extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden;
-#endif
-
-#include <string/strncase_l.c>
-#undef strncasecmp_l
-
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-extern __typeof (__strncasecmp_l) __libc_strncasecmp_l;
-libc_ifunc (__libc_strncasecmp_l,
-	     (hwcap & PPC_FEATURE_HAS_VSX)
-             ? __strncasecmp_l_power7
-             : __strncasecmp_l_ppc);
-
-weak_alias (__libc_strncasecmp_l, strncasecmp_l)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
index b8315c5..48f3a19 100644
--- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
+++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modf __modf_power5plus
-#include <sysdeps/powerpc/fpu/s_modf.c>
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
index 69591da..15bfa0b 100644
--- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
+++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modff __modff_power5plus
-#include <sysdeps/powerpc/fpu/s_modff.c>
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/powerpc/powerpc64/le/configure b/sysdeps/powerpc/powerpc64/le/configure
index 7092f61..ef17f24 100644
--- a/sysdeps/powerpc/powerpc64/le/configure
+++ b/sysdeps/powerpc/powerpc64/le/configure
@@ -137,75 +137,5 @@ then :
   critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."
 fi
 
-for ac_prog in $OBJCOPY
-do
-  # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-printf %s "checking for $ac_word... " >&6; }
-if test ${ac_cv_prog_OBJCOPY+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e) if test -n "$OBJCOPY"; then
-  ac_cv_prog_OBJCOPY="$OBJCOPY" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  case $as_dir in #(((
-    '') as_dir=./ ;;
-    */) ;;
-    *) as_dir=$as_dir/ ;;
-  esac
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
-    ac_cv_prog_OBJCOPY="$ac_prog"
-    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-fi ;;
-esac
-fi
-OBJCOPY=$ac_cv_prog_OBJCOPY
-if test -n "$OBJCOPY"; then
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJCOPY" >&5
-printf "%s\n" "$OBJCOPY" >&6; }
-else
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
-printf "%s\n" "no" >&6; }
-fi
-
-
-  test -n "$OBJCOPY" && break
-done
-
-if test -z "$OBJCOPY"; then
-  ac_verc_fail=yes
-else
-  # Found it, now check the version.
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking version of $OBJCOPY" >&5
-printf %s "checking version of $OBJCOPY... " >&6; }
-  ac_prog_version=`$OBJCOPY --version 2>&1 | sed -n 's/^.*GNU objcopy.* \([0-9]*\.[0-9.]*\).*$/\1/p'`
-  case $ac_prog_version in
-    '') ac_prog_version="v. ?.??, bad"; ac_verc_fail=yes;;
-    2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*)
-       ac_prog_version="$ac_prog_version, ok"; ac_verc_fail=no;;
-    *) ac_prog_version="$ac_prog_version, bad"; ac_verc_fail=yes;;
-
-  esac
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_prog_version" >&5
-printf "%s\n" "$ac_prog_version" >&6; }
-fi
-if test $ac_verc_fail = yes; then
-  AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le"
-fi
-
-
 test -n "$critic_missing" && as_fn_error $? "*** $critic_missing" "$LINENO" 5
 
diff --git a/sysdeps/powerpc/powerpc64/le/configure.ac b/sysdeps/powerpc/powerpc64/le/configure.ac
index 48d7089..79b3d43 100644
--- a/sysdeps/powerpc/powerpc64/le/configure.ac
+++ b/sysdeps/powerpc/powerpc64/le/configure.ac
@@ -66,11 +66,4 @@ CFLAGS="$save_CFLAGS"])
 AS_IF([test "$libc_cv_compiler_powerpc64le_ldbl128_mabi" = "no"],
       [critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."])
 
-dnl objcopy (binutils) 2.26 or newer required to support the --update-section
-dnl feature for fixing up .gnu.attribute section with IEEE ldbl.
-AC_CHECK_PROG_VER(OBJCOPY, $OBJCOPY, --version,
-		  [GNU objcopy.* \([0-9]*\.[0-9.]*\)],
-		  [2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*],
-		  AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le")
-
 test -n "$critic_missing" && AC_MSG_ERROR([*** $critic_missing])
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c
new file mode 100644
index 0000000..89e7498
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c
@@ -0,0 +1,41 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include "math_config.h"
+
+int
+__ieee754_ilogb (double x)
+{
+  uint64_t ux = asuint64 (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (ex == 0) /* zero or subnormal */
+    {
+      /* Clear sign and exponent.  */
+      ux <<= 12;
+      if (ux == 0)
+	return FP_ILOGB0;
+      /* Subnormal: each leading zero lowers the exponent by one.  */
+      return -1023 - stdc_leading_zeros (ux);
+    }
+  if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */
+    return ux << 12 ? FP_ILOGBNAN : INT_MAX; /* Nonzero mantissa: NaN.  */
+  return ex - 1023; /* Normal number: remove the exponent bias.  */
+}
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c
new file mode 100644
index 0000000..1c2a8a5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c
@@ -0,0 +1,41 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include "sysdeps/ieee754/flt-32/math_config.h"
+
+int
+__ieee754_ilogbf (float x)
+{
+  uint32_t ux = asuint (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (ex == 0) /* zero or subnormal */
+    {
+      /* Clear sign and exponent.  */
+      ux <<= 1 + EXPONENT_WIDTH;
+      if (ux == 0)
+	return FP_ILOGB0;
+      /* Subnormal: each leading zero lowers the exponent by one.  */
+      return -127 - stdc_leading_zeros (ux);
+    }
+  if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */
+    return ux << (1 + EXPONENT_WIDTH) ? FP_ILOGBNAN : INT_MAX;
+  return ex - 127; /* Normal number: remove the exponent bias.  */
+}
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
index 6d74f09..1a0e496 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/Makefile
@@ -91,8 +91,8 @@ gen-libm-f128-ifunc-routines = \
 	e_expf128 e_fmodf128 e_hypotf128 e_j0f128 e_j1f128 e_jnf128 \
 	e_lgammaf128_r e_logf128 e_log10f128 e_powf128 e_remainderf128 \
 	e_sinhf128 e_sqrtf128 e_gammaf128_r e_ilogbf128 k_tanf128 s_asinhf128 \
-	s_atanf128 s_cbrtf128 s_ceilf128 s_cosf128 s_erff128 s_exp10m1f128 \
-	s_exp2m1f128 s_expm1f128 \
+	s_atanf128 s_cbrtf128 s_ceilf128 s_compoundnf128 s_cosf128 s_erff128 \
+	s_exp10m1f128 s_exp2m1f128 s_expm1f128 \
 	s_fabsf128 s_floorf128 s_log1pf128 s_logbf128 \
 	s_rintf128 s_scalblnf128 s_sinf128 s_tanf128 \
 	s_tanhf128 s_truncf128 s_remquof128 e_log2f128 \
@@ -232,6 +232,7 @@ CFLAGS-s_cexpf128-ifunc.c += -fno-builtin-cexpf64x
 CFLAGS-s_cimagf128-ifunc.c += -fno-builtin-cimagf64x
 CFLAGS-s_clogf128-ifunc.c += -fno-builtin-clogf64x
 CFLAGS-s_clog10f128-ifunc.c += -fno-builtin-clog10f64x
+CFLAGS-s_compoundnf128-ifunc.c += -fno-builtin-compoundnf64x
 CFLAGS-s_conjf128-ifunc.c += -fno-builtin-conjf64x
 CFLAGS-s_copysignf128-ifunc.c += -fno-builtin-copysignf64x
 CFLAGS-s_cosf128-ifunc.c += -fno-builtin-cosf64x
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h
index 18d09bc..b0cc6fc 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128-ifunc-redirects.h
@@ -28,6 +28,7 @@ F128_REDIR_PFX_R (floorf128, __,);
 F128_REDIR_PFX_R (truncf128, __,);
 F128_REDIR_PFX_R (roundf128, __,);
 F128_REDIR_PFX_R (fabsf128, __,);
+F128_REDIR_PFX_R (fmaf128, __,);
 
 extern __typeof (ldexpf128) F128_SFX_APPEND (__ldexpf128);
 
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h
index d8d743c..1e3d8dc 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/float128_private.h
@@ -55,6 +55,7 @@ F128_REDIR (__asinhf128)
 F128_REDIR (__atanf128)
 F128_REDIR (__cbrtf128)
 F128_REDIR (__ceilf128)
+F128_REDIR (__compoundnf128)
 F128_REDIR (__cosf128)
 F128_REDIR (__erfcf128)
 F128_REDIR (__erff128)
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h
index f125b88..56d1bb7 100644
--- a/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h
+++ b/sysdeps/powerpc/powerpc64/le/fpu/multiarch/math-type-macros-float128.h
@@ -78,6 +78,7 @@ F128_REDIR (__ldexpf128);
 F128_REDIR (__cargf128);
 F128_REDIR (__cimagf128);
 F128_REDIR (__crealf128);
+F128_REDIR (__compoundnf128);
 F128_REDIR (__conjf128);
 F128_REDIR (__cprojf128);
 F128_REDIR (__cabsf128);
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c
new file mode 100644
index 0000000..9c26217
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c
new file mode 100644
index 0000000..047ad4b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c
new file mode 100644
index 0000000..5e8891a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c
new file mode 100644
index 0000000..edb7e9a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S
deleted file mode 100644
index 96ad5a2..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S
+++ /dev/null
@@ -1,315 +0,0 @@
-/* Optimized memchr implementation for POWER10 LE.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-# ifndef MEMCHR
-#  define MEMCHR __memchr
-# endif
-# define M_VREG_ZERO v20
-# define M_OFF_START_LOOP 256
-# define MEMCHR_SUBTRACT_VECTORS \
-	vsububm   v4,v4,v18;	    \
-	vsububm   v5,v5,v18;	    \
-	vsububm   v6,v6,v18;	    \
-	vsububm   v7,v7,v18;
-# define M_TAIL(vreg,increment)	   \
-	vctzlsbb  r4,vreg;	   \
-	cmpld     r5,r4;	   \
-	ble       L(null);	   \
-	addi	  r4,r4,increment; \
-	add	  r3,r6,r4;	   \
-	blr
-
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define M_VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))	 \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define M_LXVP(xtp,dq,ra)		   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
-#define CHECK16B(vreg,offset,addr,label) \
-	lxv	  vreg+32,offset(addr);	\
-	vcmpequb. vreg,vreg,v18;	\
-	bne	  cr6,L(label);		\
-	cmpldi	  r5,16;		\
-	ble	  L(null);		\
-	addi	  r5,r5,-16;
-
-/* Load 4 quadwords, merge into one VR for speed and check for NULLs.  r6 has #
-   of bytes already checked.  */
-#define CHECK64B(offset,addr,label)	    \
-	M_LXVP(v4+32,offset,addr);	    \
-	M_LXVP(v6+32,offset+32,addr);	    \
-	MEMCHR_SUBTRACT_VECTORS;	    \
-	vminub	  v14,v4,v5;		    \
-	vminub	  v15,v6,v7;		    \
-	vminub	  v16,v14,v15;		    \
-	vcmpequb. v0,v16,M_VREG_ZERO;	    \
-	beq	  cr6,$+12;		    \
-	li	  r7,offset;		    \
-	b     	  L(label);          	    \
-	cmpldi	  r5,64;		    \
-	ble	  L(null);		    \
-	addi	  r5,r5,-64
-
-/* Implements the function
-   void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]).  */
-
-	.machine power9
-
-ENTRY_TOCLESS (MEMCHR)
-	CALL_MCOUNT 3
-
-	cmpldi	r5,0
-	beq	L(null)
-	mr	r0,r5
-	xori	r6,r4,0xff
-
-	mtvsrd	v18+32,r4	/* matching char in v18  */
-	mtvsrd	v19+32,r6	/* non matching char in v19  */
-
-	vspltb	v18,v18,7	/* replicate  */
-	vspltb	v19,v19,7	/* replicate  */
-	vspltisb  M_VREG_ZERO,0
-
-	/* Next 16B-aligned address. Prepare address for L(aligned).  */
-	addi	  r6,r3,16
-	clrrdi	  r6,r6,4
-
-	/* Align data and fill bytes not loaded with non matching char.	 */
-	lvx	  v0,0,r3
-	lvsr	  v1,0,r3
-	vperm	  v0,v19,v0,v1
-
-	vcmpequb. v6,v0,v18
-	bne	  cr6,L(found)
-	sub	  r4,r6,r3
-	cmpld	  r5,r4
-	ble	  L(null)
-	sub	  r5,r5,r4
-
-	/* Test up to OFF_START_LOOP-16 bytes in 16B chunks.  The main loop is
-	   optimized for longer strings, so checking the first bytes in 16B
-	   chunks benefits a lot small strings.  */
-	.p2align 5
-L(aligned):
-	cmpldi	r5,0
-	beq     L(null)
-
-	CHECK16B(v0,0,r6,tail1)
-	CHECK16B(v1,16,r6,tail2)
-	CHECK16B(v2,32,r6,tail3)
-	CHECK16B(v3,48,r6,tail4)
-	CHECK16B(v4,64,r6,tail5)
-	CHECK16B(v5,80,r6,tail6)
-	CHECK16B(v6,96,r6,tail7)
-	CHECK16B(v7,112,r6,tail8)
-	CHECK16B(v8,128,r6,tail9)
-	CHECK16B(v9,144,r6,tail10)
-	CHECK16B(v10,160,r6,tail11)
-	CHECK16B(v0,176,r6,tail12)
-	CHECK16B(v1,192,r6,tail13)
-	CHECK16B(v2,208,r6,tail14)
-	CHECK16B(v3,224,r6,tail15)
-
-	cmpdi	cr5,r4,0	/* Check if c == 0.  This will be useful to
-				   choose how we will perform the main loop.  */
-
-	/* Prepare address for the loop.  */
-	addi	  r4,r3,M_OFF_START_LOOP
-	clrrdi	  r4,r4,6
-	sub	  r6,r4,r3
-	sub	  r5,r0,r6
-	addi	  r6,r4,128
-
-	/* If c == 0, use the loop without the vsububm.  */
-	beq	cr5,L(loop)
-
-	/* This is very similar to the block after L(loop), the difference is
-	   that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract
-	   each byte loaded by the char we are looking for, this way we can keep
-	   using vminub to merge the results and checking for nulls.  */
-	.p2align 5
-L(memchr_loop):
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	r6,r6,256
-
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	r6,r6,256
-
-	b	L(memchr_loop)
-	/* Switch to a more aggressive approach checking 64B each time.  Use 2
-	   pointers 128B apart and unroll the loop once to make the pointer
-	   updates and usages separated enough to avoid stalls waiting for
-	   address calculation.  */
-	.p2align 5
-L(loop):
-#undef MEMCHR_SUBTRACT_VECTORS
-#define MEMCHR_SUBTRACT_VECTORS /* nothing */
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	  r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	  r6,r6,256
-
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi      r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi      r6,r6,256
-
-	b	  L(loop)
-
-	.p2align  5
-L(pre_tail_64b):
-	mr	r6,r4
-L(tail_64b):
-	/* OK, we found a null byte.  Let's look for it in the current 64-byte
-	   block and mark it in its corresponding VR.  lxvp vx,0(ry) puts the
-	   low 16B bytes into vx+1, and the high into vx, so the order here is
-	   v5, v4, v7, v6.  */
-	vcmpequb  v1,v5,M_VREG_ZERO
-	vcmpequb  v2,v4,M_VREG_ZERO
-	vcmpequb  v3,v7,M_VREG_ZERO
-	vcmpequb  v4,v6,M_VREG_ZERO
-
-	/* Take into account the other 64B blocks we had already checked.  */
-	add	r6,r6,r7
-	/* Extract first bit of each byte.  */
-	M_VEXTRACTBM(r8,v1)
-	M_VEXTRACTBM(r9,v2)
-	M_VEXTRACTBM(r10,v3)
-	M_VEXTRACTBM(r11,v4)
-
-	/* Shift each value into their corresponding position.  */
-	sldi	  r9,r9,16
-	sldi	  r10,r10,32
-	sldi	  r11,r11,48
-
-	/* Merge the results.  */
-	or	  r8,r8,r9
-	or	  r9,r10,r11
-	or	  r11,r9,r8
-
-	cnttzd	  r0,r11	  /* Count trailing zeros before the match.  */
-	cmpld     r5,r0
-	ble	  L(null)
-	add	  r3,r6,r0	  /* Compute final address.  */
-	blr
-
-	.p2align  5
-L(tail1):
-	M_TAIL(v0,0)
-
-	.p2align  5
-L(tail2):
-	M_TAIL(v1,16)
-
-	.p2align  5
-L(tail3):
-	M_TAIL(v2,32)
-
-	.p2align  5
-L(tail4):
-	M_TAIL(v3,48)
-
-	.p2align  5
-L(tail5):
-	M_TAIL(v4,64)
-
-	.p2align  5
-L(tail6):
-	M_TAIL(v5,80)
-
-	.p2align  5
-L(tail7):
-	M_TAIL(v6,96)
-
-	.p2align  5
-L(tail8):
-	M_TAIL(v7,112)
-
-	.p2align  5
-L(tail9):
-	M_TAIL(v8,128)
-
-	.p2align  5
-L(tail10):
-	M_TAIL(v9,144)
-
-	.p2align  5
-L(tail11):
-	M_TAIL(v10,160)
-
-	.p2align  5
-L(tail12):
-	M_TAIL(v0,176)
-
-	.p2align  5
-L(tail13):
-	M_TAIL(v1,192)
-
-	.p2align  5
-L(tail14):
-	M_TAIL(v2,208)
-
-	.p2align  5
-L(tail15):
-	M_TAIL(v3,224)
-
-	.p2align  5
-L(found):
-	vctzlsbb  r7,v6
-	cmpld     r5,r7
-	ble       L(null)
-	add       r3,r3,r7
-	blr
-
-	.p2align  5
-L(null):
-	li	r3,0
-	blr
-
-END (MEMCHR)
-
-weak_alias (__memchr, memchr)
-libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
index f32dc38..734bf5f 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
@@ -18,26 +18,10 @@
 
 #include <sysdep.h>
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))  \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)			   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
 /* Compare 32 bytes.  */
 #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
-	LXVP(32+vr1,offset,r3);		\
-	LXVP(32+vr2,offset,r4);		\
+	lxvp      32+vr1,offset(r3);	\
+	lxvp      32+vr2,offset(r4);	\
 	vcmpneb.  v5,vr1+1,vr2+1;	\
 	bne	  cr6,L(tail_2);	\
 	vcmpneb.  v4,vr1,vr2;		\
@@ -56,7 +40,7 @@
 #ifndef MEMCMP
 # define MEMCMP memcmp
 #endif
-	.machine  power9
+	.machine  power10
 ENTRY_TOCLESS (MEMCMP, 4)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
index ed7a9f5..f2a503e 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
@@ -26,7 +26,7 @@
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
 	   Returns 'dst'.  */
 
-	.machine power9
+	.machine power10
 ENTRY_TOCLESS (MEMCPY, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memmove.S b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
index 47c2ac3..4aaa1ef 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memmove.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
@@ -28,7 +28,7 @@
 #ifndef MEMMOVE
 # define MEMMOVE memmove
 #endif
-	.machine power9
+	.machine power10
 ENTRY_TOCLESS (MEMMOVE, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S
index 29d5114..f9442e7 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memset.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S
@@ -25,7 +25,7 @@
 # define MEMSET memset
 #endif
 
-	.machine  power9
+	.machine  power10
 ENTRY_TOCLESS (MEMSET, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
deleted file mode 100644
index fffa1ee..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Optimized strcmp implementation for PowerPC64/POWER10.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-#include <sysdep.h>
-
-#ifndef STRCMP
-# define STRCMP strcmp
-#endif
-
-/* Implements the function
-   int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]).  */
-
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-#define LXVP(xtp,dq,ra)		     \
-	.long(((6)<<(32-6))	     \
-	| ((((xtp)-32)>>1)<<(32-10)) \
-	| ((1)<<(32-11))	     \
-	| ((ra)<<(32-16))	     \
-	| dq)
-
-#define COMPARE_16(vreg1,vreg2,offset)  \
-	lxv       vreg1+32,offset(r3);  \
-	lxv       vreg2+32,offset(r4);	\
-	vcmpnezb. v7,vreg1,vreg2;	\
-	bne       cr6,L(different);     \
-
-#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
-	LXVP(vreg1+32,offset,r3);                    \
-	LXVP(vreg2+32,offset,r4);                    \
-	vcmpnezb. v7,vreg1+1,vreg2+1;                \
-	bne	  cr6,L(label1);                     \
-	vcmpnezb. v7,vreg1,vreg2;                    \
-	bne	  cr6,L(label2);                     \
-
-#define TAIL(vreg1,vreg2)     \
-	vctzlsbb r6,v7;	      \
-	vextubrx r5,r6,vreg1; \
-	vextubrx r4,r6,vreg2; \
-	subf	 r3,r4,r5;    \
-	blr;                  \
-
-#define CHECK_N_BYTES(reg1,reg2,len_reg) \
-	sldi	  r0,len_reg,56;         \
-	lxvl	  32+v4,reg1,r0;         \
-	lxvl	  32+v5,reg2,r0;         \
-	add	  reg1,reg1,len_reg;     \
-	add	  reg2,reg2,len_reg;     \
-	vcmpnezb  v7,v4,v5;              \
-	vctzlsbb  r6,v7;                 \
-	cmpld	  cr7,r6,len_reg;        \
-	blt	  cr7,L(different);      \
-
-	/* TODO: change this to .machine power10 when the minimum required
-	binutils allows it.  */
-
-	.machine  power9
-ENTRY_TOCLESS (STRCMP, 4)
-	andi.	r7,r3,4095
-	andi.	r8,r4,4095
-	cmpldi	cr0,r7,4096-16
-	cmpldi	cr1,r8,4096-16
-	bgt	cr0,L(crosses)
-	bgt	cr1,L(crosses)
-	COMPARE_16(v4,v5,0)
-
-L(crosses):
-	andi.	r7,r3,15
-	subfic	r7,r7,16	/* r7(nalign1) = 16 - (str1 & 15).  */
-	andi.	r9,r4,15
-	subfic	r5,r9,16	/* r5(nalign2) = 16 - (str2 & 15).  */
-	cmpld	cr7,r7,r5
-	beq	cr7,L(same_aligned)
-	blt	cr7,L(nalign1_min)
-
-	/* nalign2 is minimum and s2 pointer is aligned.  */
-	CHECK_N_BYTES(r3,r4,r5)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.	r10,r3,63	/* Determine offset into 64B hunk.  */
-	andi.	r8,r3,15        /* The offset into the 16B hunk.  */
-	neg	r7,r3
-	andi.	r9,r7,15	/* Number of bytes after a 16B cross.  */
-	rlwinm.	r7,r7,26,0x3F	/* ((r3-4096))>>6&63.  */
-	beq	L(compare_64_pagecross)
-	mtctr	r7
-	b	L(compare_64B_unaligned)
-
-	/* nalign1 is minimum and s1 pointer is aligned.  */
-L(nalign1_min):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.	r10,r4,63	/* Determine offset into 64B hunk.  */
-	andi.	r8,r4,15	/* The offset into the 16B hunk.  */
-	neg	r7,r4
-	andi.	r9,r7,15	/* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F	/* ((r4-4096))>>6&63.  */
-	beq	L(compare_64_pagecross)
-	mtctr	r7
-
-	.p2align 5
-L(compare_64B_unaligned):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	bdnz	L(compare_64B_unaligned)
-
-	/* Cross the page boundary of s2, carefully. Only for first
-	iteration we have to get the count of 64B blocks to be checked.
-	From second iteration and beyond, loop counter is always 63.  */
-L(compare_64_pagecross):
-	li	r11, 63
-	mtctr	r11
-	cmpldi	r10,16
-	ble	L(cross_4)
-	cmpldi	r10,32
-	ble	L(cross_3)
-	cmpldi	r10,48
-	ble	L(cross_2)
-L(cross_1):
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi	r3,r3,48
-	addi	r4,r4,48
-	b	L(compare_64B_unaligned)
-L(cross_2):
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r3,r3,32
-	addi	r4,r4,32
-	b	L(compare_64B_unaligned)
-L(cross_3):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r3,r3,32
-	addi	r4,r4,32
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-	b	L(compare_64B_unaligned)
-L(cross_4):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi	r3,r3,48
-	addi	r4,r4,48
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	b	L(compare_64B_unaligned)
-
-L(same_aligned):
-	CHECK_N_BYTES(r3,r4,r7)
-        /* Align s1 to 32B and adjust s2 address.
-	   Use lxvp only if both s1 and s2 are 32B aligned.  */
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-
-	clrldi	r6,r3,59
-	subfic	r5,r6,32
-	add	r3,r3,r5
-	add	r4,r4,r5
-	andi.	r5,r4,0x1F
-	beq	cr0,L(32B_aligned_loop)
-
-	.p2align 5
-L(16B_aligned_loop):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	b	L(16B_aligned_loop)
-
-	/* Calculate and return the difference.  */
-L(different):
-	TAIL(v4,v5)
-
-	.p2align 5
-L(32B_aligned_loop):
-	COMPARE_32(v14,v16,0,tail1,tail2)
-	COMPARE_32(v18,v20,32,tail3,tail4)
-	COMPARE_32(v22,v24,64,tail5,tail6)
-	COMPARE_32(v26,v28,96,tail7,tail8)
-	addi	r3,r3,128
-	addi	r4,r4,128
-	b	L(32B_aligned_loop)
-
-L(tail1): TAIL(v15,v17)
-L(tail2): TAIL(v14,v16)
-L(tail3): TAIL(v19,v21)
-L(tail4): TAIL(v18,v20)
-L(tail5): TAIL(v23,v25)
-L(tail6): TAIL(v22,v24)
-L(tail7): TAIL(v27,v29)
-L(tail8): TAIL(v26,v28)
-
-END (STRCMP)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
index 4985a92..ec644d5 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
@@ -63,22 +63,6 @@
 	blr
 #endif /* USE_AS_RAWMEMCHR */
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))	 \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)		   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
 #define CHECK16(vreg,offset,addr,label) \
 	lxv	  vreg+32,offset(addr);	\
 	vcmpequb. vreg,vreg,v18;	\
@@ -88,8 +72,8 @@
    of bytes already checked.  */
 #define CHECK64(offset,addr,label)	    \
 	li	  r6,offset;		    \
-	LXVP(v4+32,offset,addr);	    \
-	LXVP(v6+32,offset+32,addr);	    \
+	lxvp      v4+32,offset(addr);	    \
+	lxvp      v6+32,offset+32(addr);    \
 	RAWMEMCHR_SUBTRACT_VECTORS;	    \
 	vminub	  v14,v4,v5;		    \
 	vminub	  v15,v6,v7;		    \
@@ -108,7 +92,7 @@
    The implementation can load bytes past a matching byte, but only
    up to the next 64B boundary, so it never crosses a page.  */
 
-.machine power9
+.machine power10
 
 ENTRY_TOCLESS (FUNCNAME, 4)
 	CALL_MCOUNT MCOUNT_NARGS
@@ -234,10 +218,10 @@ L(tail_64b):
 	add	r5,r5,r6
 
 	/* Extract first bit of each byte.  */
-	VEXTRACTBM(r7,v1)
-	VEXTRACTBM(r8,v2)
-	VEXTRACTBM(r9,v3)
-	VEXTRACTBM(r10,v4)
+	vextractbm r7,v1
+	vextractbm r8,v2
+	vextractbm r9,v3
+	vextractbm r10,v4
 
 	/* Shift each value into their corresponding position.  */
 	sldi	  r8,r8,16
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
deleted file mode 100644
index 10700dd..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
+++ /dev/null
@@ -1,271 +0,0 @@
-/* Optimized strncmp implementation for PowerPC64/POWER10.
-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Implements the function
-
-   int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending of data alignment for first 32 bytes and uses
-   vectorised loops after that.  */
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-#define LXVP(xtp,dq,ra)              \
-	.long(((6)<<(32-6))          \
-	| ((((xtp)-32)>>1)<<(32-10)) \
-	| ((1)<<(32-11))             \
-	| ((ra)<<(32-16))            \
-	| dq)
-
-#define COMPARE_16(vreg1,vreg2,offset) \
-	lxv	  vreg1+32,offset(r3); \
-	lxv	  vreg2+32,offset(r4); \
-	vcmpnezb. v7,vreg1,vreg2;      \
-	bne	  cr6,L(different);    \
-	cmpldi	  cr7,r5,16;           \
-	ble	  cr7,L(ret0);         \
-	addi	  r5,r5,-16;
-
-#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
-	LXVP(vreg1+32,offset,r3);                    \
-	LXVP(vreg2+32,offset,r4);                    \
-	vcmpnezb. v7,vreg1+1,vreg2+1;                \
-	bne	  cr6,L(label1);                     \
-	vcmpnezb. v7,vreg1,vreg2;                    \
-	bne	  cr6,L(label2);                     \
-	cmpldi	  cr7,r5,32;                         \
-	ble	  cr7,L(ret0);                       \
-	addi	  r5,r5,-32;
-
-#define TAIL_FIRST_16B(vreg1,vreg2) \
-	vctzlsbb r6,v7;             \
-	cmpld	 cr7,r5,r6;         \
-	ble	 cr7,L(ret0);       \
-	vextubrx r5,r6,vreg1;       \
-	vextubrx r4,r6,vreg2;       \
-	subf	 r3,r4,r5;          \
-	blr;
-
-#define TAIL_SECOND_16B(vreg1,vreg2) \
-	vctzlsbb r6,v7;              \
-	addi	 r0,r6,16;           \
-	cmpld	 cr7,r5,r0;          \
-	ble	 cr7,L(ret0);        \
-	vextubrx r5,r6,vreg1;        \
-	vextubrx r4,r6,vreg2;        \
-	subf	 r3,r4,r5;           \
-	blr;
-
-#define CHECK_N_BYTES(reg1,reg2,len_reg) \
-	sldi	  r6,len_reg,56;	 \
-	lxvl	  32+v4,reg1,r6;	 \
-	lxvl	  32+v5,reg2,r6;	 \
-	add	  reg1,reg1,len_reg;	 \
-	add	  reg2,reg2,len_reg;	 \
-	vcmpnezb  v7,v4,v5;		 \
-	vctzlsbb  r6,v7;		 \
-	cmpld	  cr7,r6,len_reg;	 \
-	blt	  cr7,L(different);	 \
-	cmpld	  cr7,r5,len_reg;	 \
-	ble	  cr7,L(ret0);		 \
-	sub	  r5,r5,len_reg;	 \
-
-	/* TODO: change this to .machine power10 when the minimum required
-	 binutils allows it.  */
-	.machine  power9
-ENTRY_TOCLESS (STRNCMP, 4)
-	/* Check if size is 0.  */
-	cmpdi	 cr0,r5,0
-	beq	 cr0,L(ret0)
-	andi.   r7,r3,4095
-	andi.   r8,r4,4095
-	cmpldi  cr0,r7,4096-16
-	cmpldi  cr1,r8,4096-16
-	bgt     cr0,L(crosses)
-	bgt     cr1,L(crosses)
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-
-L(crosses):
-	andi.	 r7,r3,15
-	subfic	 r7,r7,16	/* r7(nalign1) = 16 - (str1 & 15).  */
-	andi.	 r9,r4,15
-	subfic	 r8,r9,16	/* r8(nalign2) = 16 - (str2 & 15).  */
-	cmpld	 cr7,r7,r8
-	beq	 cr7,L(same_aligned)
-	blt	 cr7,L(nalign1_min)
-
-	/* nalign2 is minimum and s2 pointer is aligned.  */
-	CHECK_N_BYTES(r3,r4,r8)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.   r10,r3,63       /* Determine offset into 64B hunk.  */
-	andi.   r8,r3,15        /* The offset into the 16B hunk.  */
-	neg     r7,r3
-	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
-	beq     L(compare_64_pagecross)
-	mtctr   r7
-	b       L(compare_64B_unaligned)
-
-	/* nalign1 is minimum and s1 pointer is aligned.  */
-L(nalign1_min):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.   r10,r4,63       /* Determine offset into 64B hunk.  */
-	andi.   r8,r4,15        /* The offset into the 16B hunk.  */
-	neg     r7,r4
-	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
-	beq     L(compare_64_pagecross)
-	mtctr   r7
-
-	.p2align 5
-L(compare_64B_unaligned):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi    r3,r3,64
-	addi    r4,r4,64
-	bdnz    L(compare_64B_unaligned)
-
-	/* Cross the page boundary of s2, carefully. Only for first
-	iteration we have to get the count of 64B blocks to be checked.
-	From second iteration and beyond, loop counter is always 63.  */
-L(compare_64_pagecross):
-	li      r11, 63
-	mtctr   r11
-	cmpldi  r10,16
-	ble     L(cross_4)
-	cmpldi  r10,32
-	ble     L(cross_3)
-	cmpldi  r10,48
-	ble     L(cross_2)
-L(cross_1):
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi    r3,r3,48
-	addi    r4,r4,48
-	b       L(compare_64B_unaligned)
-L(cross_2):
-	COMPARE_16(v4,v5,0)
-	addi    r3,r3,16
-	addi    r4,r4,16
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi    r3,r3,32
-	addi    r4,r4,32
-	b       L(compare_64B_unaligned)
-L(cross_3):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi    r3,r3,32
-	addi    r4,r4,32
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	addi    r3,r3,16
-	addi    r4,r4,16
-	b       L(compare_64B_unaligned)
-L(cross_4):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi    r3,r3,48
-	addi    r4,r4,48
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	b       L(compare_64B_unaligned)
-
-L(same_aligned):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Align s1 to 32B and adjust s2 address.
-	   Use lxvp only if both s1 and s2 are 32B aligned.  */
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r5,r5,32
-
-	clrldi  r6,r3,59
-	subfic	r7,r6,32
-	add	r3,r3,r7
-	add	r4,r4,r7
-	subf	r5,r7,r5
-	andi.	r7,r4,0x1F
-	beq	cr0,L(32B_aligned_loop)
-
-	.p2align 5
-L(16B_aligned_loop):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	b	L(16B_aligned_loop)
-
-	/* Calculate and return the difference.  */
-L(different):
-	TAIL_FIRST_16B(v4,v5)
-
-	.p2align 5
-L(32B_aligned_loop):
-	COMPARE_32(v14,v16,0,tail1,tail2)
-	COMPARE_32(v18,v20,32,tail3,tail4)
-	COMPARE_32(v22,v24,64,tail5,tail6)
-	COMPARE_32(v26,v28,96,tail7,tail8)
-	addi	r3,r3,128
-	addi	r4,r4,128
-	b	L(32B_aligned_loop)
-
-L(tail1): TAIL_FIRST_16B(v15,v17)
-L(tail2): TAIL_SECOND_16B(v14,v16)
-L(tail3): TAIL_FIRST_16B(v19,v21)
-L(tail4): TAIL_SECOND_16B(v18,v20)
-L(tail5): TAIL_FIRST_16B(v23,v25)
-L(tail6): TAIL_SECOND_16B(v22,v24)
-L(tail7): TAIL_FIRST_16B(v27,v29)
-L(tail8): TAIL_SECOND_16B(v26,v28)
-
-	.p2align 5
-L(ret0):
-	li	r3,0
-	blr
-
-END(STRNCMP)
-libc_hidden_builtin_def(strncmp)
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
index 83b21c6..f0cde81 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
@@ -28,21 +28,6 @@
    The implementation uses unaligned doubleword access for first 32 bytes
    as in POWER8 patch and uses vectorised loops after that.  */
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -61,10 +46,7 @@
 2: \
 	vperm   reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when the minimum required binutils
-   allows it.  */
-
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRCMP, 4)
 	li	r0, 0
 
@@ -116,7 +98,7 @@ L(align):
 	/* Both s1 and s2 are unaligned.  */
 	GET16BYTES(v4, r7, v10)
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -136,28 +118,28 @@ L(match):
 L(s1_align):
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(aligned)
 
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vctzlsbb r6, v7
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
index 60c74ab..5a25f94 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
@@ -29,21 +29,6 @@
 # define STRNCMP strncmp
 #endif
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -64,9 +49,7 @@
 2: \
 	vperm	reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when minimum binutils
-   is upgraded to 2.27.  */
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRNCMP, 4)
 	/* Check if size is 0.  */
 	cmpdi	cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
 	clrldi	r6, r3, 60
 	subfic	r11, r6, 16
 	GET16BYTES(v4, r3, v10)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -186,7 +169,7 @@ L(match):
 L(s1_align):
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
 	b	L(aligned)
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
+	vctzlsbb r6, v7
 	cmplw	cr7, r5, r6
 	ble	cr7, L(ret0)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index dc7c5b1..e321ce5 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -11,7 +11,6 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
 		   strlen-power7 strlen-ppc64 \
 		   strnlen-power8 strnlen-power7 strnlen-ppc64 \
 		   strcasecmp-power7 strcasecmp_l-power7 \
-		   strncase-power7 strncase_l-power7 \
 		   strncmp-power8 strncmp-ppc64 \
 		   strchr-power8 strchr-power7 strchr-ppc64 \
 		   strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
@@ -31,15 +30,12 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
 		   strncase-power8
 
 ifneq (,$(filter %le,$(config-machine)))
-sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \
-		   memmove-power10 memset-power10 rawmemchr-power9 \
-		   rawmemchr-power10 strcmp-power9 strcmp-power10 \
-		   strncmp-power9 strncmp-power10 strcpy-power9 strcat-power10 \
-		   stpcpy-power9 strlen-power9 strncpy-power9 stpncpy-power9 \
-		   strlen-power10
+sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
+		   rawmemchr-power9 rawmemchr-power10 \
+		   strcmp-power9 strncmp-power9 \
+		   strcpy-power9 strcat-power10 stpcpy-power9 \
+		   strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
 endif
-CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
-CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
 endif
 
 # Called during static initialization
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 0a31a58..016d05f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c.  */
   IFUNC_IMPL (i, name, strncmp,
 #ifdef __LITTLE_ENDIAN__
-	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1
-			      && hwcap & PPC_FEATURE_HAS_VSX,
-			      __strncmp_power10)
 	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 			      __strncmp_power9)
@@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c.  */
   IFUNC_IMPL (i, name, memchr,
-#ifdef __LITTLE_ENDIAN__
-	      IFUNC_IMPL_ADD (array, i, memchr,
-		              hwcap2 & PPC_FEATURE2_ARCH_3_1
-			      && hwcap & PPC_FEATURE_HAS_VSX,
-			      __memchr_power10)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      hwcap2 & PPC_FEATURE2_ARCH_2_07
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
@@ -311,19 +302,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      hwcap2 & PPC_FEATURE2_ARCH_2_07
 			       && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 			      __strncasecmp_power8)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      hwcap & PPC_FEATURE_ARCH_2_06,
-			      __strncasecmp_power7)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc))
 
-  /* Support sysdeps/powerpc/powerpc64/multiarch/strncase_l.c.  */
-  IFUNC_IMPL (i, name, strncasecmp_l,
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
-			      hwcap & PPC_FEATURE_ARCH_2_06,
-			      __strncasecmp_l_power7)
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
-			      __strncasecmp_l_ppc))
-
   /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c.  */
   IFUNC_IMPL (i, name, strrchr,
 	      IFUNC_IMPL_ADD (array, i, strrchr,
@@ -387,10 +367,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcmp,
 #ifdef __LITTLE_ENDIAN__
 	      IFUNC_IMPL_ADD (array, i, strcmp,
-			      (hwcap2 & PPC_FEATURE2_ARCH_3_1)
-			      && (hwcap & PPC_FEATURE_HAS_VSX),
-			      __strcmp_power10)
-	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      hwcap2 & PPC_FEATURE2_ARCH_3_00
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 			      __strcmp_power9)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c
index b63c796..3abd64a 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c
@@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden;
 extern __typeof (__memchr) __memchr_power7 attribute_hidden;
 extern __typeof (__memchr) __memchr_power8 attribute_hidden;
 
-# ifdef __LITTLE_ENDIAN__
-extern __typeof (__memchr) __memchr_power10 attribute_hidden;
-# endif
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc (__memchr,
-# ifdef __LITTLE_ENDIAN__
-	    (hwcap2 & PPC_FEATURE2_ARCH_3_1
-	     && hwcap & PPC_FEATURE_HAS_VSX)
-	    ? __memchr_power10 :
-# endif
-	      (hwcap2 & PPC_FEATURE2_ARCH_2_07
-	      && hwcap & PPC_FEATURE_HAS_ALTIVEC)
-	      ? __memchr_power8 :
-	        (hwcap & PPC_FEATURE_ARCH_2_06)
-	        ? __memchr_power7
-	        : __memchr_ppc);
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07
+	     && hwcap & PPC_FEATURE_HAS_ALTIVEC)
+	    ? __memchr_power8 :
+	    (hwcap & PPC_FEATURE_ARCH_2_06)
+            ? __memchr_power7
+            : __memchr_ppc);
 
 weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
index 3c636e3..7c77c08 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
@@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden;
 extern __typeof (strcmp) __strcmp_power8 attribute_hidden;
 # ifdef __LITTLE_ENDIAN__
 extern __typeof (strcmp) __strcmp_power9 attribute_hidden;
-extern __typeof (strcmp) __strcmp_power10 attribute_hidden;
 # endif
 
 # undef strcmp
 
 libc_ifunc_redirected (__redirect_strcmp, strcmp,
 # ifdef __LITTLE_ENDIAN__
-		        (hwcap2 & PPC_FEATURE2_ARCH_3_1
-			 && hwcap & PPC_FEATURE_HAS_VSX)
-			? __strcmp_power10 :
 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
 			? __strcmp_power9 :
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
index a83c11a..807b0af 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c
@@ -23,15 +23,12 @@
 extern __typeof (__strncasecmp) __libc_strncasecmp;
 
 extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden;
-extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden;
 extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden;
 
 libc_ifunc (__libc_strncasecmp,
 	     (hwcap2 & PPC_FEATURE2_ARCH_2_07
 	      && hwcap & PPC_FEATURE_HAS_ALTIVEC)
-             ? __strncasecmp_power8:
-	     (hwcap & PPC_FEATURE_ARCH_2_06)
-             ? __strncasecmp_power7
-             : __strncasecmp_ppc);
+             ? __strncasecmp_power8
+	     : __strncasecmp_ppc);
 
 weak_alias (__libc_strncasecmp, strncasecmp)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c b/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c
deleted file mode 100644
index 1f28448..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Multiple versions of strncasecmp_l
-   Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-# include <string.h>
-# define strncasecmp_l __strncasecmp_l_ppc
-extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden;
-extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden;
-#endif
-
-#include <string/strncase_l.c>
-#undef strncasecmp_l
-
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-extern __typeof (__strncasecmp_l) __libc_strncasecmp_l;
-libc_ifunc (__libc_strncasecmp_l,
-	     (hwcap & PPC_FEATURE_ARCH_2_06)
-             ? __strncasecmp_l_power7
-             : __strncasecmp_l_ppc);
-
-weak_alias (__libc_strncasecmp_l, strncasecmp_l)
-#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index 0a664a6..4cfe27f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
 extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
 # ifdef __LITTLE_ENDIAN__
 extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
-extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
 # endif
 # undef strncmp
 
@@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
    ifunc symbol properly.  */
 libc_ifunc_redirected (__redirect_strncmp, strncmp,
 # ifdef __LITTLE_ENDIAN__
-			(hwcap2 & PPC_FEATURE2_ARCH_3_1
-			 && hwcap & PPC_FEATURE_HAS_VSX)
-			? __strncmp_power10 :
 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
 			? __strncmp_power9 :
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index 5acf505..de146dd 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -544,6 +544,7 @@ $(objpfx)tst-create1: $(shared-thread-library)
 $(objpfx)tst-create1.out: $(objpfx)tst-create1mod.so
 
 $(objpfx)tst-stack2.out: $(objpfx)tst-stack2-mod.so
+$(objpfx)tst-stack2-mod.so: $(shared-thread-library)
 LDFLAGS-tst-stack2-mod.so = -Wl,-z,execstack
 ifeq ($(have-no-error-execstack),yes)
 LDFLAGS-tst-stack2-mod.so += -Wl,--no-error-execstack
diff --git a/sysdeps/s390/cpu-features.c b/sysdeps/s390/cpu-features.c
index 31a1f4d..106874b 100644
--- a/sysdeps/s390/cpu-features.c
+++ b/sysdeps/s390/cpu-features.c
@@ -27,7 +27,7 @@
 
 #define S390_COPY_CPU_FEATURES(SRC_PTR, DEST_PTR)	\
   (DEST_PTR)->hwcap = (SRC_PTR)->hwcap;			\
-  (DEST_PTR)->stfle_bits[0] = (SRC_PTR)->stfle_bits[0];
+  (DEST_PTR)->stfle_filtered = (SRC_PTR)->stfle_filtered;
 
 static void
 TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
@@ -77,7 +77,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 	  disable = true;
 	  hwcap_mask = HWCAP_S390_VXRS | HWCAP_S390_VXRS_EXT
 	    | HWCAP_S390_VXRS_EXT2;
-	  stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+	  stfle_bits0_mask = S390_STFLE_BIT61_ARCH13_MIE3;
 	}
       else if (tunable_str_comma_strcmp_cte (&t, "z13")
 	       || tunable_str_comma_strcmp_cte (&t, "arch11"))
@@ -85,7 +85,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 	  reset_features = true;
 	  disable = true;
 	  hwcap_mask = HWCAP_S390_VXRS_EXT | HWCAP_S390_VXRS_EXT2;
-	  stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+	  stfle_bits0_mask = S390_STFLE_BIT61_ARCH13_MIE3;
 	}
       else if (tunable_str_comma_strcmp_cte (&t, "z14")
 	       || tunable_str_comma_strcmp_cte (&t, "arch12"))
@@ -93,12 +93,14 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 	  reset_features = true;
 	  disable = true;
 	  hwcap_mask = HWCAP_S390_VXRS_EXT2;
-	  stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+	  stfle_bits0_mask = S390_STFLE_BIT61_ARCH13_MIE3;
 	}
       else if (tunable_str_comma_strcmp_cte (&t, "z15")
 	       || tunable_str_comma_strcmp_cte (&t, "z16")
+	       || tunable_str_comma_strcmp_cte (&t, "z17")
 	       || tunable_str_comma_strcmp_cte (&t, "arch13")
-	       || tunable_str_comma_strcmp_cte (&t, "arch14"))
+	       || tunable_str_comma_strcmp_cte (&t, "arch14")
+	       || tunable_str_comma_strcmp_cte (&t, "arch15"))
 	{
 	  /* For z15 or newer we don't have to disable something, but we have
 	     to reset to the original values.  */
@@ -125,7 +127,7 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 	    hwcap_mask |= HWCAP_S390_VXRS | HWCAP_S390_VXRS_EXT;
 	}
       else if (tunable_str_comma_strcmp_cte (&t, "STFLE_MIE3"))
-	stfle_bits0_mask = S390_STFLE_MASK_ARCH13_MIE3;
+	stfle_bits0_mask = S390_STFLE_BIT61_ARCH13_MIE3;
 
       /* Perform the actions determined above.  */
       if (reset_features)
@@ -144,22 +146,26 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
       if (stfle_bits0_mask != 0ULL)
 	{
 	  if (disable)
-	    cpu_features_curr.stfle_bits[0] &= ~stfle_bits0_mask;
+	    cpu_features_curr.stfle_filtered &= ~stfle_bits0_mask;
 	  else
-	    cpu_features_curr.stfle_bits[0] |= stfle_bits0_mask;
+	    cpu_features_curr.stfle_filtered |= stfle_bits0_mask;
 	}
     }
 
   /* Copy back the features after checking that no unsupported features were
      enabled by user.  */
   cpu_features->hwcap = cpu_features_curr.hwcap & cpu_features_orig.hwcap;
-  cpu_features->stfle_bits[0] = cpu_features_curr.stfle_bits[0]
-    & cpu_features_orig.stfle_bits[0];
+  cpu_features->stfle_filtered = cpu_features_curr.stfle_filtered
+    & cpu_features_orig.stfle_filtered;
 }
 
 static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features_no_tunables (struct cpu_features *cpu_features)
 {
+  /* Only initialize once; hwcap is filled in below on the first call.  */
+  if (cpu_features->hwcap != 0)
+    return;
+
   /* Fill cpu_features as passed by kernel and machine.  */
   cpu_features->hwcap = GLRO(dl_hwcap);
 
@@ -168,20 +174,57 @@ init_cpu_features (struct cpu_features *cpu_features)
 		      && (cpu_features->hwcap & HWCAP_S390_ZARCH)
 		      && (cpu_features->hwcap & HWCAP_S390_HIGH_GPRS)))
     {
-      register unsigned long reg0 __asm__("0") = 0;
+      unsigned long long stfle_bits[4] = { 0 };
+      register unsigned long reg0 __asm__("0") = 3; /* 4 doublewords - 1.  */
       __asm__ __volatile__(".machine push"        "\n\t"
 			   ".machine \"z9-109\""  "\n\t"
 			   ".machinemode \"zarch_nohighgprs\"\n\t"
 			   "stfle %0"             "\n\t"
 			   ".machine pop"         "\n"
-			   : "=QS" (cpu_features->stfle_bits[0]),
+			   : "+QS" (stfle_bits),
 			     "+d" (reg0)
 			   : : "cc");
+
+      unsigned long long internal_stfle_bits = 0;
+
+      /* Facility bit 34: z10: General instructions extension.  */
+      if ((stfle_bits[0] & (1ULL << (63 - 34))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT34_Z10;
+
+      /* Facility bit 45: z196: Distinct operands, popcount, ...  */
+      if ((stfle_bits[0] & (1ULL << (63 - 45))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT45_Z196;
+
+      /* Facility bit 61: arch13/z15: Miscellaneous-Instruction-Extensions
+	 Facility 3, e.g. mvcrl.  */
+      if ((stfle_bits[0] & (1ULL << (63 - 61))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT61_ARCH13_MIE3;
+
+      /* Facility bit 84: arch15/z17: Miscellaneous-instruction-extensions 4  */
+      if ((stfle_bits[1] & (1ULL << (127 - 84))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT84_ARCH15_MIE4;
+
+      /* Facility bit 198: arch15/z17: Vector-enhancements-facility 3  */
+      if ((stfle_bits[3] & (1ULL << (255 - 198))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT198_ARCH15_VXRS_EXT3;
+
+      /* Facility bit 199: arch15/z17: Vector-Packed-Decimal-Enhancement 3  */
+      if ((stfle_bits[3] & (1ULL << (255 - 199))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT199_ARCH15_VXRS_PDE3;
+
+      /* Facility bit 201: arch15/z17: CPU: Concurrent-Functions Facility  */
+      if ((stfle_bits[3] & (1ULL << (255 - 201))) != 0)
+	internal_stfle_bits |= S390_STFLE_BIT201_ARCH15_CON;
+
+      cpu_features->stfle_orig = internal_stfle_bits;
+      cpu_features->stfle_filtered = internal_stfle_bits;
     }
-  else
-    {
-      cpu_features->stfle_bits[0] = 0ULL;
-    }
+}
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+  init_cpu_features_no_tunables (cpu_features);
 
   TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
 }
diff --git a/sysdeps/s390/cpu-features.h b/sysdeps/s390/cpu-features.h
index 4ff4421..2441b27 100644
--- a/sysdeps/s390/cpu-features.h
+++ b/sysdeps/s390/cpu-features.h
@@ -18,29 +18,58 @@
 #ifndef __CPU_FEATURES_S390X_H
 # define __CPU_FEATURES_S390X_H
 
-#define S390_STFLE_BITS_Z10  34 /* General instructions extension */
-#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
-#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
-					  Facility 3, e.g. mvcrl.  */
+/* The following stfle bit definitions are intended to be used for the
+   glibc internal stfle_orig and stfle_filtered fields in cpu_features
+   struct.  They can't be used on the double words retrieved by the
+   stfle-instruction.  */
 
-#define S390_STFLE_MASK_ARCH13_MIE3 (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))
+/* Facility bit 34: z10: General instructions extension.  */
+#define S390_STFLE_BIT34_Z10 (1ULL << 0)
 
+/* Facility bit 45: z196: Distinct operands, popcount, ...  */
+#define S390_STFLE_BIT45_Z196 (1ULL << 1)
 
-#define S390_IS_ARCH13_MIE3(STFLE_BITS_ARRAY)			\
-  (((STFLE_BITS_ARRAY)[0] & S390_STFLE_MASK_ARCH13_MIE3) != 0)
+/* Facility bit 61: arch13/z15: Miscellaneous-Instruction-Extensions
+   Facility 3, e.g. mvcrl.  */
+#define S390_STFLE_BIT61_ARCH13_MIE3 (1ULL << 2)
 
-#define S390_IS_Z196(STFLE_BITS_ARRAY)			\
-  (((STFLE_BITS_ARRAY)[0] & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)
+/* Facility bit 84: arch15/z17: Miscellaneous-instruction-extensions
+   facility 4  */
+#define S390_STFLE_BIT84_ARCH15_MIE4 (1ULL << 3)
 
-#define S390_IS_Z10(STFLE_BITS_ARRAY)				\
-  (((STFLE_BITS_ARRAY)[0] & (1ULL << (63 - S390_STFLE_BITS_Z10))) != 0)
+/* Facility bit 198: arch15/z17: Vector-enhancements-facility 3  */
+#define S390_STFLE_BIT198_ARCH15_VXRS_EXT3 (1ULL << 4)
+
+/* Facility bit 199: arch15/z17: Vector-Packed-Decimal-Enhancement
+   Facility 3  */
+#define S390_STFLE_BIT199_ARCH15_VXRS_PDE3 (1ULL << 5)
+
+/* Facility bit 201: arch15/z17: CPU: Concurrent-Functions Facility  */
+#define S390_STFLE_BIT201_ARCH15_CON (1ULL << 6)
+
+#define S390_IS_ARCH15(STFLE_BITS)					\
+  ((((STFLE_BITS) & S390_STFLE_BIT84_ARCH15_MIE4) != 0)			\
+   && (((STFLE_BITS) & S390_STFLE_BIT198_ARCH15_VXRS_EXT3) != 0)	\
+   && (((STFLE_BITS) & S390_STFLE_BIT199_ARCH15_VXRS_PDE3) != 0)	\
+   && (((STFLE_BITS) & S390_STFLE_BIT201_ARCH15_CON) != 0))
+
+#define S390_IS_ARCH13_MIE3(STFLE_BITS)			\
+  (((STFLE_BITS) & S390_STFLE_BIT61_ARCH13_MIE3) != 0)
+
+#define S390_IS_Z196(STFLE_BITS)		\
+  (((STFLE_BITS) & S390_STFLE_BIT45_Z196) != 0)
+
+#define S390_IS_Z10(STFLE_BITS)			\
+  (((STFLE_BITS) & S390_STFLE_BIT34_Z10) != 0)
 
 struct cpu_features
 {
   unsigned long int hwcap;
   unsigned long int __reserved_hwcap2;
-  unsigned long long stfle_bits[3];
-  unsigned long long __reserved[11];
+  unsigned long long __reserved;
+  unsigned long long stfle_orig;
+  unsigned long long stfle_filtered;
+  unsigned long long __reserved2[11];
 };
 
 #endif /* __CPU_FEATURES_S390X_H  */
diff --git a/sysdeps/s390/dl-diagnostics-cpu.c b/sysdeps/s390/dl-diagnostics-cpu.c
new file mode 100644
index 0000000..426af2d
--- /dev/null
+++ b/sysdeps/s390/dl-diagnostics-cpu.c
@@ -0,0 +1,37 @@
+/* Print CPU diagnostics data in ld.so.  s390 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-diagnostics.h>
+#include <ldsodefs.h>
+#include <cpu-features.h>
+
+static void
+print_cpu_features_value (const char *label, uint64_t value)
+{
+  _dl_printf ("s390.cpu_features.");
+  _dl_diagnostics_print_labeled_value (label, value);
+}
+
+void
+_dl_diagnostics_cpu (void)
+{
+  const struct cpu_features *cpu_features = &GLRO(dl_s390_cpu_features);
+  print_cpu_features_value ("hwcap", cpu_features->hwcap);
+  print_cpu_features_value ("stfle_orig", cpu_features->stfle_orig);
+  print_cpu_features_value ("stfle_filtered", cpu_features->stfle_filtered);
+}
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
index 48c8ce1..14b1f763 100644
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
@@ -81,8 +81,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Get hardware information.  */
   const struct cpu_features *features = &GLRO(dl_s390_cpu_features);
   unsigned long int dl_hwcap __attribute__ ((unused)) = features->hwcap;
-  const unsigned long long * __attribute__((unused)) stfle_bits
-    = features->stfle_bits;
+  const unsigned long long __attribute__((unused)) stfle_bits
+    = features->stfle_filtered;
 
 #if HAVE_MEMSET_IFUNC
   IFUNC_IMPL (i, name, memset,
diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h
index 34f3b0d..b28c503 100644
--- a/sysdeps/s390/multiarch/ifunc-resolve.h
+++ b/sysdeps/s390/multiarch/ifunc-resolve.h
@@ -23,7 +23,7 @@
 #include <sys/auxv.h>
 
 #define s390_libc_ifunc_expr_stfle_init()				\
-  const unsigned long long *stfle_bits = features->stfle_bits;
+  const unsigned long long stfle_bits = features->stfle_filtered;
 
 #define s390_libc_ifunc_expr_init()					\
   const struct cpu_features *features = &GLRO(dl_s390_cpu_features);	\
diff --git a/sysdeps/s390/s390-32/s390-mcount.S b/sysdeps/s390/s390-32/s390-mcount.S
index 59614ee..7f8457f 100644
--- a/sysdeps/s390/s390-32/s390-mcount.S
+++ b/sysdeps/s390/s390-32/s390-mcount.S
@@ -54,11 +54,7 @@ C_LABEL(_mcount)
 	/* Save the caller-clobbered registers.  */
 	ahi   %r15,-128
 	cfi_adjust_cfa_offset (128)
-	/* binutils 2.28+: .cfi_val_offset r15, -96 */
-	.cfi_escape \
-		/* DW_CFA_val_offset */ 0x14, \
-		/* r15 */               0x0f, \
-		/* scaled offset */     0x18
+	cfi_val_offset (r15, -96)
 	stm   %r14,%r5,96(%r15)
 	cfi_offset (r14, -128)
 	l     %r2,132(%r15)       # callers address  = first parameter
diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile
index 66ed844..991025c 100644
--- a/sysdeps/s390/s390-64/Makefile
+++ b/sysdeps/s390/s390-64/Makefile
@@ -11,7 +11,8 @@ $(objpfx)tst-glibc-hwcaps: \
   $(objpfx)libmarkermod2-1.so \
   $(objpfx)libmarkermod3-1.so \
   $(objpfx)libmarkermod4-1.so \
-  $(objpfx)libmarkermod5-1.so
+  $(objpfx)libmarkermod5-1.so \
+  $(objpfx)libmarkermod6-1.so
 $(objpfx)tst-glibc-hwcaps.out: \
   $(objpfx)libmarkermod2.so \
     $(objpfx)glibc-hwcaps/z13/libmarkermod2.so \
@@ -26,7 +27,14 @@ $(objpfx)tst-glibc-hwcaps.out: \
     $(objpfx)glibc-hwcaps/z13/libmarkermod5.so \
     $(objpfx)glibc-hwcaps/z14/libmarkermod5.so \
     $(objpfx)glibc-hwcaps/z15/libmarkermod5.so \
-    $(objpfx)glibc-hwcaps/z16/libmarkermod5.so
+    $(objpfx)glibc-hwcaps/z16/libmarkermod5.so \
+  $(objpfx)libmarkermod6.so \
+    $(objpfx)glibc-hwcaps/z13/libmarkermod6.so \
+    $(objpfx)glibc-hwcaps/z14/libmarkermod6.so \
+    $(objpfx)glibc-hwcaps/z15/libmarkermod6.so \
+    $(objpfx)glibc-hwcaps/z16/libmarkermod6.so \
+    $(objpfx)glibc-hwcaps/z17/libmarkermod6.so
+
 
 $(objpfx)glibc-hwcaps/z13/libmarkermod2.so: $(objpfx)libmarkermod2-2.so
 	$(make-target-directory)
@@ -58,6 +66,21 @@ $(objpfx)glibc-hwcaps/z15/libmarkermod5.so: $(objpfx)libmarkermod5-4.so
 $(objpfx)glibc-hwcaps/z16/libmarkermod5.so: $(objpfx)libmarkermod5-5.so
 	$(make-target-directory)
 	cp $< $@
+$(objpfx)glibc-hwcaps/z13/libmarkermod6.so: $(objpfx)libmarkermod6-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z14/libmarkermod6.so: $(objpfx)libmarkermod6-3.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z15/libmarkermod6.so: $(objpfx)libmarkermod6-4.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z16/libmarkermod6.so: $(objpfx)libmarkermod6-5.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z17/libmarkermod6.so: $(objpfx)libmarkermod6-6.so
+	$(make-target-directory)
+	cp $< $@
 
 
 ifeq (no,$(build-hardcoded-path-in-tests))
diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h
index 815cf3e..736e654 100644
--- a/sysdeps/s390/s390-64/dl-hwcap-check.h
+++ b/sysdeps/s390/s390-64/dl-hwcap-check.h
@@ -25,8 +25,23 @@
 static inline void
 dl_hwcap_check (void)
 {
-#if defined __ARCH__
-# if GCCMACRO__ARCH__ >= 14
+  /* Note: The s390x kernel won't introduce new HWCAP-Bits if there is
+     no special handling needed in the kernel itself.  Thus we have
+     to check the facility-list retrieved with the stfle instruction.
+     We already have a common storage of this list in cpu-features.c.
+     This dl-hwcap-check.h file is included in
+     sysdeps/unix/sysv/linux/dl-sysdep.c, where dl-machine.h and
+     cpu-features.c are also included.  Therefore we don't need a
+     special include here.  */
+
+#if defined GCCMACRO__ARCH__
+# if GCCMACRO__ARCH__ >= 15
+  init_cpu_features_no_tunables (&GLRO(dl_s390_cpu_features));
+  if (!(S390_IS_ARCH15 (GLRO(dl_s390_cpu_features).stfle_orig)))
+    _dl_fatal_printf ("\
+Fatal glibc error: CPU lacks VXRS_EXT3/VXRS_PDE3/MIE4/Concurrent-functions \
+support (z17 or later required)\n");
+# elif GCCMACRO__ARCH__ >= 14
   if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2))
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n");
@@ -39,7 +54,7 @@ Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n");
     _dl_fatal_printf ("\
 Fatal glibc error: CPU lacks VXE support (z14 or later required)\n");
 # endif
-#endif /* __ARCH__ */
+#endif /* GCCMACRO__ARCH__ */
 }
 
 #endif /* _DL_HWCAP_CHECK_H */
diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
index 32fdabd..1ae9fdd 100644
--- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
+++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
@@ -19,9 +19,10 @@
 #include <dl-hwcaps.h>
 #include <ldsodefs.h>
 #include <sys/auxv.h>
+#include <cpu-features.h>
 
-const char _dl_hwcaps_subdirs[] = "z16:z15:z14:z13";
-enum { subdirs_count = 4 }; /* Number of components in _dl_hwcaps_subdirs.  */
+const char _dl_hwcaps_subdirs[] = "z17:z16:z15:z14:z13";
+enum { subdirs_count = 5 }; /* Number of components in _dl_hwcaps_subdirs.  */
 
 uint32_t
 _dl_hwcaps_subdirs_active (void)
@@ -58,5 +59,12 @@ _dl_hwcaps_subdirs_active (void)
     return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
   ++active;
 
+  /* z17.
+     Note: The kernel has not introduced new HWCAP bits as the new facilities do
+     not require kernel interaction.  Thus we check the features via stfle.  */
+  if (!(S390_IS_ARCH15 (GLRO(dl_s390_cpu_features).stfle_orig)))
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
   return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
 }
diff --git a/sysdeps/s390/s390-64/s390x-mcount.h b/sysdeps/s390/s390-64/s390x-mcount.h
index b82f1a8..c5bd70d 100644
--- a/sysdeps/s390/s390-64/s390x-mcount.h
+++ b/sysdeps/s390/s390-64/s390x-mcount.h
@@ -68,11 +68,7 @@ C_LABEL(MCOUNT_SYMBOL)
 	/* Save the caller-clobbered registers.  */
 	aghi  %r15,-224
 	cfi_adjust_cfa_offset (224)
-	/* binutils 2.28+: .cfi_val_offset r15, -160 */
-	.cfi_escape \
-		/* DW_CFA_val_offset */ 0x14, \
-		/* r15 */               0x0f, \
-		/* scaled offset */     0x14
+	cfi_val_offset (r15, -160)
 	stmg  %r14,%r5,160(%r15)
 	cfi_offset (r14, -224)
 	cfi_offset (r0, -224+16)
diff --git a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
index 1652bd7..b9d8781 100644
--- a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
+++ b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
@@ -26,35 +26,53 @@ extern int marker2 (void);
 extern int marker3 (void);
 extern int marker4 (void);
 extern int marker5 (void);
+extern int marker6 (void);
 
 /* Return the arch level, 10 for the baseline libmarkermod*.so's.  */
 static int
 compute_level (void)
 {
   const char *platform = (const char *) getauxval (AT_PLATFORM);
+  const unsigned long int hwcap = getauxval (AT_HWCAP);
+  const int latest_level = 15;
 
   /* The arch* versions refer to the edition of the Principles of
      Operation, and they are off by two when compared with the recent
      product names.  (The code below should not be considered an
      accurate mapping to Principles of Operation editions for earlier
      AT_PLATFORM strings).  */
-  if (strcmp (platform, "z900") == 0)
-    return 10;
-  if (strcmp (platform, "z990") == 0)
-    return 10;
-  if (strcmp (platform, "z9-109") == 0)
-    return 10;
-  if (strcmp (platform, "z10") == 0)
-    return 10;
-  if (strcmp (platform, "z196") == 0)
-    return 10;
-  if (strcmp (platform, "zEC12") == 0)
-    return 10;
+  if ((strcmp (platform, "z900") == 0)
+      || (strcmp (platform, "z990") == 0)
+      || (strcmp (platform, "z9-109") == 0)
+      || (strcmp (platform, "z10") == 0)
+      || (strcmp (platform, "z196") == 0)
+      || (strcmp (platform, "zEC12") == 0))
+    {
+      if ((hwcap & HWCAP_S390_VX) == 0)
+	{
+	  /* As vector-support was introduced with the newer z13
+	     architecture, we are really on one of the tested older
+	     architectures.  */
+	  return 10;
+	}
+      else
+	{
+	  /* According to AT_PLATFORM we are on an older architecture
+	     without vector-support, but according to HWCAPs vector
+	     registers are supported.  This means we are running on a
+	     new architecture which is not yet known by the kernel.
+	     Thus the default AT_PLATFORM string is used, which is the
+	     oldest supported one.  For this test, assume we are on
+	     the latest known architecture.  See
+	     <kernel>/arch/s390/kernel/processor.c:setup_elf_platform().
+	  */
+	  return latest_level;
+	}
+    }
 
   /* If we are running on z13 or newer and the kernel was booted with novx,
      then AT_PLATFORM is z13 or newer, but _dl_hwcaps_subdirs_active will
      return zero and the _dl_hwcaps_subdirs are not searched.  */
-  const unsigned long int hwcap = getauxval (AT_HWCAP);
   if ((hwcap & HWCAP_S390_VX) == 0)
     return 10;
 
@@ -66,9 +84,12 @@ compute_level (void)
     return 13;
   if (strcmp (platform, "z16") == 0)
     return 14;
+  if (strcmp (platform, "z17") == 0)
+    return latest_level;
+
   printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
-  /* Assume that the new platform supports z16.  */
-  return 14;
+  /* Assume that the new platform supports the latest known architecture.  */
+  return latest_level;
 }
 
 static int
@@ -80,6 +101,7 @@ do_test (void)
   TEST_COMPARE (marker3 (), MIN (level - 9, 3));
   TEST_COMPARE (marker4 (), MIN (level - 9, 4));
   TEST_COMPARE (marker5 (), MIN (level - 9, 5));
+  TEST_COMPARE (marker6 (), MIN (level - 9, 6));
   return 0;
 }
 
diff --git a/sysdeps/sparc/sparc32/start.S b/sysdeps/sparc/sparc32/start.S
index 694b020..8393760 100644
--- a/sysdeps/sparc/sparc32/start.S
+++ b/sysdeps/sparc/sparc32/start.S
@@ -35,6 +35,7 @@
 
 #include <sysdep.h>
 
+#define FRAME_SIZE 104
 
 	.section ".text"
 	.align 4
@@ -48,12 +49,12 @@ _start:
   /* Terminate the stack frame, and reserve space for functions to
      drop their arguments.  */
 	mov	%g0, %fp
-	sub	%sp, 6*4, %sp
+	sub	%sp, FRAME_SIZE, %sp
 
   /* Extract the arguments and environment as encoded on the stack.  The
      argument info starts after one register window (16 words) past the SP.  */
-	ld	[%sp+22*4], %o1
-	add	%sp, 23*4, %o2
+	ld	[%sp+168], %o1
+	add	%sp, 172, %o2
 
   /* Load the addresses of the user entry points.  */
 #ifndef PIC
@@ -73,6 +74,10 @@ _start:
      be NULL.  */
 	mov	%g1, %o5
 
+  /* Provide the highest stack address to update the __libc_stack_end (used
+     to enable executable stacks if required).  */
+	st	%sp, [%sp+23*4]
+
   /* Let libc do the rest of the initialization, and call main.  */
 	call	__libc_start_main
 	 nop
diff --git a/sysdeps/sparc/sparc64/start.S b/sysdeps/sparc/sparc64/start.S
index c9c25c2..08e1e77 100644
--- a/sysdeps/sparc/sparc64/start.S
+++ b/sysdeps/sparc/sparc64/start.S
@@ -74,6 +74,10 @@ _start:
      be NULL.  */
 	mov     %g1, %o5
 
+  /* Provide the highest stack address to update the __libc_stack_end (used
+     to enable executable stacks if required).  */
+	stx	%sp, [%sp+STACK_BIAS+22*8]
+
   /* Let libc do the rest of the initialization, and call main.  */
 	call    __libc_start_main
 	 nop
diff --git a/sysdeps/unix/bsd/tcsetattr.c b/sysdeps/unix/bsd/tcsetattr.c
index 38b5f71..8693d94 100644
--- a/sysdeps/unix/bsd/tcsetattr.c
+++ b/sysdeps/unix/bsd/tcsetattr.c
@@ -32,7 +32,7 @@
 
 /* Set the state of FD to *TERMIOS_P.  */
 int
-tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
+__tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
 {
   struct termios myt;
 
@@ -56,4 +56,6 @@ tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
       return __ioctl (fd, TIOCSETAF, termios_p);
     }
 }
-libc_hidden_def (tcsetattr)
+
+libc_hidden_def (__tcsetattr)
+weak_alias (__tcsetattr, tcsetattr)
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index ebcf820..c47cbdf 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -151,15 +151,6 @@ sysdep_headers += \
   bits/struct_stat.h \
   bits/struct_stat_time64_helper.h \
   bits/syscall.h \
-  bits/termios-baud.h \
-  bits/termios-c_cc.h \
-  bits/termios-c_cflag.h \
-  bits/termios-c_iflag.h \
-  bits/termios-c_lflag.h \
-  bits/termios-c_oflag.h \
-  bits/termios-misc.h \
-  bits/termios-struct.h \
-  bits/termios-tcflow.h \
   bits/timerfd.h \
   bits/types/struct_msqid64_ds.h \
   bits/types/struct_msqid64_ds_helper.h \
@@ -421,6 +412,24 @@ tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
 
 endif # $(subdir) == misc
 
+ifeq ($(subdir),termios)
+sysdep_headers += \
+  bits/termios-c_cc.h \
+  bits/termios-c_cflag.h \
+  bits/termios-c_iflag.h \
+  bits/termios-c_lflag.h \
+  bits/termios-c_oflag.h \
+  bits/termios-cbaud.h \
+  bits/termios-misc.h \
+  bits/termios-struct.h \
+  bits/termios-tcflow.h \
+  # sysdep_headers
+
+tests += \
+  tst-termios-linux \
+  # tests
+endif
+
 ifeq ($(subdir),time)
 sysdep_headers += \
   bits/timex.h \
@@ -603,6 +612,7 @@ endif
 ifeq ($(subdir),io)
 sysdep_routines += \
   close_nocancel \
+  close_nocancel_nostatus \
   fallocate \
   fallocate64 \
   fcntl_nocancel \
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 55d5655..585dec7 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -332,6 +332,13 @@ libc {
     sched_getattr;
     sched_setattr;
   }
+  GLIBC_2.42 {
+    cfgetospeed;
+    cfgetispeed;
+    cfsetospeed;
+    cfsetispeed;
+    cfsetspeed;
+  }
   GLIBC_PRIVATE {
     # functions used in other libraries
     __syscall_rt_sigqueueinfo;
@@ -339,6 +346,7 @@ libc {
     __read_nocancel;
     __pread64_nocancel;
     __close_nocancel;
+    __close_nocancel_nostatus;
     __sigtimedwait;
     # functions used by nscd
     __netlink_assert_response;
diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
index 89aced0..ba4a461 100644
--- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
@@ -175,6 +175,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index 6d63c8a..1acc82d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -23,6 +23,7 @@
 #include <sys/prctl.h>
 #include <sys/utsname.h>
 #include <dl-tunables-parse.h>
+#include <dl-symbol-redir-ifunc.h>
 
 #define DCZID_DZP_MASK (1 << 4)
 #define DCZID_BS_MASK (0xf)
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
index aa6bf48..a22e651 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
@@ -2752,6 +2752,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/aarch64/libm.abilist b/sysdeps/unix/sysv/linux/aarch64/libm.abilist
index 4fdeb0d..bb8114b 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libm.abilist
@@ -1245,6 +1245,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1261,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index a56ce7f..f7f72b6 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -148,3 +148,23 @@ GLIBC_2.41 _ZGVsMxv_sinpi F
 GLIBC_2.41 _ZGVsMxv_sinpif F
 GLIBC_2.41 _ZGVsMxv_tanpi F
 GLIBC_2.41 _ZGVsMxv_tanpif F
+GLIBC_2.42 _ZGVnN2v_acospi F
+GLIBC_2.42 _ZGVnN2v_acospif F
+GLIBC_2.42 _ZGVnN2v_asinpi F
+GLIBC_2.42 _ZGVnN2v_asinpif F
+GLIBC_2.42 _ZGVnN2v_atanpi F
+GLIBC_2.42 _ZGVnN2v_atanpif F
+GLIBC_2.42 _ZGVnN2vv_atan2pi F
+GLIBC_2.42 _ZGVnN2vv_atan2pif F
+GLIBC_2.42 _ZGVnN4v_acospif F
+GLIBC_2.42 _ZGVnN4v_asinpif F
+GLIBC_2.42 _ZGVnN4v_atanpif F
+GLIBC_2.42 _ZGVnN4vv_atan2pif F
+GLIBC_2.42 _ZGVsMxv_acospi F
+GLIBC_2.42 _ZGVsMxv_acospif F
+GLIBC_2.42 _ZGVsMxv_asinpi F
+GLIBC_2.42 _ZGVsMxv_asinpif F
+GLIBC_2.42 _ZGVsMxv_atanpi F
+GLIBC_2.42 _ZGVsMxv_atanpif F
+GLIBC_2.42 _ZGVsMxvv_atan2pi F
+GLIBC_2.42 _ZGVsMxvv_atan2pif F
diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
index a2eab9e..4485723 100644
--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
@@ -36,9 +36,7 @@ static struct _aarch64_ctx *extension (void *p)
 static void *
 alloc_makecontext_gcs (size_t stack_size)
 {
-  void *base;
-  size_t size;
-  void *gcsp = __alloc_gcs (stack_size, &base, &size);
+  void *gcsp = __alloc_gcs (stack_size, NULL);
   if (gcsp == NULL)
     /* ENOSYS, bad size or OOM.  */
     abort ();
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index 022a263..d9716f0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -48,25 +48,16 @@ ENTRY (__setcontext)
 	cbz	x0, 1f
 	b	C_SYMBOL_NAME (__syscall_error)
 1:
-	/* Disable ZA of SME.  */
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-#endif
-	stp	x29, x30, [sp, -16]!
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
+	/* Clear ZA state of SME.  */
+	/* The calling convention of __libc_arm_za_disable lets us keep
+	   the return address in x13, avoiding a stack save and reload.
+	   As a result we also don't need to sign the return address and
+	   check it after returning because it is not stored to stack.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
 	bl	__libc_arm_za_disable
-	ldp	x29, x30, [sp], 16
-	cfi_adjust_cfa_offset (-16)
-	cfi_restore (x29)
-	cfi_restore (x30)
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-#endif
+	mov	x30, x13
+	cfi_register (x13, x30)
 	/* Restore the general purpose registers.  */
 	mov	x0, x9
 	cfi_def_cfa (x0, 0)
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index cc41253..58ddb95 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -119,7 +119,7 @@ L(gcs_done):
 2:
 	/* The oucp context is restored here via an indirect branch,
 	   x1 must be restored too which has the real return address.  */
-	BTI_J
+	bti	j
 	mov	x30, x1
 	RET
 PSEUDO_END (__swapcontext)
diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
index 455da93..840d6fe 100644
--- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
@@ -209,6 +209,7 @@
 #define __NR_open 45
 #define __NR_open_by_handle_at 498
 #define __NR_open_tree 538
+#define __NR_open_tree_attr 577
 #define __NR_openat 450
 #define __NR_openat2 547
 #define __NR_osf_adjtime 140
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
index 1f9f7f2..d830884 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
@@ -36,4 +36,6 @@
 
 #ifdef __USE_MISC
 # define ADDRB 04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h
index 324d5d8..69421f6 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h
@@ -17,30 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
-# define CBAUD	0000037
-# define CBAUDEX 0000000
-# define CMSPAR	  010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
+# define CBAUD	    000000037
+# define CBAUDEX    000000000
+# define CIBAUD     007600000
+# define IBSHIFT    16
 #endif
 
-#define  B57600   00020
-#define  B115200  00021
-#define  B230400  00022
-#define  B460800  00023
-#define  B500000  00024
-#define  B576000  00025
-#define  B921600  00026
-#define  B1000000 00027
-#define  B1152000 00030
-#define  B1500000 00031
-#define  B2000000 00032
-#define  B2500000 00033
-#define  B3000000 00034
-#define  B3500000 00035
-#define  B4000000 00036
-
-#define __MAX_BAUD B4000000
+#define  __B57600   00020
+#define  __B115200  00021
+#define  __B230400  00022
+#define  __B460800  00023
+#define  __B500000  00024
+#define  __B576000  00025
+#define  __B921600  00026
+#define  __B1000000 00027
+#define  __B1152000 00030
+#define  __B1500000 00031
+#define  __B2000000 00032
+#define  __B2500000 00033
+#define  __B3000000 00034
+#define  __B3500000 00035
+#define  __B4000000 00036
+#define  __BOTHER   00037
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
index de4d5fc..f50e9ef 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
@@ -30,8 +30,15 @@ struct termios
     tcflag_t c_lflag;		/* local mode flags */
     cc_t c_cc[NCCS];		/* control characters */
     cc_t c_line;		/* line discipline (== c_cc[33]) */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
+    /* Input and output baud rates.  */
+    __extension__ union {
+      speed_t __ispeed;
+      speed_t c_ispeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1
+    __extension__ union {
+      speed_t __ospeed;
+      speed_t c_ospeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1
   };
diff --git a/sysdeps/unix/sysv/linux/alpha/kernel-features.h b/sysdeps/unix/sysv/linux/alpha/kernel-features.h
index 6eae48f..83fdf91 100644
--- a/sysdeps/unix/sysv/linux/alpha/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/alpha/kernel-features.h
@@ -54,4 +54,15 @@
 #undef __ASSUME_CLONE3
 #define __ASSUME_CLONE3 0
 
+/* Alpha did not provide BOTHER, CIBAUD or the termios2 ioctls until
+   kernel 4.20.  Even though struct __kernel_termios and struct
+   termios2 are the same on Alpha, calling the legacy TCSETS* ioctls
+   with BOTHER set triggers a bug in these old kernels, so only use
+   the legacy TCSETS* ioctl numbers if neither BOTHER nor split speed is
+   needed; that way the code will fail gracefully.  */
+#if __LINUX_KERNEL_VERSION < 0x041400
+# undef  __ASSUME_TERMIOS2
+# define __ASSUME_TERMIOS2 0
+#endif
+
 #endif /* _KERNEL_FEATURES_H */
diff --git a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h b/sysdeps/unix/sysv/linux/alpha/kernel_termios.h
deleted file mode 100644
index 6a777dd..0000000
--- a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
-
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-    cc_t c_line;		/* line discipline */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
-  };
-
-#define _HAVE_C_ISPEED 1
-#define _HAVE_C_OSPEED 1
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist
index d5df965..4b5736a 100644
--- a/sysdeps/unix/sysv/linux/alpha/libc.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist
@@ -3099,6 +3099,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/alpha/libm.abilist b/sysdeps/unix/sysv/linux/alpha/libm.abilist
index 06cec45..4b383b1 100644
--- a/sysdeps/unix/sysv/linux/alpha/libm.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libm.abilist
@@ -1404,6 +1404,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1420,6 +1428,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/alpha/termios_arch.h b/sysdeps/unix/sysv/linux/alpha/termios_arch.h
new file mode 100644
index 0000000..20025f2
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/alpha/termios_arch.h
@@ -0,0 +1,26 @@
+/* Architectural parameters for Linux termios - Alpha/PowerPC version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef TERMIOS_INTERNALS_H
+# error "<termios_arch.h> should only be included from <termios_internals.h>"
+#endif
+
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1
+#define _HAVE_STRUCT_OLD_TERMIOS 0
diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
index 01075e8..2534f0f 100644
--- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
@@ -177,6 +177,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist
index c46c08d..b8a4478 100644
--- a/sysdeps/unix/sysv/linux/arc/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libc.abilist
@@ -2513,6 +2513,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arc/libm.abilist b/sysdeps/unix/sysv/linux/arc/libm.abilist
index ab3f09c..c865ec8 100644
--- a/sysdeps/unix/sysv/linux/arc/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libm.abilist
@@ -829,6 +829,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -841,6 +847,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
index 9704472..8e585a4 100644
--- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
@@ -223,6 +223,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 371
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 322
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
index 4df150c..959e446 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
@@ -2805,6 +2805,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arm/be/libm.abilist b/sysdeps/unix/sysv/linux/arm/be/libm.abilist
index efa90f9..63bad09 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
index be29478..a930d1a 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
@@ -2802,6 +2802,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libm.abilist b/sysdeps/unix/sysv/linux/arm/le/libm.abilist
index efa90f9..63bad09 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/bits/ioctls.h b/sysdeps/unix/sysv/linux/bits/ioctls.h
index 7e226e4..1ddcd4f 100644
--- a/sysdeps/unix/sysv/linux/bits/ioctls.h
+++ b/sysdeps/unix/sysv/linux/bits/ioctls.h
@@ -22,87 +22,4 @@
 /* Use the definitions from the kernel header files.  */
 #include <asm/ioctls.h>
 
-/* Routing table calls.  */
-#define SIOCADDRT	0x890B		/* add routing table entry	*/
-#define SIOCDELRT	0x890C		/* delete routing table entry	*/
-#define SIOCRTMSG	0x890D		/* call to routing system	*/
-
-/* Socket configuration controls. */
-#define SIOCGIFNAME	0x8910		/* get iface name		*/
-#define SIOCSIFLINK	0x8911		/* set iface channel		*/
-#define SIOCGIFCONF	0x8912		/* get iface list		*/
-#define SIOCGIFFLAGS	0x8913		/* get flags			*/
-#define SIOCSIFFLAGS	0x8914		/* set flags			*/
-#define SIOCGIFADDR	0x8915		/* get PA address		*/
-#define SIOCSIFADDR	0x8916		/* set PA address		*/
-#define SIOCGIFDSTADDR	0x8917		/* get remote PA address	*/
-#define SIOCSIFDSTADDR	0x8918		/* set remote PA address	*/
-#define SIOCGIFBRDADDR	0x8919		/* get broadcast PA address	*/
-#define SIOCSIFBRDADDR	0x891a		/* set broadcast PA address	*/
-#define SIOCGIFNETMASK	0x891b		/* get network PA mask		*/
-#define SIOCSIFNETMASK	0x891c		/* set network PA mask		*/
-#define SIOCGIFMETRIC	0x891d		/* get metric			*/
-#define SIOCSIFMETRIC	0x891e		/* set metric			*/
-#define SIOCGIFMEM	0x891f		/* get memory address (BSD)	*/
-#define SIOCSIFMEM	0x8920		/* set memory address (BSD)	*/
-#define SIOCGIFMTU	0x8921		/* get MTU size			*/
-#define SIOCSIFMTU	0x8922		/* set MTU size			*/
-#define SIOCSIFNAME	0x8923		/* set interface name		*/
-#define	SIOCSIFHWADDR	0x8924		/* set hardware address 	*/
-#define SIOCGIFENCAP	0x8925		/* get/set encapsulations       */
-#define SIOCSIFENCAP	0x8926
-#define SIOCGIFHWADDR	0x8927		/* Get hardware address		*/
-#define SIOCGIFSLAVE	0x8929		/* Driver slaving support	*/
-#define SIOCSIFSLAVE	0x8930
-#define SIOCADDMULTI	0x8931		/* Multicast address lists	*/
-#define SIOCDELMULTI	0x8932
-#define SIOCGIFINDEX	0x8933		/* name -> if_index mapping	*/
-#define SIOGIFINDEX	SIOCGIFINDEX	/* misprint compatibility :-)	*/
-#define SIOCSIFPFLAGS	0x8934		/* set/get extended flags set	*/
-#define SIOCGIFPFLAGS	0x8935
-#define SIOCDIFADDR	0x8936		/* delete PA address		*/
-#define	SIOCSIFHWBROADCAST	0x8937	/* set hardware broadcast addr	*/
-#define SIOCGIFCOUNT	0x8938		/* get number of devices */
-
-#define SIOCGIFBR	0x8940		/* Bridging support		*/
-#define SIOCSIFBR	0x8941		/* Set bridging options 	*/
-
-#define SIOCGIFTXQLEN	0x8942		/* Get the tx queue length	*/
-#define SIOCSIFTXQLEN	0x8943		/* Set the tx queue length 	*/
-
-
-/* ARP cache control calls. */
-		    /*  0x8950 - 0x8952  * obsolete calls, don't re-use */
-#define SIOCDARP	0x8953		/* delete ARP table entry	*/
-#define SIOCGARP	0x8954		/* get ARP table entry		*/
-#define SIOCSARP	0x8955		/* set ARP table entry		*/
-
-/* RARP cache control calls. */
-#define SIOCDRARP	0x8960		/* delete RARP table entry	*/
-#define SIOCGRARP	0x8961		/* get RARP table entry		*/
-#define SIOCSRARP	0x8962		/* set RARP table entry		*/
-
-/* Driver configuration calls */
-
-#define SIOCGIFMAP	0x8970		/* Get device parameters	*/
-#define SIOCSIFMAP	0x8971		/* Set device parameters	*/
-
-/* DLCI configuration calls */
-
-#define SIOCADDDLCI	0x8980		/* Create new DLCI device	*/
-#define SIOCDELDLCI	0x8981		/* Delete DLCI device		*/
-
-/* Device private ioctl calls.  */
-
-/* These 16 ioctls are available to devices via the do_ioctl() device
-   vector.  Each device should include this file and redefine these
-   names as their own. Because these are device dependent it is a good
-   idea _NOT_ to issue them to random objects and hope.  */
-
-#define SIOCDEVPRIVATE 		0x89F0	/* to 89FF */
-
-/*
- *	These 16 ioctl calls are protocol private
- */
-
-#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#include <linux/sockios.h>
diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h
index 3159097..0be4b47 100644
--- a/sysdeps/unix/sysv/linux/bits/mman-shared.h
+++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h
@@ -43,10 +43,9 @@
 # endif
 
 /* Access restrictions for pkey_alloc.  */
-# ifndef PKEY_DISABLE_ACCESS
-#  define PKEY_DISABLE_ACCESS 0x1
-#  define PKEY_DISABLE_WRITE 0x2
-# endif
+# define PKEY_UNRESTRICTED 0x0
+# define PKEY_DISABLE_ACCESS 0x1
+# define PKEY_DISABLE_WRITE 0x2
 
 __BEGIN_DECLS
 
diff --git a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
index bbbb621..befd25a 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
@@ -34,5 +34,7 @@
 #define CLOCAL	0004000
 
 #ifdef __USE_MISC
-# define ADDRB 04000000000
+# define ADDRB    04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/bits/termios-baud.h b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h
index e63a3eb..b9aadff 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h
@@ -17,32 +17,31 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
 # define CBAUD	 000000010017 /* Baud speed mask (not in POSIX).  */
 # define CBAUDEX 000000010000 /* Extra baud speed mask, included in CBAUD.
 				 (not in POSIX).  */
-# define CIBAUD	 002003600000 /* Input baud rate (not used).  */
-# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
-# define CRTSCTS 020000000000 /* Flow control.  */
+# define CIBAUD	 002003600000 /* Input baud rate.  */
+# define IBSHIFT 16
 #endif
 
 /* Extra output baud rates (not in POSIX).  */
-#define  B57600    0010001
-#define  B115200   0010002
-#define  B230400   0010003
-#define  B460800   0010004
-#define  B500000   0010005
-#define  B576000   0010006
-#define  B921600   0010007
-#define  B1000000  0010010
-#define  B1152000  0010011
-#define  B1500000  0010012
-#define  B2000000  0010013
-#define  B2500000  0010014
-#define  B3000000  0010015
-#define  B3500000  0010016
-#define  B4000000  0010017
-#define __MAX_BAUD B4000000
+#define  __BOTHER    0010000
+#define  __B57600    0010001
+#define  __B115200   0010002
+#define  __B230400   0010003
+#define  __B460800   0010004
+#define  __B500000   0010005
+#define  __B576000   0010006
+#define  __B921600   0010007
+#define  __B1000000  0010010
+#define  __B1152000  0010011
+#define  __B1500000  0010012
+#define  __B2000000  0010013
+#define  __B2500000  0010014
+#define  __B3000000  0010015
+#define  __B3500000  0010016
+#define  __B4000000  0010017
diff --git a/sysdeps/unix/sysv/linux/bits/termios-struct.h b/sysdeps/unix/sysv/linux/bits/termios-struct.h
index 4c501a5..0aba1a4 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-struct.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-struct.h
@@ -29,8 +29,15 @@ struct termios
     tcflag_t c_lflag;		/* local mode flags */
     cc_t c_line;			/* line discipline */
     cc_t c_cc[NCCS];		/* control characters */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
+    /* Input and output baud rates.  */
+    __extension__ union {
+      speed_t __ispeed;
+      speed_t c_ispeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1
+    __extension__ union {
+      speed_t __ospeed;
+      speed_t c_ospeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1
   };
diff --git a/sysdeps/unix/sysv/linux/bits/termios.h b/sysdeps/unix/sysv/linux/bits/termios.h
index 3bd1e22..20746a0 100644
--- a/sysdeps/unix/sysv/linux/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/bits/termios.h
@@ -24,35 +24,41 @@ typedef unsigned char	cc_t;
 typedef unsigned int	speed_t;
 typedef unsigned int	tcflag_t;
 
-#include <bits/termios-struct.h>
+#ifdef _TERMIOS_H
+# include <bits/termios-struct.h>
+#endif
+
 #include <bits/termios-c_cc.h>
 #include <bits/termios-c_iflag.h>
 #include <bits/termios-c_oflag.h>
 
 /* c_cflag bit meaning */
-#define  B0	0000000		/* hang up */
-#define  B50	0000001
-#define  B75	0000002
-#define  B110	0000003
-#define  B134	0000004
-#define  B150	0000005
-#define  B200	0000006
-#define  B300	0000007
-#define  B600	0000010
-#define  B1200	0000011
-#define  B1800	0000012
-#define  B2400	0000013
-#define  B4800	0000014
-#define  B9600	0000015
-#define  B19200	0000016
-#define  B38400	0000017
+#include <bits/termios-c_cflag.h>
+
 #ifdef __USE_MISC
-# define EXTA B19200
-# define EXTB B38400
+#define __B0	 0000000	/* hang up */
+#define __B50	 0000001
+#define __B75	 0000002
+#define __B110	 0000003
+#define __B134	 0000004
+#define __B150	 0000005
+#define __B200	 0000006
+#define __B300	 0000007
+#define __B600	 0000010
+#define __B1200	 0000011
+#define __B1800	 0000012
+#define __B2400	 0000013
+#define __B4800	 0000014
+#define __B9600  0000015
+#define __B19200 0000016
+#define __B38400 0000017
+#include <bits/termios-cbaud.h>
+
+# define __EXTA	 __B19200
+# define __EXTB	 __B38400
+# define BOTHER  __BOTHER
 #endif
-#include <bits/termios-baud.h>
 
-#include <bits/termios-c_cflag.h>
 #include <bits/termios-c_lflag.h>
 
 #ifdef __USE_MISC
@@ -74,3 +80,5 @@ typedef unsigned int	tcflag_t;
 #include <bits/termios-tcflow.h>
 
 #include <bits/termios-misc.h>
+
+#include <bits/termios-baud.h>
diff --git a/sysdeps/unix/sysv/linux/cfsetspeed.c b/sysdeps/unix/sysv/linux/cfsetspeed.c
new file mode 100644
index 0000000..8ce46f8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/cfsetspeed.c
@@ -0,0 +1,59 @@
+/* cfsetspeed(), Linux version.
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <termios_internals.h>
+
+/* Set both the input and output baud rates stored in *TERMIOS_P to SPEED.  */
+int
+__cfsetspeed (struct termios *termios_p, speed_t speed)
+{
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
+  /* Keep SPEED as given; its ___speed_to_cbaud code goes in c_cflag twice.  */
+  termios_p->c_ospeed = speed;
+  termios_p->c_ispeed = speed;
+  termios_p->c_cflag &= ~(CBAUD | CIBAUD);
+  termios_p->c_cflag |= cbaud | (cbaud << IBSHIFT);
+
+  return 0;
+}
+libc_hidden_def (__cfsetspeed)
+versioned_symbol (libc, __cfsetspeed, cfsetspeed, GLIBC_2_42);
+
+#if _TERMIOS_OLD_COMPAT
+
+int
+attribute_compat_text_section
+__old_cfsetspeed (old_termios_t *termios_p, speed_t speed)
+{
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+  /* SPEED passed the check above; it is OR'ed into c_cflag unconverted.  */
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise these fields don't exist in old_termios_t.  */
+  termios_p->c_ospeed = real_speed;
+  termios_p->c_ispeed = real_speed;
+#endif
+  termios_p->c_cflag &= ~(CBAUD | CIBAUD);
+  termios_p->c_cflag |= speed | (speed << IBSHIFT);
+
+  return 0;
+}
+compat_symbol (libc, __old_cfsetspeed, cfsetspeed, GLIBC_2_0);
+
+#endif /* _TERMIOS_OLD_COMPAT */
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c
index d425680..b1df5ed 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c
+++ b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c
@@ -1,5 +1,5 @@
-/* Optimized strcasecmp_l implementation for POWER7.
-   Copyright (C) 2013-2025 Free Software Foundation, Inc.
+/* Linux close syscall implementation -- non-cancellable, no errno update.
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <string.h>
+#include <unistd.h>
+#include <sysdep-cancel.h>
+#include <not-cancel.h>
 
-#define __strncasecmp_l __strncasecmp_l_power7
-#define USE_IN_EXTENDED_LOCALE_MODEL    1
-
-extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden;
-
-#include <string/strncase.c>
+void
+__close_nocancel_nostatus (int fd)
+{
+  INTERNAL_SYSCALL_CALL (close, fd);  /* Result is not checked.  */
+}
+libc_hidden_def (__close_nocancel_nostatus)
diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
index a719a55..73fdba1 100644
--- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
@@ -184,6 +184,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist
index f123757..6325fc1 100644
--- a/sysdeps/unix/sysv/linux/csky/libc.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libc.abilist
@@ -2789,6 +2789,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/csky/libm.abilist b/sysdeps/unix/sysv/linux/csky/libm.abilist
index 8ae4be4..4ed463c 100644
--- a/sysdeps/unix/sysv/linux/csky/libm.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libm.abilist
@@ -895,6 +895,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -907,6 +913,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
index dc592c5..d8ffab9 100644
--- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
@@ -214,6 +214,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 326
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 275
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist
index 2dc85b9..86b3fbd 100644
--- a/sysdeps/unix/sysv/linux/hppa/libc.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist
@@ -2826,6 +2826,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/hppa/libm.abilist b/sysdeps/unix/sysv/linux/hppa/libm.abilist
index 5797cf4..d681d6e 100644
--- a/sysdeps/unix/sysv/linux/hppa/libm.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
index c10897f..196dfec 100644
--- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
@@ -245,6 +245,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 342
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist
index 1e38217..6555592 100644
--- a/sysdeps/unix/sysv/linux/i386/libc.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libc.abilist
@@ -3009,6 +3009,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist
index aa00f7e..de77b0f 100644
--- a/sysdeps/unix/sysv/linux/i386/libm.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libm.abilist
@@ -1284,6 +1284,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1300,6 +1308,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/if_index.c b/sysdeps/unix/sysv/linux/if_index.c
index 0b01fd1..5d13759 100644
--- a/sysdeps/unix/sysv/linux/if_index.c
+++ b/sysdeps/unix/sysv/linux/if_index.c
@@ -32,35 +32,23 @@
 unsigned int
 __if_nametoindex (const char *ifname)
 {
-#ifndef SIOCGIFINDEX
-  __set_errno (ENOSYS);
-  return 0;
-#else
-  struct ifreq ifr;
   if (strlen (ifname) >= IFNAMSIZ)
     {
       __set_errno (ENODEV);
       return 0;
     }
 
-  strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
-
   int fd = __opensock ();
-
   if (fd < 0)
     return 0;
 
-  if (__ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
-    {
-      int saved_errno = errno;
-      __close_nocancel_nostatus (fd);
-      if (saved_errno == EINVAL)
-	__set_errno (ENOSYS);
-      return 0;
-    }
+  struct ifreq ifr;
+  strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
+
+  int status = __ioctl (fd, SIOCGIFINDEX, &ifr);
   __close_nocancel_nostatus (fd);
-  return ifr.ifr_ifindex;
-#endif
+
+  return status < 0 ? 0 : ifr.ifr_ifindex;
 }
 libc_hidden_def (__if_nametoindex)
 weak_alias (__if_nametoindex, if_nametoindex)
@@ -83,8 +71,8 @@ weak_alias (__if_freenameindex, if_freenameindex)
 libc_hidden_weak (if_freenameindex)
 
 
-static struct if_nameindex *
-if_nameindex_netlink (void)
+struct if_nameindex *
+__if_nameindex (void)
 {
   struct netlink_handle nh = { 0, 0, 0, NULL, NULL };
   struct if_nameindex *idx = NULL;
@@ -196,19 +184,6 @@ if_nameindex_netlink (void)
 
   return idx;
 }
-
-
-struct if_nameindex *
-__if_nameindex (void)
-{
-#ifndef SIOCGIFINDEX
-  __set_errno (ENOSYS);
-  return NULL;
-#else
-  struct if_nameindex *result = if_nameindex_netlink ();
-  return result;
-#endif
-}
 weak_alias (__if_nameindex, if_nameindex)
 libc_hidden_weak (if_nameindex)
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/unix/sysv/linux/isatty.c
index 7b45fcd..3faaec5 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
+++ b/sysdeps/unix/sysv/linux/isatty.c
@@ -1,5 +1,5 @@
-/* Optimized strcmp implementation for POWER10/PPC64.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
+/* Test whether a file descriptor refers to a terminal.  Linux version.
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +16,14 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define STRCMP __strcmp_power10
+#include <termios_internals.h>
 
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S>
-#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */
+/* Return 1 if FD is a terminal, 0 if not.  This simply does a
+   TCGETS2 ioctl into a dummy buffer without parsing the result.  */
+int
+__isatty (int fd)
+{
+  struct termios2 k_termios;
+  return INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0;
+}
+weak_alias (__isatty, isatty)
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c b/sysdeps/unix/sysv/linux/isatty_nostatus.c
index 5b45ed5..406decb 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c
+++ b/sysdeps/unix/sysv/linux/isatty_nostatus.c
@@ -1,5 +1,4 @@
-/* Optimized strcasecmp_l implementation for POWER7.
-   Copyright (C) 2013-2025 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +15,12 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <termios_internals.h>
 
-#include <string.h>
-
-#define __strncasecmp __strncasecmp_power7
-
-extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden;
-
-#include <string/strncase.c>
+/* Return 1 if FD is a terminal, 0 if not, without changing errno.  */
+int
+__isatty_nostatus (int fd)
+{
+  struct termios2 k_termios;
+  return INTERNAL_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0;
+}
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index 86b2d3c..a49a915 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -54,6 +54,10 @@
    configurations).  */
 #define __ASSUME_SET_ROBUST_LIST	1
 
+/* The termios2 interface was introduced across all architectures except
+   Alpha in kernel 2.6.22.  */
+#define __ASSUME_TERMIOS2	1
+
 /* Support for various CLOEXEC and NONBLOCK flags was added in
    2.6.27.  */
 #define __ASSUME_IN_NONBLOCK	1
diff --git a/sysdeps/unix/sysv/linux/kernel_termios.h b/sysdeps/unix/sysv/linux/kernel_termios.h
deleted file mode 100644
index f02a197..0000000
--- a/sysdeps/unix/sysv/linux/kernel_termios.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/libc_sigaction.c b/sysdeps/unix/sysv/linux/libc_sigaction.c
index bbfc177..67dbc04 100644
--- a/sysdeps/unix/sysv/linux/libc_sigaction.c
+++ b/sysdeps/unix/sysv/linux/libc_sigaction.c
@@ -49,7 +49,7 @@ __libc_sigaction (int sig, const struct sigaction *act, struct sigaction *oact)
     {
       kact.k_sa_handler = act->sa_handler;
       memcpy (&kact.sa_mask, &act->sa_mask, sizeof (sigset_t));
-      kact.sa_flags = act->sa_flags;
+      kact.sa_flags = (unsigned int) act->sa_flags;
       SET_SA_RESTORER (&kact, act);
     }
 
diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
index f123d84..f57a152 100644
--- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
@@ -171,6 +171,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
index 927fc21..a6cab96 100644
--- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
@@ -2273,6 +2273,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
index 601ad1b..4b3ea80 100644
--- a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
@@ -1124,6 +1124,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1140,6 +1148,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
index 715809a..a95cb41 100644
--- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
@@ -234,6 +234,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 341
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
index 74da49d..7b7b72a 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
@@ -2785,6 +2785,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
index efa90f9..63bad09 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
index e5d6781..df398e4 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
@@ -2952,6 +2952,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
index 040303a..9dba60b 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
@@ -956,6 +956,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -968,6 +974,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
index 24e218f..fe08f5c 100644
--- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
@@ -244,6 +244,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 372
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
index 4dbd4b6..ca8df6f 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
@@ -2838,6 +2838,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
index ad55190..5596e08 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
index c5965bb..9508154 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
@@ -2835,6 +2835,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
index ad55190..5596e08 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/mips/Versions b/sysdeps/unix/sysv/linux/mips/Versions
index 9ea0fa6..48f0037 100644
--- a/sysdeps/unix/sysv/linux/mips/Versions
+++ b/sysdeps/unix/sysv/linux/mips/Versions
@@ -26,6 +26,10 @@ libc {
     pthread_attr_setstack;
     pthread_attr_setstacksize;
   }
+  GLIBC_2.42 {
+    tcgetattr;
+    tcsetattr;
+  }
   GLIBC_PRIVATE {
     # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime).
     __vdso_clock_gettime;
diff --git a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h b/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h
deleted file mode 100644
index ef69821..0000000
--- a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* struct termios definition.  Linux/mips version.
-   Copyright (C) 2019-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _TERMIOS_H
-# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead."
-#endif
-
-#define NCCS 32
-struct termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[NCCS];		/* control characters */
-#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0
-#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0
-  };
diff --git a/sysdeps/unix/sysv/linux/mips/kernel_termios.h b/sysdeps/unix/sysv/linux/mips/kernel_termios.h
deleted file mode 100644
index fd8d35a..0000000
--- a/sysdeps/unix/sysv/linux/mips/kernel_termios.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.24 kernel.  */
-
-#define __KERNEL_NCCS 23
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
index a7615cb..7d76d65 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
@@ -229,6 +229,7 @@
 #define __NR_open 4005
 #define __NR_open_by_handle_at 4340
 #define __NR_open_tree 4428
+#define __NR_open_tree_attr 4467
 #define __NR_openat 4288
 #define __NR_openat2 4437
 #define __NR_pause 4029
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
index 10715e0..4d51cc4 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
@@ -2913,7 +2913,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
index afe1c25..cdcc488 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
index 3d229b9..7f90fad 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
@@ -2911,7 +2911,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
index 1e1085d..888164b 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
@@ -1245,6 +1245,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1261,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
index 4d863c2..bca3ea6 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
@@ -212,6 +212,7 @@
 #define __NR_open 6002
 #define __NR_open_by_handle_at 6304
 #define __NR_open_tree 6428
+#define __NR_open_tree_attr 6467
 #define __NR_openat 6251
 #define __NR_openat2 6437
 #define __NR_pause 6033
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
index e4cb452..fc366d1 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
@@ -2919,7 +2919,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
index 9b6683e..5bcd929 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
@@ -201,6 +201,7 @@
 #define __NR_open 5002
 #define __NR_open_by_handle_at 5299
 #define __NR_open_tree 5428
+#define __NR_open_tree_attr 5467
 #define __NR_openat 5247
 #define __NR_openat2 5437
 #define __NR_pause 5033
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
index 8a32d25..debd5c3 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
@@ -2821,7 +2821,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/termios_arch.h b/sysdeps/unix/sysv/linux/mips/termios_arch.h
new file mode 100644
index 0000000..392d9aa
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/termios_arch.h
@@ -0,0 +1,34 @@
+/* Architectural parameters for Linux termios - MIPS version
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _TERMIOS2_NCCS 23
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
+
+#define _HAVE_STRUCT_OLD_TERMIOS 1
+
+#define OLD_NCCS 32
+struct old_termios
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[OLD_NCCS];		/* control characters */
+};
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index ece3297..5ac6dd4 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -53,6 +53,9 @@ __typeof (__write) __write_nocancel;
 /* Uncancelable close.  */
 __typeof (__close) __close_nocancel;
 
+/* Uncancelable close that does not set errno in case of failure.  */
+void __close_nocancel_nostatus (int);
+
 /* Uncancelable fcntl.  */
 int __fcntl64_nocancel (int, int, ...);
 
@@ -65,17 +68,10 @@ hidden_proto (__read_nocancel)
 hidden_proto (__pread64_nocancel)
 hidden_proto (__write_nocancel)
 hidden_proto (__close_nocancel)
+hidden_proto (__close_nocancel_nostatus)
 hidden_proto (__fcntl64_nocancel)
 #endif
 
-/* Non cancellable close syscall that does not also set errno in case of
-   failure.  */
-static inline void
-__close_nocancel_nostatus (int fd)
-{
-  __close_nocancel (fd);
-}
-
 /* Non cancellable writev syscall that does not also set errno in case of
    failure.  */
 static inline void
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/unix/sysv/linux/old_termios.h
index 4387908..56d19ba 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
+++ b/sysdeps/unix/sysv/linux/old_termios.h
@@ -1,4 +1,6 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
+/* old_termios.h for Linux other than MIPS and SPARC
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,11 +17,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define STRNCMP __strncmp_power10
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S>
-#endif
+/* By default, there is no old termios structure.  */
+#define _HAVE_STRUCT_OLD_TERMIOS 0
+#define OLD_NCCS NCCS
+typedef struct termios old_termios_t;
diff --git a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
index a071c76..c2a1d51 100644
--- a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
@@ -183,6 +183,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_or1k_atomic 244
diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist
index 64dac95..b62d59f 100644
--- a/sysdeps/unix/sysv/linux/or1k/libc.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist
@@ -2263,6 +2263,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/or1k/libm.abilist b/sysdeps/unix/sysv/linux/or1k/libm.abilist
index 80e4ba1..bef7a98 100644
--- a/sysdeps/unix/sysv/linux/or1k/libm.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libm.abilist
@@ -829,6 +829,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -841,6 +847,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/ioctls.h b/sysdeps/unix/sysv/linux/powerpc/bits/ioctls.h
new file mode 100644
index 0000000..e1921df
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/ioctls.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 1996-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_IOCTL_H
+# error "Never use <bits/ioctls.h> directly; include <sys/ioctl.h> instead."
+#endif
+
+/* Use the definitions from the kernel header files.  */
+#include <asm/ioctls.h>
+
+/* PowerPC quirk: on PowerPC only, ioctl() emulates the TCGETS/TCSETS*
+   ioctls with tcgetattr/tcsetattr using the glibc struct termios.
+   As struct termios2 is the same as the kernel struct termios on PowerPC,
+   simply consider the kernel ones as the termios2 interface, even
+   though the kernel doesn't call it that.  */
+
+#define TCGETS2	 _IOR ('t', 19, struct termios2)
+#define TCSETS2	 _IOW ('t', 20, struct termios2)
+#define TCSETSW2 _IOW ('t', 21, struct termios2)
+#define TCSETSF2 _IOW ('t', 22, struct termios2)
+
+#include <linux/sockios.h>
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
index 9ea8cfb..a90d581 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
@@ -35,5 +35,7 @@
 #define CLOCAL	00100000
 
 #ifdef __USE_MISC
-# define ADDRB 04000000000
+# define ADDRB    04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h
index 374d9f8..7bcbba4 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h
@@ -17,29 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
-# define CBAUD	0000377
-# define CBAUDEX 0000020
-# define CMSPAR   010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
+# define CBAUD	    000000377
+# define CBAUDEX    000000020
+# define CIBAUD     077600000
+# define IBSHIFT    16
 #endif
 
-#define  B57600   00020
-#define  B115200  00021
-#define  B230400  00022
-#define  B460800  00023
-#define  B500000  00024
-#define  B576000  00025
-#define  B921600  00026
-#define  B1000000 00027
-#define  B1152000 00030
-#define  B1500000 00031
-#define  B2000000 00032
-#define  B2500000 00033
-#define  B3000000 00034
-#define  B3500000 00035
-#define  B4000000 00036
-#define __MAX_BAUD B4000000
+#define  __B57600   00020
+#define  __B115200  00021
+#define  __B230400  00022
+#define  __B460800  00023
+#define  __B500000  00024
+#define  __B576000  00025
+#define  __B921600  00026
+#define  __B1000000 00027
+#define  __B1152000 00030
+#define  __B1500000 00031
+#define  __B2000000 00032
+#define  __B2500000 00033
+#define  __B3000000 00034
+#define  __B3500000 00035
+#define  __B4000000 00036
+#define  __BOTHER   00037
diff --git a/sysdeps/unix/sysv/linux/powerpc/configure b/sysdeps/unix/sysv/linux/powerpc/configure
index 61ae675..ef2055d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/configure
+++ b/sysdeps/unix/sysv/linux/powerpc/configure
@@ -40,48 +40,7 @@ fi
 printf "%s\n" "$libc_cv_mlong_double_128ibm" >&6; }
 
 if test "$libc_cv_mlong_double_128ibm" = no; then
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC $CFLAGS supports -mabi=ibmlongdouble" >&5
-printf %s "checking whether $CC $CFLAGS supports -mabi=ibmlongdouble... " >&6; }
-if test ${libc_cv_mabi_ibmlongdouble+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble"
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <float.h>
-int
-main (void)
-{
-
-#if LDBL_MANT_DIG != 106
-# error "compiler doesn't implement IBM extended format of long double"
-#endif
-long double foobar (long double x) { return x; }
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"
-then :
-  libc_cv_mabi_ibmlongdouble=yes
-else case e in #(
-  e) libc_cv_mabi_ibmlongdouble=no ;;
-esac
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
-  CFLAGS="$save_CFLAGS" ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mabi_ibmlongdouble" >&5
-printf "%s\n" "$libc_cv_mabi_ibmlongdouble" >&6; }
-
-  if test "$libc_cv_mabi_ibmlongdouble" = yes; then
-    CFLAGS="$CFLAGS -mabi=ibmlongdouble"
-  else
-    as_fn_error $? "this configuration requires -mlong-double-128 IBM extended format support" "$LINENO" 5
-  fi
+  CFLAGS="$CFLAGS -mabi=ibmlongdouble"
 fi
 
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports --no-tls-get-addr-optimize" >&5
diff --git a/sysdeps/unix/sysv/linux/powerpc/configure.ac b/sysdeps/unix/sysv/linux/powerpc/configure.ac
index 8d2ec60..42347a6 100644
--- a/sysdeps/unix/sysv/linux/powerpc/configure.ac
+++ b/sysdeps/unix/sysv/linux/powerpc/configure.ac
@@ -16,24 +16,7 @@ long double foobar (long double x) { return x; }]])],
 CFLAGS="$save_CFLAGS"])
 
 if test "$libc_cv_mlong_double_128ibm" = no; then
-  AC_CACHE_CHECK(whether $CC $CFLAGS supports -mabi=ibmlongdouble,
-		 libc_cv_mabi_ibmlongdouble, [dnl
-  save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble"
-  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <float.h>]], [[
-#if LDBL_MANT_DIG != 106
-# error "compiler doesn't implement IBM extended format of long double"
-#endif
-long double foobar (long double x) { return x; }]])],
-		 libc_cv_mabi_ibmlongdouble=yes,
-		 libc_cv_mabi_ibmlongdouble=no)
-  CFLAGS="$save_CFLAGS"])
-
-  if test "$libc_cv_mabi_ibmlongdouble" = yes; then
-    CFLAGS="$CFLAGS -mabi=ibmlongdouble"
-  else
-    AC_MSG_ERROR([this configuration requires -mlong-double-128 IBM extended format support])
-  fi
+  CFLAGS="$CFLAGS -mabi=ibmlongdouble"
 fi
 
 LIBC_LINKER_FEATURE([--no-tls-get-addr-optimize], [-Wl,--no-tls-get-addr-optimize],
diff --git a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h b/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h
deleted file mode 100644
index f6ea570..0000000
--- a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-    cc_t c_line;		/* line discipline */
-    speed_t c_ispeed;           /* input speed */
-    speed_t c_ospeed;           /* output speed */
-  };
-
-#define _HAVE_C_ISPEED 1
-#define _HAVE_C_OSPEED 1
-
-/* We have the kernel termios structure, so we can presume this code knows
-   what it's doing...  */
-
-#undef  TCGETS
-#undef  TCSETS
-#undef  TCSETSW
-#undef  TCSETSF
-#define TCGETS	_IOR ('t', 19, struct __kernel_termios)
-#define TCSETS	_IOW ('t', 20, struct __kernel_termios)
-#define TCSETSW	_IOW ('t', 21, struct __kernel_termios)
-#define TCSETSF	_IOW ('t', 22, struct __kernel_termios)
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
index b3481e4..c371df8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
@@ -235,6 +235,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 346
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 286
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
index cc5e93c..883e66f 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
@@ -3142,6 +3142,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
index 4bb7707..7f584d3 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
@@ -1067,6 +1067,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -1079,6 +1085,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
index 9814997..84cd9e0 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
@@ -3187,6 +3187,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
index 99faf37..d1cd4b1 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
@@ -1066,6 +1066,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -1078,6 +1084,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
index 45108e8..df8844d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
@@ -220,6 +220,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 346
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 286
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
index 7f46295..8832568 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
@@ -2896,6 +2896,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
index a7059f8..bfc5310 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
@@ -1060,6 +1060,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -1072,6 +1078,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
index f24f81b..b6ff801 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
@@ -2972,6 +2972,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
index 5f5f543..dedfefc 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
@@ -1429,9 +1429,19 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 __compoundnieee128 F
 GLIBC_2.42 __pownieee128 F
 GLIBC_2.42 __powrieee128 F
+GLIBC_2.42 __rootnieee128 F
 GLIBC_2.42 __rsqrtieee128 F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1448,6 +1458,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/termios_arch.h b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h
new file mode 100644
index 0000000..20025f2
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h
@@ -0,0 +1,26 @@
+/* Architectural parameters for Linux termios - Alpha/PowerPC version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef TERMIOS_INTERNALS_H
+# error "<termios_arch.h> should only be included from <termios_internals.h>"
+#endif
+
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1
+#define _HAVE_STRUCT_OLD_TERMIOS 0
diff --git a/sysdeps/unix/sysv/linux/riscv/hwprobe.c b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
index e0cbd22..bc7f6f3 100644
--- a/sysdeps/unix/sysv/linux/riscv/hwprobe.c
+++ b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
@@ -23,13 +23,13 @@
 #include <sysdep-vdso.h>
 
 int __riscv_hwprobe (struct riscv_hwprobe *pairs, size_t pair_count,
-		     size_t cpu_count, unsigned long int *cpus,
+		     size_t cpusetsize, __RISCV_HWPROBE_CPUS_TYPE cpus,
 		     unsigned int flags)
 {
   int r;
 
   r = INTERNAL_VSYSCALL (riscv_hwprobe, 5, pairs, pair_count,
-                         cpu_count, cpus, flags);
+                         cpusetsize, cpus.__ul, flags);
 
   /* Negate negative errno values to match pthreads API. */
   return -r;
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
index 5333879..1bae763 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
@@ -168,6 +168,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
index 9330c7a..1771a23 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
@@ -2516,6 +2516,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
index 2fc2680..9342294 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
@@ -1124,6 +1124,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1140,6 +1148,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
index eed1dff..1a1ebf8 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
@@ -175,6 +175,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
index ea4555d..4b48352 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
@@ -2716,6 +2716,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
index a731d27..76e74c9 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
@@ -1221,6 +1221,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1237,6 +1245,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
index bebad6c..40415aa 100644
--- a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
+++ b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
@@ -21,6 +21,7 @@
 #define _SYS_HWPROBE_H 1
 
 #include <features.h>
+#include <sched.h>
 #include <stddef.h>
 #include <errno.h>
 #ifdef __has_include
@@ -63,22 +64,39 @@ struct riscv_hwprobe {
 
 __BEGIN_DECLS
 
-extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs, size_t __pair_count,
-			    size_t __cpu_count, unsigned long int *__cpus,
+#if defined __cplusplus || !__GNUC_PREREQ (2, 7)
+# define __RISCV_HWPROBE_CPUS_TYPE cpu_set_t *
+#else
+/* The fourth argument to __riscv_hwprobe should be a null pointer or a
+   pointer to a cpu_set_t (either the fixed-size type or allocated with
+   CPU_ALLOC).  However, early versions of this header file used the
+   argument type unsigned long int *.  The transparent union allows
+   the argument to be either cpu_set_t * or unsigned long int * for
+   compatibility.  The older header file requiring unsigned long int *
+   can be identified by the lack of the __RISCV_HWPROBE_CPUS_TYPE macro.
+   In C++ and with compilers that do not support transparent unions, the
+   argument type must be cpu_set_t *.  */
+typedef union {
+	cpu_set_t *__cs;
+	unsigned long int *__ul;
+} __RISCV_HWPROBE_CPUS_TYPE __attribute__ ((__transparent_union__));
+# define __RISCV_HWPROBE_CPUS_TYPE __RISCV_HWPROBE_CPUS_TYPE
+#endif
+
+extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs,
+			    size_t __pair_count, size_t __cpusetsize,
+			    __RISCV_HWPROBE_CPUS_TYPE __cpus,
 			    unsigned int __flags)
-     __nonnull ((1)) __wur
-     __fortified_attr_access (__read_write__, 1, 2)
-     __fortified_attr_access (__read_only__, 4, 3);
+     __THROW __nonnull ((1)) __attr_access ((__read_write__, 1, 2));
 
-/* A pointer to the __riscv_hwprobe vDSO function is passed as the second
+/* A pointer to the __riscv_hwprobe function is passed as the second
    argument to ifunc selector routines. Include a function pointer type for
    convenience in calling the function in those settings. */
-typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs, size_t __pair_count,
-				  size_t __cpu_count, unsigned long int *__cpus,
+typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs,
+				  size_t __pair_count, size_t __cpusetsize,
+				  __RISCV_HWPROBE_CPUS_TYPE __cpus,
 				  unsigned int __flags)
-     __nonnull ((1)) __wur
-     __fortified_attr_access (__read_write__, 1, 2)
-     __fortified_attr_access (__read_only__, 4, 3);
+     __nonnull ((1)) __attr_access ((__read_write__, 1, 2));
 
 /* Helper function usable from ifunc selectors that probes a single key. */
 static __inline int
diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h
index ee015df..05e0e05 100644
--- a/sysdeps/unix/sysv/linux/riscv/sysdep.h
+++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h
@@ -145,11 +145,12 @@
 #  define HAVE_CLOCK_GETRES64_VSYSCALL	"__vdso_clock_getres"
 #  define HAVE_CLOCK_GETTIME64_VSYSCALL	"__vdso_clock_gettime"
 #  define HAVE_GETTIMEOFDAY_VSYSCALL	"__vdso_gettimeofday"
+#  define HAVE_GETRANDOM_VSYSCALL	"__vdso_getrandom"
 # else
 #  define VDSO_NAME	"LINUX_5.4"
 #  define VDSO_HASH	61765876
 
-/* RV32 does not support the gettime VDSO syscalls.  */
+/* RV32 does not support the gettime and getrandom VDSO syscalls.  */
 # endif
 # define HAVE_CLONE3_WRAPPER		1
 
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
index 0bf8f95..f77f39f 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
@@ -232,6 +232,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 336
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
index 3e625fa..f0decc7 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
@@ -3140,6 +3140,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
index ec5dd91..be2d177 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
@@ -1348,6 +1348,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1364,6 +1372,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
index 061f8db..65d6644 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
@@ -204,6 +204,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 336
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
index 46b4a04..da8a2bf 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
@@ -2933,6 +2933,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
index c755532..7d7ba26 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
@@ -1348,6 +1348,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1364,6 +1372,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/s390/sysconf.c b/sysdeps/unix/sysv/linux/s390/sysconf.c
index 8386523..dcb335a 100644
--- a/sysdeps/unix/sysv/linux/s390/sysconf.c
+++ b/sysdeps/unix/sysv/linux/s390/sysconf.c
@@ -66,7 +66,7 @@ get_cache_info (int level, int attr, int type)
 	return 0L;
     }
 
-  if (!S390_IS_Z10 (features->stfle_bits))
+  if (!S390_IS_Z10 (features->stfle_orig))
     {
       /* We are at least on a z9 machine.
 	 Return 256byte for LINESIZE for L1 d/i-cache,
diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
index 52cc320..5948ab0 100644
--- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
@@ -228,6 +228,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 360
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
index 36a94c9..fb30341 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
@@ -2832,6 +2832,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/sh/be/libm.abilist b/sysdeps/unix/sysv/linux/sh/be/libm.abilist
index 799996c..5b0b080 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
index f79aba6..d716673 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
@@ -2829,6 +2829,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libm.abilist b/sysdeps/unix/sysv/linux/sh/le/libm.abilist
index 799996c..5b0b080 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libm.abilist
@@ -920,6 +920,12 @@ GLIBC_2.41 tanpif32 F
 GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf32 F
@@ -932,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/sparc/Versions b/sysdeps/unix/sysv/linux/sparc/Versions
index f127bdf..7dd61a5 100644
--- a/sysdeps/unix/sysv/linux/sparc/Versions
+++ b/sysdeps/unix/sysv/linux/sparc/Versions
@@ -29,6 +29,10 @@ libc {
 
     __getshmlba;
   }
+  GLIBC_2.42 {
+    tcgetattr;
+    tcsetattr;
+  }
   GLIBC_PRIVATE {
     # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime).
     __vdso_clock_gettime;
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h
index 677db7b..34eba18 100644
--- a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h
@@ -17,30 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
 # define CBAUD   0x0000100f
 # define CBAUDEX 0x00001000
-# define CIBAUD	 0x100f0000	/* input baud rate (not used) */
-# define CMSPAR  0x40000000	/* mark or space (stick) parity */
-# define CRTSCTS 0x80000000	/* flow control */
+# define CIBAUD	 0x100f0000	/* input baud rate */
+# define IBSHIFT 16
 #endif
 
-#define  B57600  0x00001001
-#define  B115200 0x00001002
-#define  B230400 0x00001003
-#define  B460800 0x00001004
-#define  B76800  0x00001005
-#define  B153600 0x00001006
-#define  B307200 0x00001007
-#define  B614400 0x00001008
-#define  B921600 0x00001009
-#define  B500000 0x0000100a
-#define  B576000 0x0000100b
-#define B1000000 0x0000100c
-#define B1152000 0x0000100d
-#define B1500000 0x0000100e
-#define B2000000 0x0000100f
-#define __MAX_BAUD B2000000
+#define  __B57600  0x00001001
+#define  __B115200 0x00001002
+#define  __B230400 0x00001003
+#define  __B460800 0x00001004
+#define  __B76800  0x00001005
+#define  __B153600 0x00001006
+#define  __B307200 0x00001007
+#define  __B614400 0x00001008
+#define  __B921600 0x00001009
+#define  __B500000 0x0000100a
+#define  __B576000 0x0000100b
+#define __B1000000 0x0000100c
+#define __B1152000 0x0000100d
+#define __B1500000 0x0000100e
+#define __B2000000 0x0000100f
+#define __BOTHER   0x00001000
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h
deleted file mode 100644
index 269ca9d..0000000
--- a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* struct termios definition.  Linux/sparc version.
-   Copyright (C) 2019-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _TERMIOS_H
-# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead."
-#endif
-
-#define NCCS 17
-struct termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[NCCS];		/* control characters */
-#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0
-#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0
-  };
diff --git a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h b/sysdeps/unix/sysv/linux/sparc/kernel_termios.h
deleted file mode 100644
index 401079c..0000000
--- a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
-
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
-
-#define __KERNEL_NCCS 17
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
index ee870bc..85828a8 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
@@ -230,6 +230,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 333
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 284
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
index 4a6acc0..6deedf2 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
@@ -3161,7 +3161,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
index f33a969..8107101 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
@@ -1355,6 +1355,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1371,6 +1379,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
index 3acbebe..d83ecd1 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
@@ -211,6 +211,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 333
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 284
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
index 931109d..1ce22bf 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
@@ -2797,7 +2797,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
index 609fdb5..418ed9d 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
@@ -1245,6 +1245,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1261,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/sparc/termios_arch.h b/sysdeps/unix/sysv/linux/sparc/termios_arch.h
new file mode 100644
index 0000000..f3b3f65
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sparc/termios_arch.h
@@ -0,0 +1,34 @@
+/* Architectural parameters for Linux termios - SPARC version
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
+
+#define _HAVE_STRUCT_OLD_TERMIOS 1
+
+#define OLD_NCCS 17
+struct old_termios
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[OLD_NCCS];		/* control characters */
+};
diff --git a/sysdeps/unix/sysv/linux/speed.c b/sysdeps/unix/sysv/linux/speed.c
index 017f741..4efb0de 100644
--- a/sysdeps/unix/sysv/linux/speed.c
+++ b/sysdeps/unix/sysv/linux/speed.c
@@ -16,82 +16,351 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <stddef.h>
-#include <errno.h>
-#include <termios.h>
-#include <sysdep.h>
+#include <termios_internals.h>
 
-/* This is a gross hack around a kernel bug.  If the cfsetispeed functions
-   is called with the SPEED argument set to zero this means use the same
-   speed as for output.  But we don't have independent input and output
-   speeds and therefore cannot record this.
+/* Conversions between legacy c_cflag fields and actual baud rates */
 
-   We use an unused bit in the `c_iflag' field to keep track of this
-   use of `cfsetispeed'.  The value here must correspond to the one used
-   in `tcsetattr.c'.  */
-#define IBAUD0	020000000000
+/* These expressions may seem complicated; the _cbix() macro
+   compresses the CBAUD field into an index in the range 0-31. On most
+   Linux platforms, the CBAUD field is 5 bits, but the topmost bit,
+   indicated by CBAUDEX, is discontinuous with the rest.
+
+   The resulting masks look like:
+
+		Alpha		PowerPC		others
+
+   CBAUD	0x001f		0x00ff		0x100f
+   CBAUDEX	0x0000		0x0010		0x1000
+
+   LOWCBAUD	0x001f		0x000f		0x000f
+   CBAUDMASK	0x001f		0x001f		0x100f
+
+   CBAUDMASK is used to test for invalid values passed to the
+   compatibility functions or in termios::c_cflag on PowerPC.
+
+   The divide-multiply sequence in the _cbix() macro gets converted
+   to shift and masks as necessary by the compiler. */
+
+#define LOWCBAUD (CBAUD & (CBAUDEX-1))
+#define _cbix(x) (((x) & LOWCBAUD) | \
+		  (CBAUDEX ? ((x) & CBAUDEX)/CBAUDEX * (LOWCBAUD+1) : 0))
+#define CBAUDMASK (LOWCBAUD | CBAUDEX)
+
+/* Compile time sanity checks for broken CBAUD or CIBAUD definitions */
+#if CIBAUD != (CBAUD << IBSHIFT)
+# error "CIBAUD should == CBAUD << IBSHIFT"
+#elif CBAUDEX & (CBAUDEX-1)
+# error "CBAUDEX should either be 0 or a single bit"
+#elif !(CBAUD & 1)
+# error "The CBAUD field should start at bit 0"
+#elif CBAUDEX & ~CBAUD
+# error "CBAUD should include the CBAUDEX bit"
+#endif
+
+speed_t
+___cbaud_to_speed (tcflag_t c_cflag, speed_t other)
+{
+  static const speed_t cbaudix_to_speed [] =
+    {
+      [0 ... _cbix(CBAUDMASK)] = -1,
+      [_cbix(__B0)] = 0,
+      [_cbix(__B50)] = 50,
+      [_cbix(__B75)] = 75,
+      [_cbix(__B110)] = 110,
+      [_cbix(__B134)] = 134,
+      [_cbix(__B150)] = 150,
+      [_cbix(__B200)] = 200,
+      [_cbix(__B300)] = 300,
+      [_cbix(__B600)] = 600,
+      [_cbix(__B1200)] = 1200,
+      [_cbix(__B1800)] = 1800,
+      [_cbix(__B2400)] = 2400,
+      [_cbix(__B4800)] = 4800,
+      [_cbix(__B9600)] = 9600,
+      [_cbix(__B19200)] = 19200,
+      [_cbix(__B38400)] = 38400,
+      [_cbix(__B57600)] = 57600,
+      [_cbix(__B115200)] = 115200,
+      [_cbix(__B230400)] = 230400,
+      [_cbix(__B460800)] = 460800,
+      [_cbix(__B500000)] = 500000,
+      [_cbix(__B576000)] = 576000,
+      [_cbix(__B921600)] = 921600,
+      [_cbix(__B1000000)] = 1000000,
+      [_cbix(__B1152000)] = 1152000,
+      [_cbix(__B1500000)] = 1500000,
+      [_cbix(__B2000000)] = 2000000,
+#ifdef __B7200
+      [_cbix(__B7200)] = 7200,
+#endif
+#ifdef __B14400
+      [_cbix(__B14400)] = 14400,
+#endif
+#ifdef __B28800
+      [_cbix(__B28800)] = 28800,
+#endif
+#ifdef __B76800
+      [_cbix(__B76800)] = 76800,
+#endif
+#ifdef __B153600
+      [_cbix(__B153600)] = 153600,
+#endif
+#ifdef __B307200
+      [_cbix(__B307200)] = 307200,
+#endif
+#ifdef __B614400
+      [_cbix(__B614400)] = 614400,
+#endif
+#ifdef __B2500000
+      [_cbix(__B2500000)] = 2500000,
+#endif
+#ifdef __B3000000
+      [_cbix(__B3000000)] = 3000000,
+#endif
+#ifdef __B3500000
+      [_cbix(__B3500000)] = 3500000,
+#endif
+#ifdef __B4000000
+      [_cbix(__B4000000)] = 4000000,
+#endif
+    };
+  speed_t speed;
+
+  if (c_cflag & (tcflag_t)(~CBAUDMASK))
+    return other;
+
+  speed = cbaudix_to_speed[_cbix(c_cflag)];
+  return speed == (speed_t)-1 ? other : speed;
+}
+
+tcflag_t
+___speed_to_cbaud (speed_t speed)
+{
+  switch (speed) {
+  case 0:
+    return __B0;
+  case 50:
+    return __B50;
+  case 75:
+    return __B75;
+  case 110:
+    return __B110;
+  case 134:
+    return __B134;
+  case 150:
+    return __B150;
+  case 200:
+    return __B200;
+  case 300:
+    return __B300;
+  case 600:
+    return __B600;
+  case 1200:
+    return __B1200;
+  case 1800:
+    return __B1800;
+  case 2400:
+    return __B2400;
+  case 4800:
+    return __B4800;
+  case 9600:
+    return __B9600;
+  case 19200:
+    return __B19200;
+  case 38400:
+    return __B38400;
+  case 57600:
+    return __B57600;
+  case 115200:
+    return __B115200;
+  case 230400:
+    return __B230400;
+  case 460800:
+    return __B460800;
+  case 500000:
+    return __B500000;
+  case 576000:
+    return __B576000;
+  case 921600:
+    return __B921600;
+  case 1000000:
+    return __B1000000;
+  case 1152000:
+    return __B1152000;
+  case 1500000:
+    return __B1500000;
+  case 2000000:
+    return __B2000000;
+#ifdef __B76800
+  case 76800:
+    return __B76800;
+#endif
+#ifdef __B153600
+  case 153600:
+    return __B153600;
+#endif
+#ifdef __B307200
+  case 307200:
+    return __B307200;
+#endif
+#ifdef __B614400
+  case 614400:
+    return __B614400;
+#endif
+#ifdef __B2500000
+  case 2500000:
+    return __B2500000;
+#endif
+#ifdef __B3000000
+  case 3000000:
+    return __B3000000;
+#endif
+#ifdef __B3500000
+  case 3500000:
+    return __B3500000;
+#endif
+#ifdef __B4000000
+  case 4000000:
+    return __B4000000;
+#endif
+  default:
+    return __BOTHER;
+  }
+}
+
+
+/* Canonicalize the representation of speed fields in a kernel
+   termios2 structure.  Specifically, if there is a valid legacy cbaud
+   representation (not __BOTHER), use it and propagate the
+   corresponding speed value to ispeed/ospeed, otherwise the other way
+   around if possible.  Finally, if the input speed is zero, copy the
+   output speed to the input speed.
+
+   The kernel doesn't do this canonicalization, which can affect
+   legacy utilities, so do it here.
+
+   This is used by tcgetattr() and tcsetattr(). */
+void
+___termios2_canonicalize_speeds (struct termios2 *k_termios_p)
+{
+  k_termios_p->c_ospeed =
+      ___cbaud_to_speed (cbaud (k_termios_p->c_cflag),  k_termios_p->c_ospeed);
+  k_termios_p->c_ispeed =
+      ___cbaud_to_speed (cibaud (k_termios_p->c_cflag), k_termios_p->c_ispeed);
+
+  if (!k_termios_p->c_ispeed)
+    k_termios_p->c_ispeed = k_termios_p->c_ospeed;
+
+  k_termios_p->c_cflag &= ~(CBAUD | CIBAUD);
+  k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ospeed);
+  k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ispeed) << IBSHIFT;
+}
 
 
 /* Return the output baud rate stored in *TERMIOS_P.  */
 speed_t
-cfgetospeed (const struct termios *termios_p)
+__cfgetospeed (const struct termios *termios_p)
 {
-  return termios_p->c_cflag & (CBAUD | CBAUDEX);
+  return termios_p->c_ospeed;
 }
+libc_hidden_def (__cfgetospeed)
+versioned_symbol (libc, __cfgetospeed, cfgetospeed, GLIBC_2_42);
 
-/* Return the input baud rate stored in *TERMIOS_P.
-   Although for Linux there is no difference between input and output
-   speed, the numerical 0 is a special case for the input baud rate. It
-   should set the input baud rate to the output baud rate. */
+/* Return the input baud rate stored in *TERMIOS_P.  */
 speed_t
-cfgetispeed (const struct termios *termios_p)
+__cfgetispeed (const struct termios *termios_p)
 {
-  return ((termios_p->c_iflag & IBAUD0)
-	  ? 0 : termios_p->c_cflag & (CBAUD | CBAUDEX));
+  return termios_p->c_ispeed;
 }
+libc_hidden_def (__cfgetispeed)
+versioned_symbol (libc, __cfgetispeed, cfgetispeed, GLIBC_2_42);
 
 /* Set the output baud rate stored in *TERMIOS_P to SPEED.  */
 int
-cfsetospeed (struct termios *termios_p, speed_t speed)
+__cfsetospeed (struct termios *termios_p, speed_t speed)
 {
-  if ((speed & ~CBAUD) != 0
-      && (speed < B57600 || speed > __MAX_BAUD))
-    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
 
-#if _HAVE_STRUCT_TERMIOS_C_OSPEED
   termios_p->c_ospeed = speed;
+  termios_p->c_cflag &= ~CBAUD;
+  termios_p->c_cflag |= cbaud;
+
+  return 0;
+}
+libc_hidden_def (__cfsetospeed)
+versioned_symbol (libc, __cfsetospeed, cfsetospeed, GLIBC_2_42);
+
+/* Set the input baud rate stored in *TERMIOS_P to SPEED. */
+int
+__cfsetispeed (struct termios *termios_p, speed_t speed)
+{
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
+
+  termios_p->c_ispeed = speed;
+  termios_p->c_cflag &= ~CIBAUD;
+  termios_p->c_cflag |= cbaud << IBSHIFT;
+
+  return 0;
+}
+libc_hidden_def (__cfsetispeed)
+versioned_symbol (libc, __cfsetispeed, cfsetispeed, GLIBC_2_42);
+
+#if _TERMIOS_OLD_COMPAT
+
+/* Legacy versions which return cbaud-encoded speed_t values */
+
+speed_t
+attribute_compat_text_section
+__old_cfgetospeed (const old_termios_t *termios_p)
+{
+  return cbaud (termios_p->c_cflag);
+}
+compat_symbol (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0);
+
+speed_t
+attribute_compat_text_section
+__old_cfgetispeed (const old_termios_t *termios_p)
+{
+  return cibaud (termios_p->c_cflag);
+}
+compat_symbol (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0);
+
+int
+attribute_compat_text_section
+__old_cfsetospeed (old_termios_t *termios_p, speed_t speed)
+{
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise this field doesn't exist in old_termios_t */
+  termios_p->c_ospeed = real_speed;
 #endif
-  termios_p->c_cflag &= ~(CBAUD | CBAUDEX);
+  termios_p->c_cflag &= ~CBAUD;
   termios_p->c_cflag |= speed;
 
   return 0;
 }
-libc_hidden_def (cfsetospeed)
+compat_symbol (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0);
 
-
-/* Set the input baud rate stored in *TERMIOS_P to SPEED.
-   Although for Linux there is no difference between input and output
-   speed, the numerical 0 is a special case for the input baud rate.  It
-   should set the input baud rate to the output baud rate.  */
 int
-cfsetispeed (struct termios *termios_p, speed_t speed)
+attribute_compat_text_section
+__old_cfsetispeed (old_termios_t *termios_p, speed_t speed)
 {
-  if ((speed & ~CBAUD) != 0
-      && (speed < B57600 || speed > __MAX_BAUD))
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
     return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
 
-#if _HAVE_STRUCT_TERMIOS_C_ISPEED
-  termios_p->c_ispeed = speed;
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise this field doesn't exist in old_termios_t */
+  termios_p->c_ispeed = real_speed;
 #endif
-  if (speed == 0)
-    termios_p->c_iflag |= IBAUD0;
-  else
-    {
-      termios_p->c_iflag &= ~IBAUD0;
-      termios_p->c_cflag &= ~(CBAUD | CBAUDEX);
-      termios_p->c_cflag |= speed;
-    }
+  termios_p->c_cflag &= ~CIBAUD;
+  termios_p->c_cflag |= speed << IBSHIFT;
 
   return 0;
 }
-libc_hidden_def (cfsetispeed)
+compat_symbol (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0);
+
+#endif /* _TERMIOS_OLD_COMPAT */
diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list
index 6f3351a..bde20e4 100644
--- a/sysdeps/unix/sysv/linux/syscall-names.list
+++ b/sysdeps/unix/sysv/linux/syscall-names.list
@@ -21,8 +21,8 @@
 # This file can list all potential system calls.  The names are only
 # used if the installed kernel headers also provide them.
 
-# The list of system calls is current as of Linux 6.14.
-kernel 6.14
+# The list of system calls is current as of Linux 6.15.
+kernel 6.15
 
 FAST_atomic_update
 FAST_cmpxchg
@@ -316,6 +316,7 @@ olduname
 open
 open_by_handle_at
 open_tree
+open_tree_attr
 openat
 openat2
 or1k_atomic
diff --git a/sysdeps/unix/sysv/linux/tcgetattr.c b/sysdeps/unix/sysv/linux/tcgetattr.c
index d672e0c..ca17569 100644
--- a/sysdeps/unix/sysv/linux/tcgetattr.c
+++ b/sysdeps/unix/sysv/linux/tcgetattr.c
@@ -15,66 +15,56 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
-#include <string.h>
-#include <termios.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sysdep.h>
-
-/* The difference here is that the termios structure used in the
-   kernel is not the same as we use in the libc.  Therefore we must
-   translate it here.  */
-#include <kernel_termios.h>
+#include <termios_internals.h>
 
 /* Put the state of FD into *TERMIOS_P.  */
 int
 __tcgetattr (int fd, struct termios *termios_p)
 {
-  struct __kernel_termios k_termios;
-  int retval;
-
-  retval = INLINE_SYSCALL (ioctl, 3, fd, TCGETS, &k_termios);
+  struct termios2 k_termios;
+  long int retval = INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios);
 
-  if (__glibc_likely (retval == 0))
+  if (__glibc_likely (retval != -1))
     {
-      termios_p->c_iflag = k_termios.c_iflag;
-      termios_p->c_oflag = k_termios.c_oflag;
-      termios_p->c_cflag = k_termios.c_cflag;
-      termios_p->c_lflag = k_termios.c_lflag;
-      termios_p->c_line = k_termios.c_line;
-#if _HAVE_STRUCT_TERMIOS_C_ISPEED
-# if _HAVE_C_ISPEED
-      termios_p->c_ispeed = k_termios.c_ispeed;
-# else
-      termios_p->c_ispeed = k_termios.c_cflag & (CBAUD | CBAUDEX);
-# endif
-#endif
-#if _HAVE_STRUCT_TERMIOS_C_OSPEED
-# if _HAVE_C_OSPEED
+      ___termios2_canonicalize_speeds (&k_termios);
+
+      memset (termios_p, 0, sizeof (*termios_p));
+      termios_p->c_iflag  = k_termios.c_iflag;
+      termios_p->c_oflag  = k_termios.c_oflag;
+      termios_p->c_cflag  = k_termios.c_cflag;
+      termios_p->c_lflag  = k_termios.c_lflag;
+      termios_p->c_line   = k_termios.c_line;
       termios_p->c_ospeed = k_termios.c_ospeed;
-# else
-      termios_p->c_ospeed = k_termios.c_cflag & (CBAUD | CBAUDEX);
-# endif
-#endif
-      if (sizeof (cc_t) == 1 || _POSIX_VDISABLE == 0
-	  || (unsigned char) _POSIX_VDISABLE == (unsigned char) -1)
-	memset (__mempcpy (&termios_p->c_cc[0], &k_termios.c_cc[0],
-			   __KERNEL_NCCS * sizeof (cc_t)),
-		_POSIX_VDISABLE, (NCCS - __KERNEL_NCCS) * sizeof (cc_t));
-      else
-	{
-	  memcpy (&termios_p->c_cc[0], &k_termios.c_cc[0],
-		  __KERNEL_NCCS * sizeof (cc_t));
+      termios_p->c_ispeed = k_termios.c_ispeed;
 
-	  for (size_t cnt = __KERNEL_NCCS; cnt < NCCS; ++cnt)
-	    termios_p->c_cc[cnt] = _POSIX_VDISABLE;
-	}
+      copy_c_cc (termios_p->c_cc, NCCS, k_termios.c_cc, _TERMIOS2_NCCS);
     }
 
   return retval;
 }
-
 libc_hidden_def (__tcgetattr)
+
+#if _TERMIOS_OLD_COMPAT && _HAVE_STRUCT_OLD_TERMIOS
+
+versioned_symbol (libc, __tcgetattr, tcgetattr, GLIBC_2_42);
+
+/* Legacy version for shorter struct termios */
+int
+attribute_compat_text_section
+__old_tcgetattr (int fd, old_termios_t *termios_p)
+{
+  struct termios new_termios;
+  int retval = __tcgetattr (fd, &new_termios);
+  if (__glibc_likely (retval != -1))
+    {
+      memcpy (termios_p, &new_termios, sizeof (*termios_p));
+    }
+  return retval;
+}
+compat_symbol (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0);
+
+#else
+
 weak_alias (__tcgetattr, tcgetattr)
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/tcsetattr.c b/sysdeps/unix/sysv/linux/tcsetattr.c
index 5a13ad8..4f07a03 100644
--- a/sysdeps/unix/sysv/linux/tcsetattr.c
+++ b/sysdeps/unix/sysv/linux/tcsetattr.c
@@ -15,67 +15,94 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
-#include <string.h>
-#include <termios.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sysdep.h>
-
-/* The difference here is that the termios structure used in the
-   kernel is not the same as we use in the libc.  Therefore we must
-   translate it here.  */
-#include <kernel_termios.h>
-
-
-/* This is a gross hack around a kernel bug.  If the cfsetispeed functions
-   is called with the SPEED argument set to zero this means use the same
-   speed as for output.  But we don't have independent input and output
-   speeds and therefore cannot record this.
-
-   We use an unused bit in the `c_iflag' field to keep track of this
-   use of `cfsetispeed'.  The value here must correspond to the one used
-   in `speed.c'.  */
-#define IBAUD0	020000000000
+#include <termios_internals.h>
 
+#define static_assert_equal(x,y) _Static_assert ((x) == (y), #x " != " #y)
 
 /* Set the state of FD to *TERMIOS_P.  */
 int
 __tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
 {
-  struct __kernel_termios k_termios;
-  unsigned long int cmd;
+  struct termios2 k_termios;
+  unsigned long cmd;
 
-  switch (optional_actions)
-    {
-    case TCSANOW:
-      cmd = TCSETS;
-      break;
-    case TCSADRAIN:
-      cmd = TCSETSW;
-      break;
-    case TCSAFLUSH:
-      cmd = TCSETSF;
-      break;
-    default:
-      return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
-    }
+  memset (&k_termios, 0, sizeof k_termios);
 
-  k_termios.c_iflag = termios_p->c_iflag & ~IBAUD0;
+  k_termios.c_iflag = termios_p->c_iflag;
   k_termios.c_oflag = termios_p->c_oflag;
   k_termios.c_cflag = termios_p->c_cflag;
   k_termios.c_lflag = termios_p->c_lflag;
-  k_termios.c_line = termios_p->c_line;
-#if _HAVE_C_ISPEED && _HAVE_STRUCT_TERMIOS_C_ISPEED
-  k_termios.c_ispeed = termios_p->c_ispeed;
-#endif
-#if _HAVE_C_OSPEED && _HAVE_STRUCT_TERMIOS_C_OSPEED
+  k_termios.c_line  = termios_p->c_line;
+
   k_termios.c_ospeed = termios_p->c_ospeed;
-#endif
-  memcpy (&k_termios.c_cc[0], &termios_p->c_cc[0],
-	  __KERNEL_NCCS * sizeof (cc_t));
+  k_termios.c_ispeed = termios_p->c_ispeed;
+
+  ___termios2_canonicalize_speeds (&k_termios);
+
+  copy_c_cc (k_termios.c_cc, _TERMIOS2_NCCS, termios_p->c_cc, NCCS);
+
+  /*
+   * Choose the proper ioctl number to invoke.
+   *
+   * Alpha got TCSETS2 late (Linux 4.20), but has the same structure
+   * format, and it only needs TCSETS2 if either it needs to use
+   * __BOTHER or split speed.  All other architectures have TCSETS2 as
+   * far back as the current glibc supports.  Calling TCSETS with
+   * __BOTHER causes unpredictable results on old Alpha kernels and
+   * could even crash them.
+   */
+  static_assert_equal(TCSADRAIN, TCSANOW + 1);
+  static_assert_equal(TCSAFLUSH, TCSANOW + 2);
+  static_assert_equal(TCSETSW2,  TCSETS2 + 1);
+  static_assert_equal(TCSETSF2,  TCSETS2 + 2);
+  static_assert_equal(TCSETSW,   TCSETS  + 1);
+  static_assert_equal(TCSETSF,   TCSETS  + 2);
+
+  cmd = (long)optional_actions - TCSANOW;
+  if (cmd > 2)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+
+  if (__ASSUME_TERMIOS2 ||
+      k_termios.c_ospeed != k_termios.c_ispeed ||
+      cbaud (k_termios.c_cflag) == __BOTHER)
+    {
+      cmd += TCSETS2;
+    }
+  else
+    {
+      cmd += TCSETS;
+      k_termios.c_cflag &= ~CIBAUD;
+    }
+
+  return INLINE_SYSCALL_CALL (ioctl, fd, cmd, &k_termios);
+}
+libc_hidden_def (__tcsetattr)
+
+#if _HAVE_STRUCT_OLD_TERMIOS && _TERMIOS_OLD_COMPAT
+
+versioned_symbol (libc, __tcsetattr, tcsetattr, GLIBC_2_42);
 
-  return INLINE_SYSCALL (ioctl, 3, fd, cmd, &k_termios);
+/* Legacy version for shorter struct termios without speed fields */
+int
+attribute_compat_text_section
+__old_tcsetattr (int fd, int optional_actions, const old_termios_t *termios_p)
+{
+  struct termios new_termios;
+
+  memset (&new_termios, 0, sizeof (new_termios));
+  new_termios.c_iflag  = termios_p->c_iflag;
+  new_termios.c_oflag  = termios_p->c_oflag;
+  new_termios.c_cflag  = termios_p->c_cflag;
+  new_termios.c_lflag  = termios_p->c_lflag;
+  new_termios.c_line   = termios_p->c_line;
+  copy_c_cc(new_termios.c_cc, NCCS, termios_p->c_cc, OLD_NCCS);
+
+  return __tcsetattr (fd, optional_actions, &new_termios);
 }
+compat_symbol (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0);
+
+#else
+
 weak_alias (__tcsetattr, tcsetattr)
-libc_hidden_def (tcsetattr)
+
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c b/sysdeps/unix/sysv/linux/termios_arch.h
index a5b0685..8dbf420 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c
+++ b/sysdeps/unix/sysv/linux/termios_arch.h
@@ -1,4 +1,6 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
+/* Architectural parameters for Linux termios - generic version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,10 +17,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <string.h>
-
-#define __strncasecmp __strncasecmp_power7
-
-extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden;
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
 
-#include <string/strncase.c>
+#define _HAVE_STRUCT_OLD_TERMIOS 0
diff --git a/sysdeps/unix/sysv/linux/termios_internals.h b/sysdeps/unix/sysv/linux/termios_internals.h
new file mode 100644
index 0000000..10b6732
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/termios_internals.h
@@ -0,0 +1,143 @@
+/* termios functions internal implementation header for Linux
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef TERMIOS_INTERNALS_H
+#define TERMIOS_INTERNALS_H 1
+
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sysdep.h>
+#include <shlib-compat.h>
+
+#include <termios_arch.h>
+
+/* ---- Kernel interface definitions ---- */
+
+/* The termios2 structure used in the kernel interfaces is not the
+   same as the termios structure we use in the libc.  Therefore we
+   must translate it here.  */
+
+struct termios2
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+#if _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE
+  cc_t c_cc[_TERMIOS2_NCCS];	/* control characters */
+  cc_t c_line;			/* line discipline */
+#else
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[_TERMIOS2_NCCS];	/* control characters */
+#endif
+  speed_t c_ispeed;		/* input speed */
+  speed_t c_ospeed;		/* output speed */
+};
+
+/* Alpha got termios2 late, but TCGETS has exactly the same structure
+   format and function as TCGETS2. On all other platforms, the termios2
+   interface exists as far back as this version of glibc supports.
+
+   For TCSETS* it is more complicated; this is handled in tcsetattr.c.
+
+   Some other architectures only have the equivalent of the termios2
+   interface, in which case the old ioctl names are the only ones
+   presented, but are equivalent to the new ones. */
+#ifndef TCGETS2
+# define TCGETS2  TCGETS
+# define TCSETS2  TCSETS
+# define TCSETSW2 TCSETSW
+# define TCSETSF2 TCSETSF
+#elif !__ASSUME_TERMIOS2
+/* Hack for Alpha */
+# undef  TCGETS2
+# define TCGETS2 TCGETS
+#endif
+
+/* ---- Application interface definitions ---- */
+
+/*
+ * Should old speed_t and struct termios (if applicable) compatibility
+ * functions be included?
+ */
+#define _TERMIOS_OLD_COMPAT SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_42)
+
+/*
+ * Old struct termios (without c_ispeed and c_ospeed fields) if
+ * applicable. The new struct termios *must* be binary identical up to
+ * the sizeof the old structure.
+ *
+ * This only applies to SPARC and MIPS; for other architectures the
+ * new and old speed_t interfaces both use the same struct termios.
+ */
+#if _HAVE_STRUCT_OLD_TERMIOS
+typedef struct old_termios old_termios_t;
+#else
+# define OLD_NCCS NCCS
+typedef struct termios old_termios_t;
+#endif
+
+/* ---- Internal function definitions ---- */
+
+/*
+ * Copy a set of c_cc fields of possibly different width. If the target
+ * field is longer, then fill the remainder with _POSIX_VDISABLE.
+ */
+static inline void
+copy_c_cc (cc_t *to, size_t nto, const cc_t *from, size_t nfrom)
+{
+  if (nto < nfrom)
+    nfrom = nto;
+
+  to = __mempcpy (to, from, nfrom * sizeof(cc_t));
+  if (nto > nfrom)
+    memset (to, _POSIX_VDISABLE, (nto - nfrom) * sizeof(cc_t));
+}
+
+/* Extract the output and input legacy speed fields from c_cflag. */
+static inline tcflag_t
+cbaud (tcflag_t c_cflag)
+{
+  return c_cflag & CBAUD;
+}
+
+static inline tcflag_t
+cibaud (tcflag_t c_cflag)
+{
+  return cbaud (c_cflag >> IBSHIFT);
+}
+
+extern speed_t
+___cbaud_to_speed (tcflag_t c_cflag, speed_t other)
+    __attribute_const__ attribute_hidden;
+
+extern tcflag_t
+___speed_to_cbaud (speed_t speed)
+    __attribute_const__ attribute_hidden;
+
+extern void
+___termios2_canonicalize_speeds (struct termios2 *k_termios_p)
+    attribute_hidden;
+
+#endif /* TERMIOS_INTERNALS_H */
diff --git a/sysdeps/unix/sysv/linux/tst-pkey.c b/sysdeps/unix/sysv/linux/tst-pkey.c
index 4d12d2e..1000d8f 100644
--- a/sysdeps/unix/sysv/linux/tst-pkey.c
+++ b/sysdeps/unix/sysv/linux/tst-pkey.c
@@ -191,7 +191,7 @@ do_test (void)
   pthread_t delayed_thread = xpthread_create
     (NULL, &delayed_thread_func, &delayed_thread_check_access);
 
-  keys[0] = pkey_alloc (0, 0);
+  keys[0] = pkey_alloc (0, PKEY_UNRESTRICTED);
   if (keys[0] < 0)
     {
       if (errno == ENOSYS)
@@ -333,7 +333,7 @@ do_test (void)
           if (i == allowed_key)
             {
               if (do_write)
-                TEST_COMPARE (pkey_set (keys[i], 0), 0);
+                TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0);
               else
                 TEST_COMPARE (pkey_set (keys[i], PKEY_DISABLE_WRITE), 0);
             }
@@ -360,7 +360,7 @@ do_test (void)
      inherit that access.  */
   for (int i = 0; i < key_count; ++i)
     {
-      TEST_COMPARE (pkey_set (keys[i], 0), 0);
+      TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0);
       TEST_VERIFY (check_page_access (i, false));
       TEST_VERIFY (check_page_access (i, true));
     }
diff --git a/sysdeps/unix/sysv/linux/tst-termios-linux.c b/sysdeps/unix/sysv/linux/tst-termios-linux.c
new file mode 100644
index 0000000..e4b0c8b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-termios-linux.c
@@ -0,0 +1,592 @@
+/* Linux termios regression tests
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include <shlib-compat.h>
+#include <array_length.h>
+
+#include <support/check.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/tty.h>
+
+/* Evaluate an expression and make sure errno did not get set; return
+   the value of the expression */
+#define CHECKERR(expr)				\
+  ({						\
+    errno = 0;					\
+    const __typeof (expr) _val = (expr);	\
+    TEST_COMPARE(errno, 0);			\
+    _val;					\
+  })
+
+/* Evaluate an expression and verify that it returns a specific value,
+   as well as errno not having been set. */
+#define VERIFY(expr,val) TEST_COMPARE(CHECKERR(expr), val)
+/* Check for zero and errno not set */
+#define CHECKZERO(expr)  VERIFY(expr, 0)
+
+/* Table of legacy speed constants */
+
+#define BOGUS ((speed_t)-1)
+#define ANY   ((speed_t)-2)
+
+struct cbaud_table
+{
+  speed_t speed;
+  speed_t cbaud;
+  const char *name;
+};
+
+static const struct cbaud_table cbaud_table [] =
+{
+  { 0, __B0, "__B0" },
+  { 50, __B50, "__B50" },
+  { 75, __B75, "__B75" },
+  { 110, __B110, "__B110" },
+  { 134, __B134, "__B134" },
+  { 150, __B150, "__B150" },
+  { 200, __B200, "__B200" },
+  { 300, __B300, "__B300" },
+  { 600, __B600, "__B600" },
+  { 1200, __B1200, "__B1200" },
+  { 1800, __B1800, "__B1800" },
+  { 2400, __B2400, "__B2400" },
+  { 4800, __B4800, "__B4800" },
+#ifdef __B7200
+  { 7200, __B7200, "__B7200" },
+#endif
+  { 9600, __B9600, "__B9600" },
+#ifdef __B14400
+  { 14400, __B14400, "__B14400" },
+#endif
+  { 19200, __B19200, "__B19200" },
+#ifdef __B28800
+  { 28800, __B28800, "__B28800" },
+#endif
+  { 38400, __B38400, "__B38400" },
+  { 57600, __B57600, "__B57600" },
+#ifdef __B76800
+  { 76800, __B76800, "__B76800" },
+#endif
+  { 115200, __B115200, "__B115200" },
+#ifdef __B153600
+  { 153600, __B153600, "__B153600" },
+#endif
+  { 230400, __B230400, "__B230400" },
+#ifdef __B307200
+  { 307200, __B307200, "__B307200" },
+#endif
+  { 460800, __B460800, "__B460800" },
+  { 500000, __B500000, "__B500000" },
+  { 576000, __B576000, "__B576000" },
+#ifdef __B614400
+  { 614400, __B614400, "__B614400" },
+#endif
+  { 921600, __B921600, "__B921600" },
+  { 1000000, __B1000000, "__B1000000" },
+  { 1152000, __B1152000, "__B1152000" },
+  { 1500000, __B1500000, "__B1500000" },
+  { 2000000, __B2000000, "__B2000000" },
+#ifdef __B2500000
+  { 2500000, __B2500000, "__B2500000" },
+#endif
+#ifdef __B3000000
+  { 3000000, __B3000000, "__B3000000" },
+#endif
+#ifdef __B3500000
+  { 3500000, __B3500000, "__B3500000" },
+#endif
+#ifdef __B4000000
+  { 4000000, __B4000000, "__B4000000" },
+#endif
+  { ANY, __BOTHER, "__BOTHER" },
+  { BOGUS, BOGUS, "invalid" }
+};
+
+/* List of common speeds to test */
+
+static const speed_t test_speeds [] =
+{
+  0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400,
+  4800, 7200, 9600, 14400, 19200, 28800, 33600, 38400, 57600,
+  76800, 115200, 153600, 230400, 307200, 460800, 500000,
+  576000, 614400, 921600, 1000000, 1152000, 1500000, 2000000,
+  2500000, 3000000, 3500000, 4000000, 5000000, 10000000
+};
+
+/* Speed function tests */
+
+/* These intentionally are a separate implementation from speed.c;
+   these should be "trivially correct" and don't need to be optimized
+   in any way */
+
+/* Returns __BOTHER if there is no legacy value for this speed */
+static speed_t speed_to_cbaud (speed_t speed)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->speed != ANY; ct++)
+    {
+      if (ct->speed == speed)
+	break;
+    }
+  return ct->cbaud;
+}
+
+/* Returns ANY if cbaud is __BOTHER, or BOGUS if invalid */
+static speed_t cbaud_to_speed (speed_t cbaud)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->cbaud != BOGUS; ct++)
+    {
+      if (ct->cbaud == cbaud)
+	break;
+    }
+  return ct->speed;
+}
+
+static const char *cbaud_name (speed_t cbaud)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->cbaud != BOGUS; ct++)
+    {
+      if (ct->cbaud == cbaud)
+	break;
+    }
+  return ct->name;
+}
+
+static int check_speed (speed_t expected, speed_t speed, speed_t cbaud,
+			speed_t cfspeed, baud_t cfbaud, char io)
+{
+  speed_t want_cbaud;
+  cbaud &= CBAUD;
+
+  if (expected != ANY && speed != expected)
+    FAIL_RET ("c_%cspeed = %u, expected %u", io, speed, expected);
+
+  if (cfspeed != speed)
+    FAIL_RET ("cfget%cspeed = %u, expected %u", io, cfspeed, speed);
+
+  if (cfbaud != cfspeed)
+    FAIL_RET ("cfget%cbaud = %u, but cfget%cspeed = %u",
+	      io, cfbaud, io, cfspeed);
+
+  want_cbaud = speed_to_cbaud (speed);
+
+  if (cbaud != want_cbaud)
+    FAIL_RET ("c_%cspeed = %u: %s = %s (%06o), should be %s (%06o)",
+	      io, speed,
+	      io == 'o' ? "CBAUD" : "CIBAUD", cbaud_name (cbaud), cbaud,
+	      cbaud_name (want_cbaud), want_cbaud);
+
+  return 0;
+}
+
+/* Validate that the speeds in the struct termios are properly normalized.
+   The two checkers below differ in how they handle ispeed == 0. */
+
+/* Use this after cfset* () */
+static void check_speeds_cf (const struct termios *tio_p,
+			     speed_t ospeed, speed_t ispeed)
+{
+  check_speed (ospeed, tio_p->c_ospeed, tio_p->c_cflag,
+	       CHECKERR (cfgetospeed (tio_p)),
+	       CHECKERR (cfgetobaud (tio_p)), 'o');
+  check_speed (ispeed, tio_p->c_ispeed, tio_p->c_cflag >> IBSHIFT,
+	       CHECKERR (cfgetispeed (tio_p)),
+	       CHECKERR (cfgetibaud (tio_p)), 'i');
+}
+
+/* Use this after tc[gs]etattr () */
+static void check_speeds_tc (int fd, speed_t ospeed, speed_t ispeed)
+{
+  struct termios tio;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  check_speeds_cf (&tio, ospeed, ispeed ? ispeed : ospeed);
+}
+
+/* For search and replace convenience */
+#define check_bauds_cf check_speeds_cf
+#define check_bauds_tc check_speeds_tc
+
+/* Common routine for setting speeds, with checking */
+static void
+set_speeds (int fd, speed_t ospeed, speed_t ispeed)
+{
+  struct termios tio;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  CHECKZERO (cfsetospeed (&tio, ospeed));
+  CHECKZERO (cfsetispeed (&tio, ispeed));
+  check_speeds_cf (&tio, ospeed, ispeed);
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, ospeed, ispeed ? ispeed : ospeed);
+}
+
+/* Actual tests */
+
+typedef void (*speed_test_t)(int ttyfd, speed_t speed);
+static void
+run_speed_test (int fd, speed_test_t test);
+
+/* New interface cfset*speed test */
+static void
+new_cfspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t old_ospeed, old_ispeed;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_ospeed = CHECKERR (cfgetospeed (&tio));
+  old_ispeed = CHECKERR (cfgetispeed (&tio));
+
+  /* Check initial normalization */
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+
+  /* Check cfset*speed normalization */
+  CHECKZERO (cfsetospeed (&tio, speed));
+  check_speeds_cf (&tio, speed, old_ispeed);
+  CHECKZERO (cfsetispeed (&tio, speed));
+  check_speeds_cf (&tio, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  check_speeds_cf (&tio, old_ospeed, speed);
+  CHECKZERO (cfsetispeed (&tio, B0));
+  check_speeds_cf (&tio, old_ospeed, B0);
+  CHECKZERO (cfsetspeed (&tio, speed));
+  check_speeds_cf (&tio, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  CHECKZERO (cfsetispeed (&tio, old_ispeed));
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+}
+
+/* New interface cfset*speed test with tcsetattr */
+static void
+new_tcspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t old_ospeed, old_ispeed;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_ospeed = CHECKERR (cfgetospeed (&tio));
+  old_ispeed = CHECKERR (cfgetispeed (&tio));
+
+  /* Check initial normalization */
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+
+  /* Check cfset*speed normalization */
+  CHECKZERO (cfsetospeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, old_ispeed);
+  CHECKZERO (cfsetispeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, speed);
+  CHECKZERO (cfsetispeed (&tio, B0));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, B0);
+  CHECKZERO (cfsetspeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  CHECKZERO (cfsetispeed (&tio, old_ispeed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, old_ispeed);
+}
+
+/* New interface cfset*baud test */
+static void
+new_cfbaud_test (int fd, baud_t baud)
+{
+  struct termios tio;
+  baud_t old_obaud, old_ibaud;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_obaud = CHECKERR (cfgetobaud (&tio));
+  old_ibaud = CHECKERR (cfgetibaud (&tio));
+
+  /* Check initial normalization */
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+
+  /* Check cfset*baud normalization */
+  CHECKZERO (cfsetobaud (&tio, baud));
+  check_bauds_cf (&tio, baud, old_ibaud);
+  CHECKZERO (cfsetibaud (&tio, baud));
+  check_bauds_cf (&tio, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  check_bauds_cf (&tio, old_obaud, baud);
+  CHECKZERO (cfsetibaud (&tio, B0));
+  check_bauds_cf (&tio, old_obaud, B0);
+  CHECKZERO (cfsetbaud (&tio, baud));
+  check_bauds_cf (&tio, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  CHECKZERO (cfsetibaud (&tio, old_ibaud));
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+}
+
+/* New interface cfset*baud test with tcsetattr */
+static void
+new_tcbaud_test (int fd, baud_t baud)
+{
+  struct termios tio;
+  baud_t old_obaud, old_ibaud;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_obaud = CHECKERR (cfgetobaud (&tio));
+  old_ibaud = CHECKERR (cfgetibaud (&tio));
+
+  /* Check initial normalization */
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+
+  /* Check cfset*baud normalization */
+  CHECKZERO (cfsetobaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, old_ibaud);
+  CHECKZERO (cfsetibaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, baud);
+  CHECKZERO (cfsetibaud (&tio, B0));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, B0);
+  CHECKZERO (cfsetbaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  CHECKZERO (cfsetibaud (&tio, old_ibaud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, old_ibaud);
+}
+
+/*
+ * Old interface tests. This depends critically on the new struct
+ * termios being guaranteed to be a superset of the legacy struct
+ * termios.
+ */
+#if TEST_COMPAT (libc, GLIBC_2_0, GLIBC_2_42)
+extern int __old_cfsetospeed (struct termios *tio_p, speed_t speed);
+compat_symbol_reference (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0);
+extern int __old_cfsetispeed (struct termios *tio_p, speed_t speed);
+compat_symbol_reference (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0);
+extern speed_t __old_cfgetospeed (const struct termios *tio_p);
+compat_symbol_reference (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0);
+extern speed_t __old_cfgetispeed (const struct termios *tio_p);
+compat_symbol_reference (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0);
+extern int __old_tcsetattr (int fd, int act, const struct termios *tio_p);
+compat_symbol_reference (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0);
+extern int __old_tcgetattr (int fd, struct termios *tio_p);
+compat_symbol_reference (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0);
+
+static int old_tcsetattr (int fd, const struct termios *tio_p)
+{
+  struct termios old_tio = *tio_p;
+
+  /* Deliberately corrupt c_ispeed and c_ospeed */
+  old_tio.c_ispeed = 0xdeadbeef;
+  old_tio.c_ospeed = 0xfeedface;
+  return __old_tcsetattr (fd, TCSANOW, &old_tio);
+}
+static int old_tcgetattr (int fd, struct termios *tio_p)
+{
+  int rv;
+  memset (tio_p, 0xde, sizeof *tio_p);
+  rv = __old_tcgetattr (fd, tio_p);
+  if (rv)
+    return rv;
+
+  /* Deliberately corrupt c_ispeed and c_ospeed */
+  tio_p->c_ispeed = 0xdeadbeef;
+  tio_p->c_ospeed = 0xfeedface;
+  return 0;
+}
+
+/* Old interface test. This relies on the new struct termios always
+   being a binary superset of the old one.
+   This doesn't bother testing split speed, since that never worked
+   on the old glibc. */
+static void
+old_tcspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t cbaud;
+
+  if (!speed)
+    return;			/* Skip B0 for this test */
+
+  cbaud = speed_to_cbaud (speed);
+  if (cbaud == __BOTHER)
+    return;
+
+  CHECKZERO (old_tcgetattr (fd, &tio));
+  CHECKZERO (__old_cfsetospeed (&tio, cbaud));
+  VERIFY (__old_cfgetospeed (&tio), cbaud);
+  CHECKZERO (__old_cfsetispeed (&tio, cbaud));
+  VERIFY (__old_cfgetispeed (&tio), cbaud);
+  CHECKZERO (old_tcsetattr (fd, &tio));
+  check_speeds_tc (fd, speed, speed);
+}
+
+/* Verify that invalid CBAUD values return error for the old interfaces
+   and that valid ones are accepted and read back unchanged. */
+static void
+old_invalid_speeds_test (int fd)
+{
+  struct termios tio;
+  speed_t cbaud = 0;
+
+  /* Start from the tty's current settings, like old_tcspeed_test does */
+  CHECKZERO (old_tcgetattr (fd, &tio));
+
+  /* do-while so that cbaud == 0 is tested; stops once the shift wraps to 0 */
+  do {
+    speed_t realspeed = (cbaud & ~CBAUD) ? BOGUS : cbaud_to_speed (cbaud);
+    if (realspeed >= ANY)
+      {
+	errno = 0;
+	if (__old_cfsetospeed (&tio, cbaud) != -1 || errno != EINVAL)
+	  FAIL("__old_cfsetospeed() accepted invalid value %06o", cbaud);
+
+	errno = 0;
+	if (__old_cfsetispeed (&tio, cbaud) != -1 || errno != EINVAL)
+	  FAIL("__old_cfsetispeed() accepted invalid value %06o", cbaud);
+      }
+    else
+      {
+	CHECKZERO (__old_cfsetospeed (&tio, cbaud));
+	VERIFY (__old_cfgetospeed (&tio), cbaud);
+	CHECKZERO (__old_cfsetispeed (&tio, cbaud));
+	VERIFY (__old_cfgetispeed (&tio), cbaud);
+	if (cbaud)
+	  {
+	    CHECKZERO (old_tcsetattr (fd, &tio));
+	    check_speeds_tc (fd, realspeed, realspeed);
+	  }
+      }
+  } while ((cbaud > 0xffff ? (cbaud <<= 1) : ++cbaud) != 0);
+}
+
+static void
+compat_tests (int fd)
+{
+  run_speed_test (fd, old_tcspeed_test);
+  old_invalid_speeds_test (fd);
+}
+#else /* No TEST_COMPAT */
+#define compat_tests(fd) ((void)(fd))
+#endif
+
+static void
+run_speed_test (int fd, speed_test_t test)
+{
+  unsigned short seed [3] = { 0x1234, 0x5678, 0x9abc };
+  struct speeds {
+    speed_t ospeed, ispeed;
+  };
+  static const struct speeds initial_speeds [] = {
+    { 2400, 2400 },		/* Standard speed, non-split */
+    { 123456, 123456 },		/* Nonstandard speed, non-split */
+    { 75, 1200 },		/* Standard split speeds */
+    { 9600, 456789 },		/* One standard, one nonstandard */
+    { 54321, 1234567890 }	/* Nonstandard, one very high */
+  };
+
+  array_foreach_const (is, initial_speeds)
+    {
+      /* Set up initial conditions */
+      set_speeds (fd, is->ospeed, is->ispeed);
+
+      /* Test all common speeds */
+      array_foreach_const (ts, test_speeds)
+	test (fd, *ts);
+
+      /* Test pseudorandom speeds; array_length(test_speeds)
+	 here is an arbitrary value */
+      const size_t random_test_count = array_length(test_speeds);
+      for (size_t i = 0 ; i < random_test_count ; i++)
+	test (fd, (speed_t) jrand48 (seed));
+
+      /* Test power-of-2 speeds */
+      for (speed_t s = 1 ; s ; s <<= 1)
+	test (fd, s);
+
+      /* Test power of 2 multiples of 75; 75 << 25 is the maximum below 2^32 */
+      for (int i = 0 ; i < 26 ; i++)
+	test (fd, (speed_t)75 << i);
+    }
+}
+
+static void
+run_speed_tests (int fd)
+{
+  /* Test proper canonicalization using the new interface */
+  run_speed_test (fd, new_cfspeed_test);
+  run_speed_test (fd, new_tcspeed_test);
+
+  /* Try the new cfset*baud() functions */
+  run_speed_test (fd, new_cfbaud_test);
+  run_speed_test (fd, new_tcbaud_test);
+
+  /* Tests of the legacy functions */
+  compat_tests (fd);
+}
+
+/* test dispatch */
+
+static void
+run_in_chroot (void)
+{
+  /* Create a pty slave to use as a tty. Most of the termios settings,
+     including the speeds, have no impact on a pty, but they are still
+     settable like for any other tty, which makes them very convenient
+     for testing. */
+  int ptmfd, ttyfd;
+
+  support_openpty (&ptmfd, &ttyfd, NULL, NULL, NULL);
+  run_speed_tests (ttyfd);
+  close (ttyfd);
+  close (ptmfd);
+}
+
+static int
+do_test (void)
+{
+  support_become_root ();
+  run_in_chroot ();
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
index 17b84c7..06fbae5 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
@@ -207,6 +207,7 @@
 #define __NR_open 2
 #define __NR_open_by_handle_at 304
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 257
 #define __NR_openat2 437
 #define __NR_pause 34
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
index 7ab9073..5648772 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
@@ -2748,6 +2748,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
index b7207e1..6719814 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
@@ -1278,6 +1278,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1294,6 +1302,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
index fb834a7..6938382 100644
--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
@@ -87,10 +87,10 @@ $(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
 $(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
 $(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
 
-CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
-CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile -DTEST_AMX
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
 endif
 
 endif # $(subdir) == elf
diff --git a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
index 006c532..812e023 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
+++ b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
@@ -22,7 +22,7 @@
 extern void restore_rt (void) asm ("__restore_rt") attribute_hidden;
 
 #define SET_SA_RESTORER(kact, act)			\
-  (kact)->sa_flags = (act)->sa_flags | SA_RESTORER;	\
+  (kact)->sa_flags |= SA_RESTORER;			\
   (kact)->sa_restorer = &restore_rt
 
 #define RESET_SA_RESTORER(act, kact) 			\
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
index 1dcd6ab..135ef3d 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
@@ -200,6 +200,7 @@
 #define __NR_open 1073741826
 #define __NR_open_by_handle_at 1073742128
 #define __NR_open_tree 1073742252
+#define __NR_open_tree_attr 1073742291
 #define __NR_openat 1073742081
 #define __NR_openat2 1073742261
 #define __NR_pause 1073741858
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
index e11876f..25a39d0 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
@@ -2767,6 +2767,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
index 14fa3f8..1a1069a 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
@@ -1278,6 +1278,14 @@ GLIBC_2.41 tanpif32x F
 GLIBC_2.41 tanpif64 F
 GLIBC_2.41 tanpif64x F
 GLIBC_2.41 tanpil F
+GLIBC_2.42 compoundn F
+GLIBC_2.42 compoundnf F
+GLIBC_2.42 compoundnf128 F
+GLIBC_2.42 compoundnf32 F
+GLIBC_2.42 compoundnf32x F
+GLIBC_2.42 compoundnf64 F
+GLIBC_2.42 compoundnf64x F
+GLIBC_2.42 compoundnl F
 GLIBC_2.42 pown F
 GLIBC_2.42 pownf F
 GLIBC_2.42 pownf128 F
@@ -1294,6 +1302,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 01b0192..2aca36c 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -4,7 +4,13 @@ endif
 
 ifeq ($(subdir),elf)
 sysdep_routines += get-cpuid-feature-leaf
-sysdep-dl-routines += dl-get-cpu-features
+sysdep-dl-routines += \
+  dl-get-cpu-features \
+  dl-tlsdesc \
+  tls_get_addr \
+  tlsdesc \
+# sysdep-dl-routines
+
 sysdep_headers += \
   bits/platform/features.h \
   bits/platform/x86.h \
@@ -90,14 +96,22 @@ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512
 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
 tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
 
-CFLAGS-tst-gnu2-tls2.c += -msse
+CFLAGS-tst-gnu2-tls2.c += -msse2
 CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
 CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
 CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
 
-LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy
+LDFLAGS-tst-gnu2-tls2 += -rdynamic
+LDFLAGS-tst-gnu2-tls2mod0.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod1.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod2.so += -Wl,-z,undefs
+
+CFLAGS-tst-gnu2-tls2-x86-noxsave.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavec.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavexsavec.c += -msse2
+LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy -rdynamic
 
 # Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled
 # via tunable.
@@ -113,6 +127,14 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \
   $(objpfx)tst-gnu2-tls2mod0.so \
   $(objpfx)tst-gnu2-tls2mod1.so \
   $(objpfx)tst-gnu2-tls2mod2.so
+
+CFLAGS-tst-tls23.c += -msse2
+CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell
+
+LDFLAGS-tst-tls23 += -rdynamic
+tst-tls23-mod.so-no-z-defs = yes
+
+$(objpfx)tst-tls23-mod.so: $(libsupport)
 endif
 
 ifeq ($(subdir),math)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index e50f1d6..b7d1506 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1256,7 +1256,7 @@ no_cpuid:
 #endif
 
   if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
-      || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
+      || cpu_features->xsave_state_size != 0)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
 	{
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index c3c73e7..b8e963b 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -183,6 +183,29 @@
 
 #define atom_text_section .section ".text.atom", "ax"
 
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+   __tls_get_addr may be called with 8-byte/4-byte stack alignment.
+   Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
+   assume that the stack will always be aligned to 16 bytes.  */
+# ifdef __x86_64__
+#  define DL_STACK_ALIGNMENT 8
+#  define MINIMUM_ALIGNMENT 16
+# else
+#  define DL_STACK_ALIGNMENT 4
+# endif
+#endif
+
+/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align the stack for
+   STATE_SAVE or align the stack to MINIMUM_ALIGNMENT bytes before calling
+   _dl_fixup/__tls_get_addr.  */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
+
 #endif	/* __ASSEMBLER__ */
 
 #endif	/* _X86_SYSDEP_H */
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
index de900a4..b3195ff 100644
--- a/sysdeps/x86/tst-gnu2-tls2.c
+++ b/sysdeps/x86/tst-gnu2-tls2.c
@@ -1,20 +1,26 @@
-#ifndef __x86_64__
-#include <sys/platform/x86.h>
+#ifndef TEST_AMX
+# ifndef __x86_64__
+# include <sys/platform/x86.h>
 
-#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
-#endif
+# define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+# endif
 
-/* Clear XMM0...XMM7  */
-#define PREPARE_MALLOC()				\
-{							\
-  asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" );	\
-  asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" );	\
-  asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" );	\
-  asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" );	\
-  asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" );	\
-  asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" );	\
-  asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" );	\
-  asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" );	\
+/* Set XMM0...XMM7 to all 1s.  */
+# define PREPARE_MALLOC()					\
+{								\
+  asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" );	\
+  asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" );	\
+  asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" );	\
+  asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" );	\
+  asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" );	\
+  asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" );	\
+  asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" );	\
+  asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" );	\
 }
+#endif
 
 #include <elf/tst-gnu2-tls2.c>
+
+#ifndef TEST_AMX
+v2di v1, v2, v3;
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/x86/tst-gnu2-tls2.h
index c9d2f4e..fdbb565 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S
+++ b/sysdeps/x86/tst-gnu2-tls2.h
@@ -1,5 +1,5 @@
-/* Optimized memchr implementation for POWER10/PPC64.
-   Copyright (C) 2016-2025 Free Software Foundation, Inc.
+/* Test TLSDESC relocation, x86 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,22 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define MEMCHR __memchr_power10
+#ifndef TEST_AMX
+# include <support/check.h>
 
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-#undef weak_alias
-#define weak_alias(name,alias)
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
 
-#include <sysdeps/powerpc/powerpc64/le/power10/memchr.S>
+# define BEFORE_TLSDESC_CALL()					\
+  v1 = __extension__(v2di){0, 0};				\
+  v2 = __extension__(v2di){0, 0};
+
+# define AFTER_TLSDESC_CALL()					\
+  v3 = __extension__(v2di){0, 0};				\
+  asm volatile ("" : "+x" (v3));				\
+  union { v2di x; long long a[2]; } u;				\
+  u.x = v3;							\
+  TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
 #endif
+
+#include <elf/tst-gnu2-tls2.h>
diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c
new file mode 100644
index 0000000..6130d91
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.c
@@ -0,0 +1,22 @@
+#ifndef __x86_64__
+#include <sys/platform/x86.h>
+
+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+#endif
+
+/* Set XMM0...XMM7 to all 1s.  */
+#define PREPARE_MALLOC()					\
+{								\
+  asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" );	\
+  asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" );	\
+  asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" );	\
+  asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" );	\
+  asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" );	\
+  asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" );	\
+  asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" );	\
+  asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" );	\
+}
+
+#include <elf/tst-tls23.c>
+
+v2di v1, v2, v3;
diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h
new file mode 100644
index 0000000..21cee4c
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.h
@@ -0,0 +1,35 @@
+/* Test that __tls_get_addr preserves XMM registers.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <support/check.h>
+
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
+
+#define BEFORE_TLS_CALL()					\
+  v1 = __extension__(v2di){0, 0};				\
+  v2 = __extension__(v2di){0, 0};
+
+#define AFTER_TLS_CALL()					\
+  v3 = __extension__(v2di){0, 0};				\
+  asm volatile ("" : "+x" (v3));				\
+  union { v2di x; long long a[2]; } u;				\
+  u.x = v3;							\
+  TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
+
+#include <elf/tst-tls23.h>
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 5723ec1..be64eb2 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -41,9 +41,6 @@ ifeq ($(subdir),elf)
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
 		   -mno-mmx)
 
-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
-
-tests += ifuncmain8
 modules-names += ifuncmod8
 
 $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so
@@ -214,6 +211,24 @@ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
 endif
 
 test-internal-extras += tst-gnu2-tls2mod1
+
+tests-special += $(objpfx)check-rtld.out
+
+$(objpfx)rtld.reloc: $(objpfx)rtld.os
+	@rm -f $@T
+	LC_ALL=C $(READELF) -rW $< > $@T
+	test -s $@T
+	mv -f $@T $@
+common-generated += $(objpfx)rtld.reloc
+
+# Verify that there are no run-time relocations against __ehdr_start or
+# _end.
+$(objpfx)check-rtld.out: $(objpfx)rtld.reloc
+	LC_ALL=C; \
+	if grep -E "R_X86_64_64.*(__ehdr_start|_end)" $^ > $@; \
+	  then false; else true; fi; \
+	$(evaluate-test)
+generated += check-rtld.out
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index d1bb125..9a55fc5 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -22,7 +22,6 @@
 #include <features-offsets.h>
 #include <isa-level.h>
 #include "tlsdesc.h"
-#include "dl-trampoline-save.h"
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_tlsdesc_dynamic.  */
diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
deleted file mode 100644
index 761128d..0000000
--- a/sysdeps/x86_64/dl-trampoline-save.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* x86-64 PLT trampoline register save macros.
-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
-
-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-
-   __tls_get_addr may be called with 8-byte stack alignment.  Although
-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
-   that stack will be always aligned at 16 bytes.  */
-# define DL_STACK_ALIGNMENT 8
-#endif
-
-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
-   stack to 16 bytes before calling _dl_fixup.  */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
-   || 16 > DL_STACK_ALIGNMENT)
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a055722..ac85f96 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -22,7 +22,6 @@
 #include <features-offsets.h>
 #include <link-defines.h>
 #include <isa-level.h>
-#include "dl-trampoline-save.h"
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_fixup.  */
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a834977..c2dcadd 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -922,7 +922,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				     (CPU_FEATURE_USABLE (AVX2)
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncpy_avx2)
-	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
 				     1,
 				     __wcsncpy_generic))
 
@@ -952,7 +952,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				     (CPU_FEATURE_USABLE (AVX2)
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcpncpy_avx2)
-	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
 				     1,
 				     __wcpncpy_generic))