366 files changed, 9290 insertions, 4125 deletions
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 4b7f8a5..bb97d31 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -41,15 +41,18 @@ gen-as-const-headers += \
   dl-link.sym \
   rtld-global-offsets.sym
 
-tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2
+tests-internal += \
+  tst-ifunc-arg-1 \
+  tst-ifunc-arg-2 \
+  tst-ifunc-arg-3 \
+  tst-ifunc-arg-4 \
+  # tests-internal
 
-ifeq (yes,$(aarch64-variant-pcs))
 tests += tst-vpcs
 modules-names += tst-vpcs-mod
 LDFLAGS-tst-vpcs-mod.so = -Wl,-z,lazy
 $(objpfx)tst-vpcs: $(objpfx)tst-vpcs-mod.so
 endif
-endif
 
 ifeq ($(subdir),csu)
 gen-as-const-headers += \
@@ -75,7 +78,9 @@ sysdep_routines += \
   __alloc_gcs
 
 tests += \
-  tst-sme-jmp
+  tst-sme-jmp \
+  tst-sme-za-state \
+  # tests
 endif
 
 ifeq ($(subdir),malloc)
diff --git a/sysdeps/aarch64/__alloc_gcs.c b/sysdeps/aarch64/__alloc_gcs.c
index e70b459..b98e5fc 100644
--- a/sysdeps/aarch64/__alloc_gcs.c
+++ b/sysdeps/aarch64/__alloc_gcs.c
@@ -15,6 +15,8 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include "aarch64-gcs.h"
+
 #include <sysdep.h>
 #include <unistd.h>
 #include <sys/mman.h>
@@ -34,7 +36,7 @@ map_shadow_stack (void *addr, size_t size, unsigned long flags)
 #define GCS_ALTSTACK_RESERVE 160
 
 void *
-__alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
+__alloc_gcs (size_t stack_size, struct gcs_record *gcs)
 {
   size_t size = (stack_size / 2 + GCS_ALTSTACK_RESERVE) & -8UL;
   if (size > GCS_MAX_SIZE)
@@ -45,9 +47,6 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
   if (base == MAP_FAILED)
     return NULL;
 
-  *ss_base = base;
-  *ss_size = size;
-
   uint64_t *gcsp = (uint64_t *) ((char *) base + size);
   /* Skip end of GCS token.  */
   gcsp--;
@@ -58,6 +57,14 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size)
       __munmap (base, size);
       return NULL;
     }
+
+  if (gcs != NULL)
+    {
+      gcs->gcs_base = base;
+      gcs->gcs_token = gcsp;
+      gcs->gcs_size = size;
+    }
+
   /* Return the target GCS pointer for context switch.  */
   return gcsp + 1;
 }
diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S
index 6290803..92f4814 100644
--- a/sysdeps/aarch64/__arm_za_disable.S
+++ b/sysdeps/aarch64/__arm_za_disable.S
@@ -88,10 +88,8 @@ L(save_loop):
 L(end):
 	ret
 L(fail):
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-#endif
+	paciasp
+	cfi_negate_ra_state
 	stp	x29, x30, [sp, -32]!
 	cfi_adjust_cfa_offset (32)
 	cfi_rel_offset (x29, 0)
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 981bf80..70ac02c 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -24,51 +24,43 @@
 /* __longjmp(jmpbuf, val) */
 
 ENTRY (__longjmp)
-	cfi_def_cfa(x0, 0)
-	cfi_offset(x19, JB_X19<<3)
-	cfi_offset(x20, JB_X20<<3)
-	cfi_offset(x21, JB_X21<<3)
-	cfi_offset(x22, JB_X22<<3)
-	cfi_offset(x23, JB_X23<<3)
-	cfi_offset(x24, JB_X24<<3)
-	cfi_offset(x25, JB_X25<<3)
-	cfi_offset(x26, JB_X26<<3)
-	cfi_offset(x27, JB_X27<<3)
-	cfi_offset(x28, JB_X28<<3)
-	cfi_offset(x29, JB_X29<<3)
-	cfi_offset(x30, JB_LR<<3)
-
-	cfi_offset( d8, JB_D8<<3)
-	cfi_offset( d9, JB_D9<<3)
-	cfi_offset(d10, JB_D10<<3)
-	cfi_offset(d11, JB_D11<<3)
-	cfi_offset(d12, JB_D12<<3)
-	cfi_offset(d13, JB_D13<<3)
-	cfi_offset(d14, JB_D14<<3)
-	cfi_offset(d15, JB_D15<<3)
 
 #if IS_IN(libc)
-	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.  */
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# endif
-	stp	x29, x30, [sp, -16]!
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
+	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.
+	   The calling convention of __libc_arm_za_disable lets us keep the
+	   return address in x13 across the call instead of on the stack.
+	   As a result we also don't need to sign the return address and
+	   check it after returning because it is not stored to stack.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
 	bl	__libc_arm_za_disable
-	ldp	x29, x30, [sp], 16
-	cfi_adjust_cfa_offset (-16)
-	cfi_restore (x29)
-	cfi_restore (x30)
-# if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-# endif
+	mov	x30, x13
+	cfi_register (x13, x30)
 #endif
 
+	cfi_def_cfa (x0, 0)
+	cfi_offset (x19, JB_X19<<3)
+	cfi_offset (x20, JB_X20<<3)
+	cfi_offset (x21, JB_X21<<3)
+	cfi_offset (x22, JB_X22<<3)
+	cfi_offset (x23, JB_X23<<3)
+	cfi_offset (x24, JB_X24<<3)
+	cfi_offset (x25, JB_X25<<3)
+	cfi_offset (x26, JB_X26<<3)
+	cfi_offset (x27, JB_X27<<3)
+	cfi_offset (x28, JB_X28<<3)
+	cfi_offset (x29, JB_X29<<3)
+	cfi_offset (x30, JB_LR<<3)
+
+	cfi_offset ( d8, JB_D8<<3)
+	cfi_offset ( d9, JB_D9<<3)
+	cfi_offset (d10, JB_D10<<3)
+	cfi_offset (d11, JB_D11<<3)
+	cfi_offset (d12, JB_D12<<3)
+	cfi_offset (d13, JB_D13<<3)
+	cfi_offset (d14, JB_D14<<3)
+	cfi_offset (d15, JB_D15<<3)
+
 	ldp	x19, x20, [x0, #JB_X19<<3]
 	ldp	x21, x22, [x0, #JB_X21<<3]
 	ldp	x23, x24, [x0, #JB_X23<<3]
diff --git a/sysdeps/aarch64/aarch64-gcs.h b/sysdeps/aarch64/aarch64-gcs.h
index 162ef18..8e253ed 100644
--- a/sysdeps/aarch64/aarch64-gcs.h
+++ b/sysdeps/aarch64/aarch64-gcs.h
@@ -23,6 +23,21 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-void *__alloc_gcs (size_t, void **, size_t *) attribute_hidden;
+struct gcs_record
+{
+  void *gcs_base;	/* __alloc_gcs stores the mapping's base address.  */
+  void *gcs_token;	/* __alloc_gcs stores gcsp here; it returns gcsp + 1.  */
+  size_t gcs_size;	/* __alloc_gcs stores the mapping size in bytes.  */
+};
+
+void *__alloc_gcs (size_t, struct gcs_record *) attribute_hidden;
+
+static inline bool
+has_gcs (void)	/* True when the probe below leaves x16 equal to 0.  */
+{
+  register unsigned long x16 asm ("x16") = 1;	/* Probe input, pinned to x16.  */
+  asm ("hint	40" /* chkfeat x16 */ : "+r" (x16));
+  return x16 == 0;	/* x16 is "+r": test what the instruction left.  */
+}
 
 #endif
diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
index 4bd5496..f364e65 100755
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -185,219 +185,14 @@ else
 default-abi = lp64"
 fi
 
-# Only consider BTI supported if -mbranch-protection=bti is
-# on by default in the compiler and the linker produces
-# binaries with GNU property notes in PT_GNU_PROPERTY segment.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BTI support" >&5
-printf %s "checking for BTI support... " >&6; }
-if test ${libc_cv_aarch64_bti+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-void foo (void) { }
-EOF
-  libc_cv_aarch64_bti=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -lW conftest.so | grep -q GNU_PROPERTY'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_bti=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_bti" >&5
-printf "%s\n" "$libc_cv_aarch64_bti" >&6; }
-config_vars="$config_vars
-aarch64-bti = $libc_cv_aarch64_bti"
-if test $libc_cv_aarch64_bti = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_BTI 1" >>confdefs.h
-
-fi
-
-# Check if glibc is built with return address signing, i.e.
-# if -mbranch-protection=pac-ret is on. We need this because
-# pac-ret relies on unwinder support so it's not safe to use
-# it in assembly code unconditionally, but there is no
-# feature test macro for it in gcc.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if pac-ret is enabled" >&5
-printf %s "checking if pac-ret is enabled... " >&6; }
-if test ${libc_cv_aarch64_pac_ret+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-int bar (void);
-int foo (void) { return bar () + 1; }
-EOF
-  libc_cv_aarch64_pac_ret=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='grep -q -E '\''(hint( |	)+25|paciasp)'\'' conftest.s'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_pac_ret=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_pac_ret" >&5
-printf "%s\n" "$libc_cv_aarch64_pac_ret" >&6; }
-if test $libc_cv_aarch64_pac_ret = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_PAC_RET 1" >>confdefs.h
-
-fi
-
-# Check if binutils supports variant PCS symbols.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for variant PCS support" >&5
-printf %s "checking for variant PCS support... " >&6; }
-if test ${libc_cv_aarch64_variant_pcs+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.S <<EOF
-.global foo
-.type foo, %function
-.variant_pcs foo
-foo:
-	ret
-.global bar
-.type bar, %function
-bar:
-	b foo
-EOF
-  libc_cv_aarch64_variant_pcs=no
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; } \
-     && { ac_try='$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }
-  then
-    libc_cv_aarch64_variant_pcs=yes
-  fi
-  rm -rf conftest.* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_variant_pcs" >&5
-printf "%s\n" "$libc_cv_aarch64_variant_pcs" >&6; }
-config_vars="$config_vars
-aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs"
-
-# Check if asm support armv8.2-a+sve
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SVE support in assembler" >&5
-printf %s "checking for SVE support in assembler... " >&6; }
-if test ${libc_cv_aarch64_sve_asm+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e) cat > conftest.s <<\EOF
-	.arch armv8.2-a+sve
-	ptrue p0.b
-EOF
-if { ac_try='${CC-cc} -c conftest.s 1>&5'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }; then
-  libc_cv_aarch64_sve_asm=yes
-else
-  libc_cv_aarch64_sve_asm=no
-fi
-rm -f conftest* ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_asm" >&5
-printf "%s\n" "$libc_cv_aarch64_sve_asm" >&6; }
-if test $libc_cv_aarch64_sve_asm = yes; then
-  printf "%s\n" "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h
-
-fi
-
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
-# Check if compiler supports SVE ACLE.
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for availability of SVE ACLE" >&5
-printf %s "checking for availability of SVE ACLE... " >&6; }
-if test ${libc_cv_aarch64_sve_acle+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   cat > conftest.c <<EOF
-#include <arm_sve.h>
-EOF
-  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c'
-  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }; then
-    libc_cv_aarch64_sve_acle=yes
-  else
-    libc_cv_aarch64_sve_acle=no
-  fi
-  rm conftest.c ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_acle" >&5
-printf "%s\n" "$libc_cv_aarch64_sve_acle" >&6; }
-
-# Check if compiler is sufficient to build mathvec
-if test $build_mathvec = yes; then
-  fail=no
-  if test $libc_cv_aarch64_variant_pcs = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but linker does not support variant PCS." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but linker does not support variant PCS." >&2;}
-  fi
-  if test $libc_cv_aarch64_sve_asm = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but assembler does not support SVE." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but assembler does not support SVE." >&2;}
-  fi
-  if test $libc_cv_aarch64_sve_acle = no; then
-    fail=yes
-    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&5
-printf "%s\n" "$as_me: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&2;}
-  fi
-  if test $fail = yes; then
-    as_fn_error $? "use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI)." "$LINENO" 5
-  fi
-else
+if test $build_mathvec = no; then
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is disabled, this results in incomplete ABI." >&5
 printf "%s\n" "$as_me: WARNING: mathvec is disabled, this results in incomplete ABI." >&2;}
 fi
 
+libc_cv_support_sframe=yes
+
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 56d12d6..a9a1b74 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -24,119 +24,12 @@ else
   LIBC_CONFIG_VAR([default-abi], [lp64])
 fi
 
-# Only consider BTI supported if -mbranch-protection=bti is
-# on by default in the compiler and the linker produces
-# binaries with GNU property notes in PT_GNU_PROPERTY segment.
-AC_CACHE_CHECK([for BTI support], [libc_cv_aarch64_bti], [dnl
-  cat > conftest.c <<EOF
-void foo (void) { }
-EOF
-  libc_cv_aarch64_bti=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c]) \
-     && AC_TRY_COMMAND([$READELF -lW conftest.so | grep -q GNU_PROPERTY]) \
-     && AC_TRY_COMMAND([$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"])
-  then
-    libc_cv_aarch64_bti=yes
-  fi
-  rm -rf conftest.*])
-LIBC_CONFIG_VAR([aarch64-bti], [$libc_cv_aarch64_bti])
-if test $libc_cv_aarch64_bti = yes; then
-  AC_DEFINE(HAVE_AARCH64_BTI)
-fi
-
-# Check if glibc is built with return address signing, i.e.
-# if -mbranch-protection=pac-ret is on. We need this because
-# pac-ret relies on unwinder support so it's not safe to use
-# it in assembly code unconditionally, but there is no
-# feature test macro for it in gcc.
-AC_CACHE_CHECK([if pac-ret is enabled], [libc_cv_aarch64_pac_ret], [dnl
-  cat > conftest.c <<EOF
-int bar (void);
-int foo (void) { return bar () + 1; }
-EOF
-  libc_cv_aarch64_pac_ret=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
-     && AC_TRY_COMMAND([grep -q -E '\''(hint( |	)+25|paciasp)'\'' conftest.s])
-  then
-    libc_cv_aarch64_pac_ret=yes
-  fi
-  rm -rf conftest.*])
-if test $libc_cv_aarch64_pac_ret = yes; then
-  AC_DEFINE(HAVE_AARCH64_PAC_RET)
-fi
-
-# Check if binutils supports variant PCS symbols.
-AC_CACHE_CHECK([for variant PCS support], [libc_cv_aarch64_variant_pcs], [dnl
-  cat > conftest.S <<EOF
-.global foo
-.type foo, %function
-.variant_pcs foo
-foo:
-	ret
-.global bar
-.type bar, %function
-bar:
-	b foo
-EOF
-  libc_cv_aarch64_variant_pcs=no
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S]) \
-     && AC_TRY_COMMAND([$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS])
-  then
-    libc_cv_aarch64_variant_pcs=yes
-  fi
-  rm -rf conftest.*])
-LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
-
-# Check if asm support armv8.2-a+sve
-AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl
-cat > conftest.s <<\EOF
-	.arch armv8.2-a+sve
-	ptrue p0.b
-EOF
-if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
-  libc_cv_aarch64_sve_asm=yes
-else
-  libc_cv_aarch64_sve_asm=no
-fi
-rm -f conftest*])
-if test $libc_cv_aarch64_sve_asm = yes; then
-  AC_DEFINE(HAVE_AARCH64_SVE_ASM)
-fi
-
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
-# Check if compiler supports SVE ACLE.
-AC_CACHE_CHECK(for availability of SVE ACLE, libc_cv_aarch64_sve_acle, [dnl
-  cat > conftest.c <<EOF
-#include <arm_sve.h>
-EOF
-  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c]); then
-    libc_cv_aarch64_sve_acle=yes
-  else
-    libc_cv_aarch64_sve_acle=no
-  fi
-  rm conftest.c])
-
-# Check if compiler is sufficient to build mathvec
-if test $build_mathvec = yes; then
-  fail=no
-  if test $libc_cv_aarch64_variant_pcs = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but linker does not support variant PCS.])
-  fi
-  if test $libc_cv_aarch64_sve_asm = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but assembler does not support SVE.])
-  fi
-  if test $libc_cv_aarch64_sve_acle = no; then
-    fail=yes
-    AC_MSG_WARN([mathvec is enabled but compiler does not have SVE ACLE.])
-  fi
-  if test $fail = yes; then
-    AC_MSG_ERROR([use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI).])
-  fi
-else
+if test $build_mathvec = no; then
   AC_MSG_WARN([mathvec is disabled, this results in incomplete ABI.])
 fi
+
+libc_cv_support_sframe=yes
diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S
index 0c3ee40..e9e530c 100644
--- a/sysdeps/aarch64/crti.S
+++ b/sysdeps/aarch64/crti.S
@@ -65,7 +65,7 @@ call_weak_fn:
 	cbz	x0, 1f
 	b	PREINIT_FUNCTION
 1:
-	RET
+	ret
 	.size	call_weak_fn, .-call_weak_fn
 #endif
 
@@ -75,11 +75,7 @@ call_weak_fn:
 	.hidden	_init
 	.type	_init, %function
 _init:
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-#else
-	BTI_C
-#endif
+	paciasp
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
 #if PREINIT_FUNCTION_WEAK
@@ -94,10 +90,6 @@ _init:
 	.hidden	_fini
 	.type	_fini, %function
 _fini:
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-#else
-	BTI_C
-#endif
+	paciasp
 	stp	x29, x30, [sp, -16]!
 	mov	x29, sp
diff --git a/sysdeps/aarch64/crtn.S b/sysdeps/aarch64/crtn.S
index b52b10e..653a548 100644
--- a/sysdeps/aarch64/crtn.S
+++ b/sysdeps/aarch64/crtn.S
@@ -41,14 +41,10 @@
 
 	.section .init,"ax",%progbits
 	ldp	x29, x30, [sp], 16
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-#endif
-	RET
+	autiasp
+	ret
 
 	.section .fini,"ax",%progbits
 	ldp	x29, x30, [sp], 16
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-#endif
-	RET
+	autiasp
+	ret
diff --git a/sysdeps/aarch64/dl-irel.h b/sysdeps/aarch64/dl-irel.h
index ae402bc..7bae3c3 100644
--- a/sysdeps/aarch64/dl-irel.h
+++ b/sysdeps/aarch64/dl-irel.h
@@ -21,11 +21,26 @@
 #define _DL_IREL_H
 
 #include <stdio.h>
-#include <unistd.h>
 #include <ldsodefs.h>
-#include <sysdep.h>
 #include <sys/ifunc.h>
 
+#define _IFUNC_ARG_SIZE_VER0 24 /* sizeof 1st published __ifunc_arg_t */
+#define _IFUNC_ARG_SIZE_VER1 40 /* sizeof 2nd published __ifunc_arg_t */
+
+#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+  (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER))
+
+_Static_assert (sizeof (__ifunc_arg_t) == _IFUNC_ARG_SIZE_VER1,
+  "sizeof (__ifunc_arg_t) != _IFUNC_ARG_SIZE_VER1");
+
+_Static_assert (_IFUNC_ARG_SIZE_VER1
+  == (_IFUNC_HWCAP_MAX + 1) * sizeof (unsigned long),
+  "_IFUNC_ARG_SIZE_VER1 and _IFUNC_HWCAP_MAX mismatch");
+
+#undef offsetofend
+#undef sizeof_field
+
 #define ELF_MACHINE_IRELA	1
 
 static inline ElfW(Addr)
@@ -37,6 +52,8 @@ elf_ifunc_invoke (ElfW(Addr) addr)
   arg._size = sizeof (arg);
   arg._hwcap = GLRO(dl_hwcap);
   arg._hwcap2 = GLRO(dl_hwcap2);
+  arg._hwcap3 = GLRO(dl_hwcap3);
+  arg._hwcap4 = GLRO(dl_hwcap4);
   return ((ElfW(Addr) (*) (uint64_t, const __ifunc_arg_t *)) (addr))
 	 (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg);
 }
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index fc40d66..2ff8d95 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,9 +74,9 @@
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
-	BTI_C
+	bti	c
 	ldr	x0, [x0, 8]
-	RET
+	ret
 	cfi_endproc
 	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
 
@@ -95,7 +95,7 @@ _dl_tlsdesc_return:
 	cfi_startproc
 	.align  2
 _dl_tlsdesc_undefweak:
-	BTI_C
+	bti	c
 	str	x1, [sp, #-16]!
 	cfi_adjust_cfa_offset (16)
 	ldr	x0, [x0, 8]
@@ -103,7 +103,7 @@ _dl_tlsdesc_undefweak:
 	sub	x0, x0, x1
 	ldr	x1, [sp], #16
 	cfi_adjust_cfa_offset (-16)
-	RET
+	ret
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
@@ -141,12 +141,8 @@ _dl_tlsdesc_undefweak:
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_dynamic:
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# else
-	BTI_C
-# endif
+	paciasp
+	cfi_negate_ra_state
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
@@ -177,12 +173,10 @@ _dl_tlsdesc_dynamic:
 1:
 	ldp	 x3,  x4, [sp, #16]
 	ldp	 x1,  x2, [sp], #32
-# if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-# endif
+	autiasp
+	cfi_negate_ra_state
 	cfi_adjust_cfa_offset (-32)
-	RET
+	ret
 2:
 	/* This is the slow path. We need to call __tls_get_addr() which
 	   means we need to save and restore all the register that the
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index d6bed96..d628b01 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -34,7 +34,7 @@
 	cfi_startproc
 	.align 2
 _dl_runtime_resolve:
-	BTI_C
+	bti	c
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -127,12 +127,8 @@ _dl_runtime_resolve:
 	cfi_startproc
 	.align 2
 _dl_runtime_profile:
-# if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-# else
-	BTI_C
-# endif
+	paciasp
+	cfi_negate_ra_state
 	/* AArch64 we get called with:
 	   ip0		&PLTGOT[2]
 	   ip1		temp(dl resolver entry point)
@@ -251,17 +247,12 @@ _dl_runtime_profile:
 	cfi_restore(x29)
 	cfi_restore(x30)
 
-# if HAVE_AARCH64_PAC_RET
 	add	sp, sp, SF_SIZE
 	cfi_adjust_cfa_offset (-SF_SIZE)
-	AUTIASP
-	cfi_window_save
+	autiasp
+	cfi_negate_ra_state
 	add	sp, sp, 16
 	cfi_adjust_cfa_offset (-16)
-# else
-	add	sp, sp, SF_SIZE + 16
-	cfi_adjust_cfa_offset (- SF_SIZE - 16)
-# endif
 
 	/* Jump to the newly found address.  */
 	br	ip0
@@ -321,10 +312,8 @@ _dl_runtime_profile:
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
 	cfi_restore(lr)
-# if HAVE_AARCH64_PAC_RET
 	/* Note: LR restored from La_aarch64_reg has no PAC.  */
-	cfi_window_save
-# endif
+	cfi_negate_ra_state
 	mov	sp, x29
 	cfi_def_cfa_register (sp)
 	ldr	x29, [x29, #0]
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index aadedf1..068c11c 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -1,10 +1,14 @@
 libmvec-supported-funcs = acos \
                           acosh \
+                          acospi \
                           asin \
                           asinh \
+                          asinpi \
                           atan \
                           atanh \
+                          atanpi \
                           atan2 \
+                          atan2pi \
                           cbrt \
                           cos \
                           cosh \
@@ -52,8 +56,11 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
                   v_powf_data
 endif
 
-sve-cflags = -march=armv8-a+sve
+# Enable SVE for building libmvec.  Since CFLAGS may contain a -mcpu or -march,
+# add a generic -mcpu and -march with SVE enabled.  Also use a tune for a modern
+# SVE core.
 
+sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1
 
 ifeq ($(build-mathvec),yes)
 bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 0f9503f..2980cb7 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -157,4 +157,26 @@ libmvec {
     _ZGVsMxv_tanpi;
     _ZGVsMxv_tanpif;
   }
+  GLIBC_2.42 {
+    _ZGVnN2v_acospi;
+    _ZGVnN2v_acospif;
+    _ZGVnN4v_acospif;
+    _ZGVsMxv_acospi;
+    _ZGVsMxv_acospif;
+    _ZGVnN2v_asinpi;
+    _ZGVnN2v_asinpif;
+    _ZGVnN4v_asinpif;
+    _ZGVsMxv_asinpi;
+    _ZGVsMxv_asinpif;
+    _ZGVnN2v_atanpi;
+    _ZGVnN2v_atanpif;
+    _ZGVnN4v_atanpif;
+    _ZGVsMxv_atanpi;
+    _ZGVsMxv_atanpif;
+    _ZGVnN2vv_atan2pi;
+    _ZGVnN2vv_atan2pif;
+    _ZGVnN4vv_atan2pif;
+    _ZGVsMxvv_atan2pi;
+    _ZGVsMxvv_atan2pif;
+  }
 }
diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
index 7709b54..453f780 100644
--- a/sysdeps/aarch64/fpu/acos_advsimd.c
+++ b/sysdeps/aarch64/fpu/acos_advsimd.c
@@ -18,24 +18,23 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
-  float64x2_t poly[12];
-  float64x2_t pi, pi_over_2;
+  double c1, c3, c5, c7, c9, c11;
+  float64x2_t c0, c2, c4, c6, c8, c10;
   uint64x2_t abs_mask;
+  float64x2_t pi, pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
-	    V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
-	    V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
-	    V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
-	    V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
-	    V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
-  .pi = V2 (0x1.921fb54442d18p+1),
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+  .c0 = V2 (0x1.555555555554ep-3),     .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),   .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi = V2 (0x1.921fb54442d18p+1),     .pi_over_2 = V2 (0x1.921fb54442d18p+0),
   .abs_mask = V2 (0x7fffffffffffffff),
 };
 
@@ -63,7 +62,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
 
      acos(x) ~ pi/2 - (x + x^3 P(x^2)).
 
-   The largest observed error in this region is 1.18 ulps,
+   The largest observed error in this region is 1.18 ulp:
    _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0
 				       want 0x1.0d54d1985c069p+0.
 
@@ -71,9 +70,9 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
 
      acos(x) = y + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 1.52 ulps,
-   _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1
-				       want 0x1.edbbedf8a7d6cp-1.  */
+   The largest observed error in this region is 1.50 ulp:
+   _ZGVnN2v_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
+				       want 0x1.ec1a46aa829p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -99,13 +98,32 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
   float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2));
 
   /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z3 = vmulq_f64 (z2, z);
   float64x2_t z4 = vmulq_f64 (z2, z2);
   float64x2_t z8 = vmulq_f64 (z4, z4);
-  float64x2_t z16 = vmulq_f64 (z8, z8);
-  float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
 
-  /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+  /* Order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p411 = vfmaq_f64 (p47, z8, p811);
+  float64x2_t p = vfmaq_f64 (p03, z8, p411);
+
+  /* Finalize polynomial: z + z3 * P(z2).  */
+  p = vfmaq_f64 (z, z3, p);
 
   /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for  |x| < 0.5
 	       = 2 Q(|x|)               , for  0.5 < x < 1.0
diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c
index 74e2f7d..104f0d7 100644
--- a/sysdeps/aarch64/fpu/acos_sve.c
+++ b/sysdeps/aarch64/fpu/acos_sve.c
@@ -18,20 +18,21 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[12];
-  float64_t pi, pi_over_2;
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5,
-	    0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
-	    0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8,
-	    0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
-  .pi = 0x1.921fb54442d18p+1,
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
   .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
@@ -42,20 +43,21 @@ static const struct data
 
      acos(x) ~ pi/2 - (x + x^3 P(x^2)).
 
-   The largest observed error in this region is 1.18 ulps,
-   _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0
-				       want 0x1.0d4d0f55667f7p+0.
+   The largest observed error in this region is 1.18 ulp:
+   _ZGVsMxv_acos (0x1.fbb7c9079b429p-2) got 0x1.0d51266607582p+0
+				       want 0x1.0d51266607583p+0.
 
    For |x| in [0.5, 1.0], use same approximation with a change of variable
 
      acos(x) = y + y * z * P(z), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 1.52 ulps,
-   _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1
-				       want 0x1.ed82df4243f0bp-1.  */
+   The largest observed error in this region is 1.50 ulp:
+   _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
+				       want 0x1.ec1a46aa829p-1.  */
 svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
 
   svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
   svfloat64_t ax = svabs_x (pg, x);
@@ -70,24 +72,41 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svsqrt_m (ax, a_gt_half, z2);
 
   /* Use a single polynomial approximation P for both intervals.  */
-  svfloat64_t z4 = svmul_x (pg, z2, z2);
-  svfloat64_t z8 = svmul_x (pg, z4, z4);
-  svfloat64_t z16 = svmul_x (pg, z8, z8);
-  svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+  svfloat64_t z3 = svmul_x (ptrue, z2, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmad_x (pg, p411, z8, p03);
 
   /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+  p = svmad_x (pg, p, z3, z);
 
   /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for  |x| < 0.5
 	       = 2 Q(|x|)               , for  0.5 < x < 1.0
 	       = pi - 2 Q(|x|)          , for -1.0 < x < -0.5.  */
-  svfloat64_t y
-      = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign));
-
-  svbool_t is_neg = svcmplt (pg, x, 0.0);
-  svfloat64_t off = svdup_f64_z (is_neg, d->pi);
-  svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0));
-  svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2));
-
-  return svmla_x (pg, add, mul, y);
+  svfloat64_t mul = svreinterpret_f64 (
+      svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10));
+  mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign));
+  svfloat64_t add = svreinterpret_f64 (
+      svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->pi_over_2))));
+  add = svsub_m (a_gt_half, sv_f64 (d->pi_over_2), add);
+
+  return svmsb_x (pg, p, mul, add);
 }
diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c
index 326b2cc..3a84959 100644
--- a/sysdeps/aarch64/fpu/acosh_sve.c
+++ b/sysdeps/aarch64/fpu/acosh_sve.c
@@ -30,10 +30,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 }
 
 /* SVE approximation for double-precision acosh, based on log1p.
-   The largest observed error is 3.19 ULP in the region where the
+   The largest observed error is 3.14 ULP in the region where the
    argument to log1p falls in the k=0 interval, i.e. x close to 1:
-   SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
-					   want 0x1.ed23399f51373p-2.  */
+   SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2
+					   want 0x1.ef0cee7c33ce4p-2.  */
 svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
 {
   /* (ix - One) >= (BigBound - One).  */
diff --git a/sysdeps/aarch64/fpu/acospi_advsimd.c b/sysdeps/aarch64/fpu/acospi_advsimd.c
new file mode 100644
index 0000000..bb6c209
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospi_advsimd.c
@@ -0,0 +1,118 @@
+/* Double-Precision vector (Advanced SIMD) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10;
+  uint64x2_t abs_mask;
+  float64x2_t one, inv_pi;
+  double c1, c3, c5, c7, c9, c11; /* Loaded pairwise with vld1q_f64.  */
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using
+     iterative approach for minimisation of relative error in asinpif Sollya
+     file.  NOTE(review): same bounds as the float variant - confirm.  */
+  .c0 = V2 (0x1.b2995e7b7b5fbp-5),     .c1 = 0x1.8723a1d58d83p-6,
+  .c2 = V2 (0x1.d1a452eacf2fep-7),     .c3 = 0x1.3ce52c4d75582p-7,
+  .c4 = V2 (0x1.d2b2a0aea27d5p-8),     .c5 = 0x1.6a0b9b92cad8bp-8,
+  .c6 = V2 (0x1.2290c84438caep-8),     .c7 = 0x1.efba896580d02p-9,
+  .c8 = V2 (0x1.44446707af38p-9),      .c9 = 0x1.5070b3e7aa03ep-8,
+  .c10 = V2 (-0x1.c70015d0ebdafp-9),   .c11 = 0x1.27029c383fed9p-7,
+  .abs_mask = V2 (0x7fffffffffffffff), .one = V2 (1.0),
+  .inv_pi = V2 (0x1.45f306dc9c883p-2),
+};
+
+/* Double-precision implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order-11 polynomial P to approximate asinpi
+   such that the final approximation of acospi is an odd polynomial:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.35 ulp:
+   _ZGVnN2v_acospi (0x1.fb16ed35a6d64p-2) got 0x1.5722a3dbcafb4p-2
+					 want 0x1.5722a3dbcafb5p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = 2 * (y/pi + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.55 ulp:
+   _ZGVnN2v_acospi (0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
+					 want 0x1.ffd43d5dd3a9bp-4.  */
+float64x2_t VPCS_ATTR NOINLINE V_NAME_D1 (acospi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t ia = vandq_u64 (ix, d->abs_mask);
+
+  float64x2_t ax = vreinterpretq_f64_u64 (ia);
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); /* |x| < 0.5.  */
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| < 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5.  */
+  float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x),
+			      vfmsq_n_f64 (v_f64 (0.5), ax, 0.5));
+  float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p411 = vfmaq_f64 (p47, z8, p811);
+  float64x2_t p = vfmaq_f64 (p03, z8, p411);
+
+  /* Finalize polynomial: Q = z * (1 / pi + z2 * P(z2)).  */
+  p = vfmaq_f64 (d->inv_pi, z2, p);
+  p = vmulq_f64 (p, z);
+
+  /* acospi(x)
+		= 1/2 - sign(x) * Q(|x|), for         |x| < 0.5
+		= 2 Q(|x|)              , for  0.5 <=  x  <= 1.0
+		= 1 - 2 Q(|x|)          , for -1.0 <=  x  <= -0.5.  */
+  float64x2_t y = vbslq_f64 (d->abs_mask, p, x); /* copysign (p, x).  */
+  uint64x2_t is_neg = vcltzq_f64 (x);
+  float64x2_t off = vreinterpretq_f64_u64 (
+      vandq_u64 (is_neg, vreinterpretq_u64_f64 (d->one)));
+  float64x2_t mul = vbslq_f64 (a_lt_half, d->one, v_f64 (-2.0));
+  float64x2_t add = vbslq_f64 (a_lt_half, v_f64 (0.5), off);
+
+  return vfmsq_f64 (add, mul, y); /* add - mul * y.  */
+}
diff --git a/sysdeps/aarch64/fpu/acospi_sve.c b/sysdeps/aarch64/fpu/acospi_sve.c
new file mode 100644
index 0000000..e41eaad
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospi_sve.c
@@ -0,0 +1,112 @@
+/* Double-Precision vector (SVE) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c1, c3, c5, c7, c9, c11; /* Loaded pairwise with svld1rq.  */
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t inv_pi, half;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using
+     iterative approach for minimisation of relative error in asinpif Sollya
+     file.  NOTE(review): same bounds as the float variant - confirm.  */
+  .c0 = 0x1.b2995e7b7b5fbp-5,	  .c1 = 0x1.8723a1d58d83p-6,
+  .c2 = 0x1.d1a452eacf2fep-7,	  .c3 = 0x1.3ce52c4d75582p-7,
+  .c4 = 0x1.d2b2a0aea27d5p-8,	  .c5 = 0x1.6a0b9b92cad8bp-8,
+  .c6 = 0x1.2290c84438caep-8,	  .c7 = 0x1.efba896580d02p-9,
+  .c8 = 0x1.44446707af38p-9,	  .c9 = 0x1.5070b3e7aa03ep-8,
+  .c10 = -0x1.c70015d0ebdafp-9,	  .c11 = 0x1.27029c383fed9p-7,
+  .inv_pi = 0x1.45f306dc9c883p-2, .half = 0.5,
+};
+
+/* Double-precision SVE implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 11 polynomial P to approximate asinpi
+   such that the final approximation of acospi is:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.35 ulp:
+   _ZGVsMxv_acospi (0x1.fb014996aea18p-2) got 0x1.572a91755bbf6p-2
+					 want 0x1.572a91755bbf7p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = 2 * (y/pi + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.55 ulp:
+   _ZGVsMxv_acospi(0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
+					want 0x1.ffd43d5dd3a9bp-4.  */
+svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+  svfloat64_t ax = svabs_x (pg, x);
+  svbool_t a_gt_half = svacgt (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| <= 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| > 0.5.  */
+  svfloat64_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+			  svmul_x (ptrue, x, x));
+  svfloat64_t z = svsqrt_m (ax, a_gt_half, z2);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
+  p = svmla_x (pg, sv_f64 (d->inv_pi), z2, p); /* 1/pi + z2 * P(z2).  */
+  p = svmul_x (ptrue, p, z); /* Q = z * (1/pi + z2 * P(z2)).  */
+
+  /* acospi(x) = 1/2 - sign(x) * Q(|x|), for         |x| <= 0.5
+	       = 2 Q(|x|)              , for  0.5 <   x  <= 1.0
+	       = 1 - 2 Q(|x|)          , for -1.0 <=  x  < -0.5.  */
+  svfloat64_t mul = svreinterpret_f64 (
+      svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10)); /* -2.0.  */
+  mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign));
+  svfloat64_t add = svreinterpret_f64 (
+      svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->half))));
+  add = svsub_m (a_gt_half, sv_f64 (d->half), add); /* 0 or 1 if |x| > 0.5.  */
+
+  return svmsb_x (pg, p, mul, add); /* add - p * mul.  */
+}
diff --git a/sysdeps/aarch64/fpu/acospif_advsimd.c b/sysdeps/aarch64/fpu/acospif_advsimd.c
new file mode 100644
index 0000000..8486b62
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospif_advsimd.c
@@ -0,0 +1,106 @@
+/* Single-Precision vector (Advanced SIMD) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, inv_pi;
+  float c1, c3, c5, null; /* null pads the vld1q_f32 load at c1.  */
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi ~ x/pi + x^3 * P(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using
+     iterative approach for minimisation of relative error in asinpif Sollya
+     file.  */
+  .c0 = V4 (0x1.b2995ep-5f),	 .c1 = 0x1.8724ep-6f,
+  .c2 = V4 (0x1.d1301ep-7f),	 .c3 = 0x1.446d3cp-7f,
+  .c4 = V4 (0x1.654848p-8f),	 .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = V4 (0x1.45f306p-2f),
+};
+
+#define AbsMask 0x7fffffff
+
+/* Single-precision implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 5 polynomial P to approximate asinpi
+   such that the final approximation of acospi is an odd polynomial:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+   The largest observed error in this region is 1.23 ulps,
+      _ZGVnN4v_acospif (0x1.fee13ep-2) got 0x1.55beb4p-2 want 0x1.55beb2p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = 2 * (y/pi + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.53 ulps,
+   _ZGVnN4v_acospif (0x1.6ad644p-1) got 0x1.fe8f96p-3
+				   want 0x1.fe8f9cp-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acospi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+  float32x4_t ax = vreinterpretq_f32_u32 (ia);
+  uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f)); /* |x| < 0.5.  */
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| < 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5.  */
+
+  float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
+  float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+
+  /* Order-5 Estrin evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+  float32x4_t c135 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1);
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2);
+  float32x4_t p = vfmaq_f32 (p03, z8, p45);
+  /* Add 1/pi as final coeff.  */
+  p = vfmaq_f32 (d->inv_pi, z2, p);
+
+  /* Finalize polynomial: Q = z * (1 / pi + z2 * P(z2)).  */
+  p = vmulq_f32 (z, p);
+
+  /* acospi(x)
+			= 1/2 - sign(x) * Q(|x|), for         |x| < 0.5
+			= 2 Q(|x|)              , for  0.5 <=  x  <= 1.0
+			= 1 - 2 Q(|x|)          , for -1.0 <=  x  <= -0.5.  */
+
+  float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x); /* copysign (p, x).  */
+  uint32x4_t is_neg = vcltzq_f32 (x);
+  float32x4_t off = vreinterpretq_f32_u32 (
+      vandq_u32 (vreinterpretq_u32_f32 (v_f32 (1.0f)), is_neg));
+  float32x4_t mul = vbslq_f32 (a_lt_half, v_f32 (1.0f), v_f32 (-2.0f));
+  float32x4_t add = vbslq_f32 (a_lt_half, v_f32 (0.5f), off);
+
+  return vfmsq_f32 (add, mul, y); /* add - mul * y.  */
+}
+libmvec_hidden_def (V_NAME_F1 (acospi))
+HALF_WIDTH_ALIAS_F1 (acospi)
diff --git a/sysdeps/aarch64/fpu/acospif_sve.c b/sysdeps/aarch64/fpu/acospif_sve.c
new file mode 100644
index 0000000..ea4fc4a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acospif_sve.c
@@ -0,0 +1,91 @@
+/* Single-Precision vector (SVE) inverse cospi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c0, c1, c2, c3, c4, inv_pi, half;
+} data = {
+  /* Coefficients of order-4 polynomial P such that asin(x)/pi ~ x/pi + x^3 *
+     P(x^2) on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated
+     using iterative approach for minimisation of relative error.  */
+  .c0 = 0x1.b29968p-5f, .c1 = 0x1.871424p-6f, .c2 = 0x1.d56e44p-7f,
+  .c3 = 0x1.149bb8p-7f, .c4 = 0x1.8e07fep-7f, .inv_pi = 0x1.45f306p-2f,
+  .half = 0.5f,
+};
+
+/* Single-precision SVE implementation of vector acospi(x).
+
+   For |x| in [0, 0.5], use order 4 polynomial P to approximate asinpi
+   such that the final approximation of acospi is:
+
+     acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)).
+
+    The largest observed error in this region is 1.3 ulps,
+      _ZGVsMxv_acospif(0x1.ffa9d2p-2) got 0x1.557504p-2
+				     want 0x1.557502p-2.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+      acospi(x) = 2 * (y/pi + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 2.61 ulps,
+   _ZGVsMxv_acospif (0x1.6b232ep-1) got 0x1.fe04bap-3
+				   want 0x1.fe04cp-3.  */
+svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+  svfloat32_t ax = svabs_x (pg, x);
+  svbool_t a_gt_half = svacgt (pg, x, 0.5f);
+
+  /* Evaluate polynomial Q(x) = z / pi + z * z2 * P(z2) with
+     z2 = x ^ 2         and z = |x|     , if |x| <= 0.5
+     z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| > 0.5.  */
+  svfloat32_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f32 (0.5f), ax, 0.5f),
+			  svmul_x (ptrue, x, x));
+  svfloat32_t z = svsqrt_m (ax, a_gt_half, z2);
+
+  /* Use a single order-4 polynomial P (Horner) for both intervals.  */
+  svfloat32_t p = svmla_x (pg, sv_f32 (d->c3), z2, d->c4);
+  p = svmad_x (pg, z2, p, d->c2);
+  p = svmad_x (pg, z2, p, d->c1);
+  p = svmad_x (pg, z2, p, d->c0);
+  /* Add 1/pi as final coeff.  */
+  p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p);
+  /* Finalize polynomial: Q = z * (1 / pi + z2 * P(z2)).  */
+  p = svmul_x (ptrue, z, p);
+
+  /* acospi(x)
+			  = 1/2 - sign(x) * Q(|x|), for         |x| <= 0.5
+			  = 2 Q(|x|)              , for  0.5 <   x  <= 1.0
+			  = 1 - 2 Q(|x|)          , for -1.0 <=  x  < -0.5.  */
+  svfloat32_t y
+      = svreinterpret_f32 (svorr_x (ptrue, svreinterpret_u32 (p), sign));
+  svfloat32_t mul = svsel (a_gt_half, sv_f32 (2.0f), sv_f32 (-1.0f));
+  svfloat32_t add = svreinterpret_f32 (
+      svorr_x (ptrue, sign, svreinterpret_u32 (sv_f32 (d->half))));
+  add = svsub_m (a_gt_half, sv_f32 (d->half), add); /* 0 or 1 if |x| > 0.5.  */
+
+  return svmad_x (pg, y, mul, add); /* y * mul + add.  */
+}
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 38681a4..c202bda 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -19,10 +19,13 @@
 
 libmvec_hidden_proto (V_NAME_F1(acos));
 libmvec_hidden_proto (V_NAME_F1(acosh));
+libmvec_hidden_proto (V_NAME_F1(acospi));
 libmvec_hidden_proto (V_NAME_F1(asin));
 libmvec_hidden_proto (V_NAME_F1(asinh));
+libmvec_hidden_proto (V_NAME_F1(asinpi));
 libmvec_hidden_proto (V_NAME_F1(atan));
 libmvec_hidden_proto (V_NAME_F1(atanh));
+libmvec_hidden_proto (V_NAME_F1(atanpi));
 libmvec_hidden_proto (V_NAME_F1(cbrt));
 libmvec_hidden_proto (V_NAME_F1(cos));
 libmvec_hidden_proto (V_NAME_F1(cosh));
@@ -47,3 +50,4 @@ libmvec_hidden_proto (V_NAME_F1(tan));
 libmvec_hidden_proto (V_NAME_F1(tanh));
 libmvec_hidden_proto (V_NAME_F1(tanpi));
 libmvec_hidden_proto (V_NAME_F2(atan2));
+libmvec_hidden_proto (V_NAME_F2(atan2pi));
diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c
index 4142116..f74141c 100644
--- a/sysdeps/aarch64/fpu/asin_advsimd.c
+++ b/sysdeps/aarch64/fpu/asin_advsimd.c
@@ -18,24 +18,23 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
-  float64x2_t poly[12];
+  float64x2_t c0, c2, c4, c6, c8, c10;
   float64x2_t pi_over_2;
   uint64x2_t abs_mask;
+  double c1, c3, c5, c7, c9, c11;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
-	    V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
-	    V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
-	    V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
-	    V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
-	    V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
-  .pi_over_2 = V2 (0x1.921fb54442d18p+0),
-  .abs_mask = V2 (0x7fffffffffffffff),
+  .c0 = V2 (0x1.555555555554ep-3),	  .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),	  .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),	  .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),	  .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),	  .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),	  .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff),
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
@@ -68,8 +67,8 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
      asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
    The largest observed error in this region is 2.69 ulps,
-   _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				       want 0x1.110d7e85fdd53p-1.  */
+   _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1
+				       want 0x1.1111dd54ddf99p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -86,7 +85,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
     return special_case (x, x, AllMask);
 #endif
 
-  uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5));
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5));
 
   /* Evaluate polynomial Q(x) = y + y * z * P(z) with
      z = x ^ 2 and y = |x|            , if |x| < 0.5
@@ -99,7 +98,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
   float64x2_t z4 = vmulq_f64 (z2, z2);
   float64x2_t z8 = vmulq_f64 (z4, z4);
   float64x2_t z16 = vmulq_f64 (z8, z8);
-  float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
+
+  /* order-11 estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
+  float64x2_t p = vfmaq_f64 (p07, z16, p811);
 
   /* Finalize polynomial: z + z * z2 * P(z2).  */
   p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c
index 9314466..975f408 100644
--- a/sysdeps/aarch64/fpu/asin_sve.c
+++ b/sysdeps/aarch64/fpu/asin_sve.c
@@ -18,45 +18,43 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[12];
-  float64_t pi_over_2f;
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
-  .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4,
-	    0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6,
-	    0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
-	    0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7,
-	    0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6,
-	    -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
-  .pi_over_2f = 0x1.921fb54442d18p+0,
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
-#define P(i) sv_f64 (d->poly[i])
-
 /* Double-precision SVE implementation of vector asin(x).
 
    For |x| in [0, 0.5], use an order 11 polynomial P such that the final
    approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
 
-   The largest observed error in this region is 0.52 ulps,
-   _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2
-				      want 0x1.ec13757305f26p-2.
-
-   For |x| in [0.5, 1.0], use same approximation with a change of variable
+   The largest observed error in this region is 0.98 ulp:
+   _ZGVsMxv_asin (0x1.d98f6a748ed8ap-2) got 0x1.ec4eb661a73d3p-2
+				       want 0x1.ec4eb661a73d2p-2.
 
-     asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
+   For |x| in [0.5, 1.0], use same approximation with a change of variable:
+   asin(x) = pi/2 - (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
 
-   The largest observed error in this region is 2.69 ulps,
-   _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
-				      want 0x1.110d7e85fdd53p-1.  */
+   The largest observed error in this region is 2.66 ulp:
+   _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1
+				       want 0x1.10b9586f087abp-1.  */
 svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
 
   svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
   svfloat64_t ax = svabs_x (pg, x);
@@ -70,17 +68,37 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
 
   /* Use a single polynomial approximation P for both intervals.  */
+  svfloat64_t z3 = svmul_x (pg, z2, z);
   svfloat64_t z4 = svmul_x (pg, z2, z2);
   svfloat64_t z8 = svmul_x (pg, z4, z4);
-  svfloat64_t z16 = svmul_x (pg, z8, z8);
-  svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  /* Order-11 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
   /* Finalize polynomial: z + z * z2 * P(z2).  */
-  p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+  p = svmla_x (pg, z, z3, p);
 
-  /* asin(|x|) = Q(|x|)         , for |x| < 0.5
-	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
-  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f);
+  /* asin(|x|) = Q(|x|), for |x| <  0.5
+	    = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2);
 
-  /* Copy sign.  */
+  /* Reinsert the sign from the argument.  */
   return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
 }
diff --git a/sysdeps/aarch64/fpu/asinf_advsimd.c b/sysdeps/aarch64/fpu/asinf_advsimd.c
index 52c7c0e..013936c 100644
--- a/sysdeps/aarch64/fpu/asinf_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinf_advsimd.c
@@ -18,22 +18,21 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
 
 static const struct data
 {
-  float32x4_t poly[5];
+  float32x4_t c0, c2, c4;
+  float c1, c3;
   float32x4_t pi_over_2f;
 } data = {
   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))  on
      [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 .  */
-  .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5),
-	    V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) },
-  .pi_over_2f = V4 (0x1.921fb6p+0f),
+  .c0 = V4 (0x1.55555ep-3f), .c1 = 0x1.33261ap-4f,
+  .c2 = V4 (0x1.70d7dcp-5f), .c3 = 0x1.b059dp-6f,
+  .c4 = V4 (0x1.3af7d8p-5f), .pi_over_2f = V4 (0x1.921fb6p+0f),
 };
 
 #define AbsMask 0x7fffffff
-#define Half 0x3f000000
 #define One 0x3f800000
 #define Small 0x39800000 /* 2^-12.  */
 
@@ -47,11 +46,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
 /* Single-precision implementation of vector asin(x).
 
-   For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct
-   rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the
-   following approximation.
 
-   For |x| in [Small, 0.5], use order 4 polynomial P such that the final
+   For |x| < 0.5, use order 4 polynomial P such that the final
    approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
 
     The largest observed error in this region is 0.83 ulps,
@@ -80,24 +76,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x)
 #endif
 
   float32x4_t ax = vreinterpretq_f32_u32 (ia);
-  uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half));
+  uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f));
 
   /* Evaluate polynomial Q(x) = y + y * z * P(z) with
      z = x ^ 2 and y = |x|            , if |x| < 0.5
      z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
   float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
-			      vfmsq_n_f32 (v_f32 (0.5), ax, 0.5));
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
   float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
 
   /* Use a single polynomial approximation P for both intervals.  */
-  float32x4_t p = v_horner_4_f32 (z2, d->poly);
+
+  /* Order-4 pairwise Horner evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t c13 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c13, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c13, 1);
+  float32x4_t p = vfmaq_f32 (p23, d->c4, z4);
+  p = vfmaq_f32 (p01, p, z4);
   /* Finalize polynomial: z + z * z2 * P(z2).  */
   p = vfmaq_f32 (z, vmulq_f32 (z, z2), p);
 
   /* asin(|x|) = Q(|x|)         , for |x| < 0.5
 	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
   float32x4_t y
-      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0));
+      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0f));
 
   /* Copy sign.  */
   return vbslq_f32 (v_u32 (AbsMask), y, x);
diff --git a/sysdeps/aarch64/fpu/asinpi_advsimd.c b/sysdeps/aarch64/fpu/asinpi_advsimd.c
new file mode 100644
index 0000000..b11f98b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpi_advsimd.c
@@ -0,0 +1,109 @@
+/* Double-Precision vector (Advanced SIMD) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64x2_t c0, c2, c4, c6, c8, c10;
+  float64x2_t pi_over_2, inv_pi;
+  uint64x2_t abs_mask;
+  double c1, c3, c5, c7, c9, c11;
+} data = {
+  /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+     on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
+  .c0 = V2 (0x1.555555555554ep-3),	  .c1 = 0x1.3333333337233p-4,
+  .c2 = V2 (0x1.6db6db67f6d9fp-5),	  .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = V2 (0x1.6e8b264d467d6p-6),	  .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = V2 (0x1.c86a22cd9389dp-7),	  .c7 = 0x1.856073c22ebbep-7,
+  .c8 = V2 (0x1.fd1151acb6bedp-8),	  .c9 = 0x1.087182f799c1dp-6,
+  .c10 = V2 (-0x1.6602748120927p-7),	  .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff),
+  .inv_pi = V2 (0x1.45f306dc9c883p-2),
+};
+
+/* Double-precision implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+   approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+   asinpi(x) = asin(x) * 1/pi.
+
+   The largest observed error in this region is 1.63 ulps,
+   _ZGVnN2v_asinpi (0x1.9125919fa617p-19) got 0x1.fec183497ea53p-21
+					 want 0x1.fec183497ea51p-21.
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+     asin(x) = pi/2 - 2 (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 3.04 ulps,
+   _ZGVnN2v_asinpi (0x1.0479b7bd98553p-1) got 0x1.5beebec797326p-3
+					 want 0x1.5beebec797329p-3.  */
+
+float64x2_t VPCS_ATTR V_NAME_D1 (asinpi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+  float64x2_t ax = vabsq_f64 (x);
+
+  uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5));
+
+  /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x),
+			      vfmsq_n_f64 (v_f64 (0.5), ax, 0.5));
+  float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  /* order-11 Estrin.  */
+  float64x2_t c13 = vld1q_f64 (&d->c1);
+  float64x2_t c57 = vld1q_f64 (&d->c5);
+  float64x2_t c911 = vld1q_f64 (&d->c9);
+
+  float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
+  float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
+
+  float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
+  float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
+
+  float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
+  float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
+
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
+  float64x2_t p = vfmaq_f64 (p07, z16, p811);
+
+  /* Finalize polynomial: z + z * z2 * P(z2).  */
+  p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+
+  /* asin(|x|) = Q(|x|)          , for |x| < 0.5
+	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  float64x2_t y = vbslq_f64 (a_lt_half, p, vfmsq_n_f64 (d->pi_over_2, p, 2.0));
+  /* asinpi(|x|) = asin(|x|) / pi.  */
+  y = vmulq_f64 (y, d->inv_pi);
+
+  /* Copy sign.  */
+  return vbslq_f64 (d->abs_mask, y, x);
+}
diff --git a/sysdeps/aarch64/fpu/asinpi_sve.c b/sysdeps/aarch64/fpu/asinpi_sve.c
new file mode 100644
index 0000000..71ef8ce
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpi_sve.c
@@ -0,0 +1,107 @@
+/* Double-Precision vector (SVE) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c1, c3, c5, c7, c9, c11;
+  float64_t c0, c2, c4, c6, c8, c10;
+  float64_t pi_over_2, inv_pi;
+} data = {
+  /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+     on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
+  .c0 = 0x1.555555555554ep-3,	     .c1 = 0x1.3333333337233p-4,
+  .c2 = 0x1.6db6db67f6d9fp-5,	     .c3 = 0x1.f1c71fbd29fbbp-6,
+  .c4 = 0x1.6e8b264d467d6p-6,	     .c5 = 0x1.1c5997c357e9dp-6,
+  .c6 = 0x1.c86a22cd9389dp-7,	     .c7 = 0x1.856073c22ebbep-7,
+  .c8 = 0x1.fd1151acb6bedp-8,	     .c9 = 0x1.087182f799c1dp-6,
+  .c10 = -0x1.6602748120927p-7,	     .c11 = 0x1.cfa0dd1f9478p-6,
+  .pi_over_2 = 0x1.921fb54442d18p+0, .inv_pi = 0x1.45f306dc9c883p-2,
+};
+
+/* Double-precision SVE implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+   approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+   The largest observed error in this region is 1.32 ulp:
+   _ZGVsMxv_asinpi (0x1.fc12356dbdefbp-2) got 0x1.5272e9658ba66p-3
+					 want 0x1.5272e9658ba64p-3
+
+   For |x| in [0.5, 1.0], use same approximation with a change of variable:
+  asin(x) = pi/2 - 2 (y + y * z * P(z)), with  z = (1-x)/2 and y = sqrt(z).
+
+   The largest observed error in this region is 3.48 ulp:
+   _ZGVsMxv_asinpi (0x1.03da0c2295424p-1) got 0x1.5b02b3dcafaefp-3
+					 want 0x1.5b02b3dcafaf2p-3.  */
+svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+  svfloat64_t ax = svabs_x (pg, x);
+  svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+			  svmul_x (ptrue, x, x));
+  svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
+
+  /* Use a single polynomial approximation P for both intervals.  */
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+
+  /* Order-11 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p411 = svmla_x (pg, p47, z8, p811);
+  svfloat64_t p = svmla_x (pg, p03, z8, p411);
+
+  /* Finalize polynomial: z + z3 * P(z2).  */
+  p = svmla_x (pg, z, z3, p);
+
+  /* asin(|x|) = Q(|x|)         , for |x| < 0.5
+	       = pi/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2);
+
+  /* Reinsert the sign from the argument by folding it into 1/pi.  */
+  svfloat64_t inv_pi = svreinterpret_f64 (
+      svorr_x (pg, svreinterpret_u64 (sv_f64 (d->inv_pi)), sign));
+
+  return svmul_x (pg, y, inv_pi);
+}
diff --git a/sysdeps/aarch64/fpu/asinpif_advsimd.c b/sysdeps/aarch64/fpu/asinpif_advsimd.c
new file mode 100644
index 0000000..1483ea8
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpif_advsimd.c
@@ -0,0 +1,95 @@
+/* Single-Precision vector (Advanced SIMD) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c0, c2, c4, inv_pi;
+  float c1, c3, c5, null;
+} data = {
+  /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2)
+     on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using
+     iterative approach for minimisation of relative error in Sollya file.  */
+  .c0 = V4 (0x1.b2995ep-5f),	 .c1 = 0x1.8724ep-6f,
+  .c2 = V4 (0x1.d1301ep-7f),	 .c3 = 0x1.446d3cp-7f,
+  .c4 = V4 (0x1.654848p-8f),	 .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = V4 (0x1.45f306p-2f),
+};
+
+#define AbsMask 0x7fffffff
+
+/* Single-precision implementation of vector asinpi(x).
+
+    For |x| < 0.5, use order 5 polynomial P such that the final
+   approximation is an odd polynomial: asinpif(x) ~ x/pi + x^3 P(x^2).
+
+    The largest observed error in this region is 1.68 ulps,
+      _ZGVnN4v_asinpif (0x1.86e514p-2) got 0x1.fea8c8p-4 want 0x1.fea8ccp-4.
+
+    For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+    asinpi(x) = 1/2 - 2 (y/pi + y * z * P(z)), z = (1-x)/2, y = sqrt(z).
+
+   The largest observed error in this region is 3.49 ulps,
+   _ZGVnN4v_asinpif(0x1.0d93fep-1) got 0x1.697aap-3 want 0x1.697a9ap-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asinpi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+  float32x4_t ax = vreinterpretq_f32_u32 (ia);
+  uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f));
+
+  /* Evaluate polynomial Q(x) = y/pi + y * z * P(z) with
+     z = x ^ 2 and y = |x|            , if |x| < 0.5
+     z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
+			      vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f));
+  float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
+
+  /* Use a single polynomial approximation P for both intervals.  */
+
+  /* Order-5 Estrin evaluation scheme.  */
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+  float32x4_t c135 = vld1q_f32 (&d->c1);
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1);
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2);
+  float32x4_t p = vfmaq_f32 (p03, z8, p45);
+  /* Add 1/pi as final coeff.  */
+  p = vfmaq_f32 (d->inv_pi, z2, p);
+
+  /* Finalize polynomial: z * P(z2).  */
+  p = vmulq_f32 (z, p);
+
+  /*  asinpi(|x|) = Q(|x|), for |x| < 0.5
+	       =  1/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  float32x4_t y
+      = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (v_f32 (0.5f), p, 2.0f));
+
+  /* Copy sign.  */
+  return vbslq_f32 (v_u32 (AbsMask), y, x);
+}
+libmvec_hidden_def (V_NAME_F1 (asinpi))
+HALF_WIDTH_ALIAS_F1 (asinpi)
diff --git a/sysdeps/aarch64/fpu/asinpif_sve.c b/sysdeps/aarch64/fpu/asinpif_sve.c
new file mode 100644
index 0000000..046b258
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinpif_sve.c
@@ -0,0 +1,88 @@
+/* Single-Precision vector (SVE) inverse sinpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c1, c3, c5;
+  float32_t c0, c2, c4, inv_pi;
+} data = {
+  /* Coefficients of order-5 polynomial P such that
+     asin(x)/pi ~ x/pi + x^3 * P(x^2) (same as the AdvSIMD asinpif).  */
+  .c0 = 0x1.b2995ep-5f,	    .c1 = 0x1.8724ep-6f,  .c2 = 0x1.d1301ep-7f,
+  .c3 = 0x1.446d3cp-7f,	    .c4 = 0x1.654848p-8f, .c5 = 0x1.5fdaa8p-7f,
+  .inv_pi = 0x1.45f306p-2f,
+};
+
+/* Single-precision SVE implementation of vector asinpi(x).
+
+   For |x| in [0, 0.5], use order 5 polynomial P such that the final
+   approximation is an odd polynomial: asinpi(x) ~ x/pi + x^3 P(x^2).
+
+    The largest observed error in this region is 1.96 ulps:
+    _ZGVsMxv_asinpif (0x1.8e534ep-3) got 0x1.fe6ab4p-5
+				    want 0x1.fe6ab8p-5.
+
+    For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+    asinpi(x) = 1/2 - 2 (y/pi + y * z * P(z)), z = (1-x)/2, y = sqrt(z).
+
+   The largest observed error in this region is 3.46 ulps:
+   _ZGVsMxv_asinpif (0x1.0df892p-1) got 0x1.6a114cp-3
+				   want 0x1.6a1146p-3.  */
+svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+
+  svfloat32_t ax = svabs_x (pg, x);
+  svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+  /* Evaluate polynomial Q(x) = y/pi + y * z * P(z) with
+   z = x ^ 2 and y = |x|            , if |x| < 0.5
+   z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5.  */
+  svfloat32_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5),
+			  svmul_x (pg, x, x));
+  svfloat32_t z = svsqrt_m (ax, a_ge_half, z2);
+
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t c135_two = svld1rq (ptrue, &d->c1);
+
+  /* Order-5 Pairwise Horner evaluation scheme.  */
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, c135_two, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, c135_two, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, c135_two, 2);
+
+  svfloat32_t p25 = svmla_x (pg, p23, z4, p45);
+  svfloat32_t p = svmla_x (pg, p01, z4, p25);
+
+  /* Add 1/pi as final coeff.  */
+  p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p);
+  p = svmul_x (pg, p, z);
+
+  /*  asinpi(|x|) = Q(|x|), for |x| < 0.5
+	       =  1/2 - 2 Q(|x|), for |x| >= 0.5.  */
+  svfloat32_t y = svmsb_m (a_ge_half, p, sv_f32 (2.0), 0.5);
+
+  /* Reinsert sign from argument.  */
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c
index 00b4a4f..a31d52f 100644
--- a/sysdeps/aarch64/fpu/atan2_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan2_advsimd.c
@@ -19,40 +19,38 @@
 
 #include "math_config.h"
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
   float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
   float64x2_t pi_over_2;
-  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
-  uint64x2_t zeroinfnan, minustwo;
+  uint64x2_t zeroinfnan;
 } data = {
-  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
-	      [2**-1022, 1.0].  */
-  .c0 = V2 (-0x1.5555555555555p-2),
-  .c1 = 0x1.99999999996c1p-3,
-  .c2 = V2 (-0x1.2492492478f88p-3),
-  .c3 = 0x1.c71c71bc3951cp-4,
-  .c4 = V2 (-0x1.745d160a7e368p-4),
-  .c5 = 0x1.3b139b6a88ba1p-4,
-  .c6 = V2 (-0x1.11100ee084227p-4),
-  .c7 = 0x1.e1d0f9696f63bp-5,
-  .c8 = V2 (-0x1.aebfe7b418581p-5),
-  .c9 = 0x1.842dbe9b0d916p-5,
-  .c10 = V2 (-0x1.5d30140ae5e99p-5),
-  .c11 = 0x1.338e31eb2fbbcp-5,
-  .c12 = V2 (-0x1.00e6eece7de8p-5),
-  .c13 = 0x1.860897b29e5efp-6,
-  .c14 = V2 (-0x1.0051381722a59p-6),
-  .c15 = 0x1.14e9dc19a4a4ep-7,
-  .c16 = V2 (-0x1.d0062b42fe3bfp-9),
-  .c17 = 0x1.17739e210171ap-10,
-  .c18 = V2 (-0x1.ab24da7be7402p-13),
-  .c19 = 0x1.358851160a528p-16,
+  /* Coefficients of polynomial P such that
+     atan(x)~x+x*P(x^2) on [2^-1022, 1.0].  */
+  .c0 = V2 (-0x1.555555555552ap-2),
+  .c1 = 0x1.9999999995aebp-3,
+  .c2 = V2 (-0x1.24924923923f6p-3),
+  .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = V2 (-0x1.745d11fb3d32bp-4),
+  .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = V2 (-0x1.110e6d985f496p-4),
+  .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = V2 (-0x1.ae644e28058c3p-5),
+  .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = V2 (-0x1.59d7f901566cbp-5),
+  .c11 = 0x1.2c982855ab069p-5,
+  .c12 = V2 (-0x1.eb49592998177p-6),
+  .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = V2 (-0x1.ca980345c4204p-7),
+  .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = V2 (-0x1.7ea70755b8eccp-9),
+  .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = V2 (-0x1.44a4b059b6f67p-13),
+  .c19 = 0x1.c4a45029e5a91p-17,
   .pi_over_2 = V2 (0x1.921fb54442d18p+0),
   .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1),
-  .minustwo = V2 (0xc000000000000000),
 };
 
 #define SignMask v_u64 (0x8000000000000000)
@@ -77,10 +75,9 @@ zeroinfnan (uint64x2_t i, const struct data *d)
 }
 
 /* Fast implementation of vector atan2.
-   Maximum observed error is 2.8 ulps:
-   _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
-	got 0x1.92d628ab678ccp-1
-       want 0x1.92d628ab678cfp-1.  */
+   Maximum observed error is 1.97 ulps:
+   _ZGVnN2vv_atan2 (0x1.42337dba73768p+5, 0x1.422d748cd3e29p+5)
+   got 0x1.9224810264efcp-1 want 0x1.9224810264efep-1.  */
 float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -101,26 +98,29 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
   uint64x2_t pred_xlt0 = vcltzq_f64 (x);
   uint64x2_t pred_aygtax = vcagtq_f64 (y, x);
 
-  /* Set up z for call to atan.  */
-  float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
-  float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax);
-  float64x2_t z = vdivq_f64 (n, q);
-
-  /* Work out the correct shift.  */
-  float64x2_t shift
-      = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo));
-  shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift);
-  shift = vmulq_f64 (shift, d->pi_over_2);
-
-  /* Calculate the polynomial approximation.
-     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.
-     The order 19 polynomial P approximates
-     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
+  /* Set up z for evaluation of atan.  */
+  float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+  float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax);
+  float64x2_t z = vdivq_f64 (num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by pi/2 is done later.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0))));
+  float64x2_t shift2 = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (1.0))));
+  shift = vaddq_f64 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
   float64x2_t z2 = vmulq_f64 (z, z);
-  float64x2_t x2 = vmulq_f64 (z2, z2);
-  float64x2_t x4 = vmulq_f64 (x2, x2);
-  float64x2_t x8 = vmulq_f64 (x4, x4);
+  float64x2_t z3 = vmulq_f64 (z2, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
 
   float64x2_t c13 = vld1q_f64 (&d->c1);
   float64x2_t c57 = vld1q_f64 (&d->c5);
@@ -128,45 +128,43 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
   float64x2_t c1315 = vld1q_f64 (&d->c13);
   float64x2_t c1719 = vld1q_f64 (&d->c17);
 
-  /* estrin_7.  */
+  /* Order-7 Estrin.  */
   float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
   float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
-  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
 
   float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
   float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
-  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
 
-  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
 
-  /* estrin_11.  */
+  /* Order-11 Estrin.  */
   float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
   float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
-  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
 
   float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
   float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
-  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+  float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415);
 
   float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
   float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
-  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+  float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819);
 
-  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
-  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+  float64x2_t p815 = vfmaq_f64 (p811, z8, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, z16, p1619);
 
-  float64x2_t ret = vfmaq_f64 (p07, p819, x8);
+  float64x2_t poly = vfmaq_f64 (p07, p819, z16);
 
   /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z));
-  ret = vaddq_f64 (ret, shift);
+  float64x2_t ret = vfmaq_f64 (z, shift, d->pi_over_2);
+  ret = vfmaq_f64 (ret, z3, poly);
 
   if (__glibc_unlikely (v_any_u64 (special_cases)))
     return special_case (y, x, ret, sign_xy, special_cases);
 
   /* Account for the sign of x and y.  */
-  ret = vreinterpretq_f64_u64 (
+  return vreinterpretq_f64_u64 (
       veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
-
-  return ret;
 }
diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
index 163f613..9e2dd24 100644
--- a/sysdeps/aarch64/fpu/atan2_sve.c
+++ b/sysdeps/aarch64/fpu/atan2_sve.c
@@ -19,25 +19,25 @@
 
 #include "math_config.h"
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[20];
-  float64_t pi_over_2;
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-1022, 1.0].  */
-  .poly = { -0x1.5555555555555p-2,  0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
-            0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-            -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
-            0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-            -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
-            0x1.14e9dc19a4a4ep-7,  -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-            -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
-  .pi_over_2 = 0x1.921fb54442d18p+0,
+  .c0 = -0x1.555555555552ap-2,	 .c1 = 0x1.9999999995aebp-3,
+  .c2 = -0x1.24924923923f6p-3,	 .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = -0x1.745d11fb3d32bp-4,	 .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = -0x1.110e6d985f496p-4,	 .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = -0x1.ae644e28058c3p-5,	 .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = -0x1.59d7f901566cbp-5,	 .c11 = 0x1.2c982855ab069p-5,
+  .c12 = -0x1.eb49592998177p-6,	 .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = -0x1.ca980345c4204p-7,	 .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = -0x1.7ea70755b8eccp-9,	 .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17,
 };
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
 static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
@@ -56,15 +56,17 @@ zeroinfnan (svuint64_t i, const svbool_t pg)
 }
 
 /* Fast implementation of SVE atan2. Errors are greatest when y and
-   x are reasonably close together. The greatest observed error is 2.28 ULP:
-   _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
-   got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1.  */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+   x are reasonably close together. The greatest observed error is 1.94 ULP:
+   _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5)
+   got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1.  */
+svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+				const svbool_t pg)
 {
-  const struct data *data_ptr = ptr_barrier (&data);
+  const struct data *d = ptr_barrier (&data);
 
   svuint64_t ix = svreinterpret_u64 (x);
   svuint64_t iy = svreinterpret_u64 (y);
+  svbool_t ptrue = svptrue_b64 ();
 
   svbool_t cmp_x = zeroinfnan (ix, pg);
   svbool_t cmp_y = zeroinfnan (iy, pg);
@@ -81,32 +83,67 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
 
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
-  /* Set up z for call to atan.  */
-  svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
-  svfloat64_t d = svsel (pred_aygtax, ay, ax);
-  svfloat64_t z = svdiv_x (pg, n, d);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atan.  */
+  svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat64_t den = svsel (pred_aygtax, ay, ax);
+  svfloat64_t z = svdiv_x (pg, num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by +/-pi/2 (shift_mul) is done in the FMA below.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
   svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  svfloat64_t shift_mul = svreinterpret_f64 (
+      svorr_x (pg, sign_x, svreinterpret_u64 (sv_f64 (0x1.921fb54442d18p+0))));
   shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
-  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
-  shift = svmul_x (pg, shift, data_ptr->pi_over_2);
+  shift = svmla_x (pg, z, shift, shift_mul);
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
   svfloat64_t z2 = svmul_x (pg, z, z);
-  svfloat64_t x2 = svmul_x (pg, z2, z2);
-  svfloat64_t x4 = svmul_x (pg, x2, x2);
-  svfloat64_t x8 = svmul_x (pg, x4, x4);
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+  svfloat64_t z16 = svmul_x (pg, z8, z8);
 
-  svfloat64_t ret = svmla_x (
-      pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly),
-      sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8);
+  /* Order-7 Estrin.  */
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
 
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat64_t z3 = svmul_x (pg, z2, z);
-  ret = svmla_x (pg, z, z3, ret);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, z8, p47);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+  svfloat64_t c1315 = svld1rq (ptrue, &d->c13);
+  svfloat64_t c1719 = svld1rq (ptrue, &d->c17);
 
-  ret = svadd_m (pg, ret, shift);
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
+
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1);
+  svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415);
+
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0);
+  svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1);
+  svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819);
+
+  svfloat64_t p815 = svmla_x (pg, p811, z8, p1215);
+  svfloat64_t p819 = svmla_x (pg, p815, z16, p1619);
+
+  svfloat64_t poly = svmla_x (pg, p07, z16, p819);
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  svfloat64_t ret = svmla_x (pg, shift, z3, poly);
 
   /* Account for the sign of x and y.  */
   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c
index e65406f..75d8738 100644
--- a/sysdeps/aarch64/fpu/atan2f_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c
@@ -18,22 +18,22 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f32.h"
 
 static const struct data
 {
-  float32x4_t c0, pi_over_2, c4, c6, c2;
+  float32x4_t c0, c4, c6, c2;
   float c1, c3, c5, c7;
   uint32x4_t comp_const;
+  float32x4_t pi;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].
      Generated using fpminimax between FLT_MIN and 1.  */
-  .c0 = V4 (-0x1.55555p-2f),	    .c1 = 0x1.99935ep-3f,
-  .c2 = V4 (-0x1.24051ep-3f),	    .c3 = 0x1.bd7368p-4f,
-  .c4 = V4 (-0x1.491f0ep-4f),	    .c5 = 0x1.93a2c0p-5f,
-  .c6 = V4 (-0x1.4c3c60p-6f),	    .c7 = 0x1.01fd88p-8f,
-  .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1),
+  .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3,
+  .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4,
+  .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5,
+  .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9,
+  .pi = V4 (0x1.921fb6p+1f), .comp_const = V4 (2 * 0x7f800000lu - 1),
 };
 
 #define SignMask v_u32 (0x80000000)
@@ -54,13 +54,13 @@ static inline uint32x4_t
 zeroinfnan (uint32x4_t i, const struct data *d)
 {
   /* 2 * i - 1 >= 2 * 0x7f800000lu - 1.  */
-  return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const);
+  return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const);
 }
 
 /* Fast implementation of vector atan2f. Maximum observed error is
-   2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
-   _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
-						 want 0x1.967f00p-1.  */
+   2.13 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
+   _ZGVnN4vv_atan2f (0x1.14a9d4p-87, 0x1.0eb886p-87) got 0x1.97aea2p-1
+						    want 0x1.97ae9ep-1.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -81,28 +81,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
   uint32x4_t pred_xlt0 = vcltzq_f32 (x);
   uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax);
 
-  /* Set up z for call to atanf.  */
-  float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
-  float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax);
-  float32x4_t z = vdivq_f32 (n, q);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atanf.  */
+  float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
+  float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax);
+  float32x4_t z = vdivq_f32 (num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by pi is done later.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
   float32x4_t shift = vreinterpretq_f32_u32 (
-      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f))));
-  shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift);
-  shift = vmulq_f32 (shift, d->pi_over_2);
-
-  /* Calculate the polynomial approximation.
-     Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
-     a standard implementation using z8 creates spurious underflow
-     in the very last fma (when z^8 is small enough).
-     Therefore, we split the last fma into a mul and an fma.
-     Horner and single-level Estrin have higher errors that exceed
-     threshold.  */
+      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f))));
+  float32x4_t shift2 = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f))));
+  shift = vaddq_f32 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
   float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z2, z);
   float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
 
   float32x4_t c1357 = vld1q_f32 (&d->c1);
+
   float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0);
   float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1);
   float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2);
@@ -110,10 +113,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
   float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
   float32x4_t p47 = vfmaq_f32 (p45, z4, p67);
 
-  float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47));
+  float32x4_t poly = vfmaq_f32 (p03, z8, p47);
 
   /* y = shift + z * P(z^2).  */
-  ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift);
+  float32x4_t ret = vfmaq_f32 (z, shift, d->pi);
+  ret = vfmaq_f32 (ret, z3, poly);
 
   if (__glibc_unlikely (v_any_u32 (special_cases)))
     {
diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
index 5f26e2a..4d93419 100644
--- a/sysdeps/aarch64/fpu/atan2f_sve.c
+++ b/sysdeps/aarch64/fpu/atan2f_sve.c
@@ -18,18 +18,18 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 static const struct data
 {
-  float32_t poly[8];
+  float32_t c0, c2, c4, c6;
+  float32_t c1, c3, c5, c7;
   float32_t pi_over_2;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].  */
-  .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
-  .pi_over_2 = 0x1.921fb6p+0f,
+  .c0 = -0x1.5554dcp-2, .c1 = 0x1.9978ecp-3,  .c2 = -0x1.230a94p-3,
+  .c3 = 0x1.b4debp-4,	.c4 = -0x1.3550dap-4, .c5 = 0x1.61eebp-5,
+  .c6 = -0x1.0c17d4p-6, .c7 = 0x1.7ea694p-9,  .pi_over_2 = 0x1.921fb6p+0f,
 };
 
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
@@ -51,12 +51,14 @@ zeroinfnan (svuint32_t i, const svbool_t pg)
 
 /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 *
    P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum
-   observed error is 2.95 ULP:
-   _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
-						 want 0x1.967f00p-1.  */
-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+   observed error is 2.21 ULP:
+   _ZGVsMxvv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1
+						 want 0x1.95ed36p-1.  */
+svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
+				const svbool_t pg)
 {
-  const struct data *data_ptr = ptr_barrier (&data);
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
 
   svuint32_t ix = svreinterpret_u32 (x);
   svuint32_t iy = svreinterpret_u32 (y);
@@ -76,29 +78,42 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
 
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
-  /* Set up z for call to atan.  */
-  svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
-  svfloat32_t d = svsel (pred_aygtax, ay, ax);
-  svfloat32_t z = svdiv_x (pg, n, d);
-
-  /* Work out the correct shift.  */
+  /* Set up z for evaluation of atanf.  */
+  svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat32_t den = svsel (pred_aygtax, ay, ax);
+  svfloat32_t z = svdiv_x (ptrue, num, den);
+
+  /* Work out the correct shift for atan2:
+     Multiplication by pi/2 is done later.
+     -pi   when x < 0  and ax >= ay
+     -pi/2 when x < 0  and ax < ay
+      0    when x >= 0 and ax >= ay
+      pi/2 when x >= 0 and ax < ay.  */
   svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
   shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
   shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
-  shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
 
   /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
-  svfloat32_t z2 = svmul_x (pg, z, z);
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z3 = svmul_x (pg, z2, z);
   svfloat32_t z4 = svmul_x (pg, z2, z2);
   svfloat32_t z8 = svmul_x (pg, z4, z4);
 
-  svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly);
+  svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1);
 
-  /* ret = shift + z + z^3 * P(z^2).  */
-  svfloat32_t z3 = svmul_x (pg, z2, z);
-  ret = svmla_x (pg, z, z3, ret);
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
+  svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3);
 
-  ret = svadd_m (pg, ret, shift);
+  svfloat32_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat32_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat32_t poly = svmla_x (pg, p03, z8, p47);
+
+  /* ret = shift + z + z^3 * P(z^2).  */
+  svfloat32_t ret = svmla_x (pg, z, shift, sv_f32 (d->pi_over_2));
+  ret = svmla_x (pg, ret, z3, poly);
 
   /* Account for the sign of x and y.  */
 
diff --git a/sysdeps/aarch64/fpu/atan2pi_advsimd.c b/sysdeps/aarch64/fpu/atan2pi_advsimd.c
new file mode 100644
index 0000000..3cf231b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pi_advsimd.c
@@ -0,0 +1,175 @@
+/* Double-Precision vector (Advanced SIMD) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64x2_t c0;
+  uint64x2_t zeroinfnan;
+  float64x2_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+	      [2**-1022, 1.0].  */
+  .c0 = V2 (0x1.45f306dc9c883p-2),
+  .c1 = V2 (-0x1.b2995e7b7ba4ap-4),
+  .c2 = 0x1.04c26be3d2c1p-4,
+  .c3 = V2 (-0x1.7483759c17ea1p-5),
+  .c4 = 0x1.21bb95c315d57p-5,
+  .c5 = V2 (-0x1.da1bdc3d453f3p-6),
+  .c6 = 0x1.912d20459b4bfp-6,
+  .c7 = V2 (-0x1.5bbd4545cad1fp-6),
+  .c8 = 0x1.331b83bec30a1p-6,
+  .c9 = V2 (-0x1.13d6457f44de3p-6),
+  .c10 = 0x1.f8e802974db94p-7,
+  .c11 = V2 (-0x1.d7e173ab04a1ap-7),
+  .c12 = 0x1.bdfa47d6a4f28p-7,
+  .c13 = V2 (-0x1.9ba78f3232ceep-7),
+  .c14 = 0x1.5e6044590ab4fp-7,
+  .c15 = V2 (-0x1.01ccfdeb9f77fp-7),
+  .c16 = 0x1.345cf0d4eb1c1p-8,
+  .c17 = V2 (-0x1.19e5f00f67e3ap-9),
+  .c18 = 0x1.6d3035ac7625bp-11,
+  .c19 = V2 (-0x1.286bb9ae4ed79p-13),
+  .c20 = 0x1.c37ec36da0e1ap-17,
+  .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1),
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+#define OneOverPi v_f64 (0x1.45f306dc9c883p-2)
+
+/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls).  */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t y, float64x2_t x, float64x2_t ret,
+	      uint64x2_t sign_xy, uint64x2_t cmp)
+{
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+
+  /* Since we have no scalar fallback for atan2pi,
+     we can instead make a call to atan2 and divide by pi.  */
+  ret = v_call2_f64 (atan2, y, x, ret, cmp);
+
+  /* Only divide the special cases by pi, and leave the rest unchanged.  */
+  return vbslq_f64 (cmp, vmulq_f64 (ret, OneOverPi), ret);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline uint64x2_t
+zeroinfnan (uint64x2_t i, const struct data *d)
+{
+  /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1).  */
+  return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan);
+}
+
+/* Fast implementation of vector atan2pi.
+   Maximum observed error is 3.04 ulps:
+   _ZGVnN2vv_atan2pi (0x1.1e0733532ce28p+5, 0x1.2d803379cca1fp+5)
+   got 0x1.eed60c1e89317p-3 want 0x1.eed60c1e89314p-3.  */
+float64x2_t VPCS_ATTR V_NAME_D2 (atan2pi) (float64x2_t y, float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t iy = vreinterpretq_u64_f64 (y);
+
+  uint64x2_t special_cases
+      = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d));
+
+  uint64x2_t sign_x = vandq_u64 (ix, SignMask);
+  uint64x2_t sign_y = vandq_u64 (iy, SignMask);
+  uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y);
+
+  float64x2_t ax = vabsq_f64 (x);
+  float64x2_t ay = vabsq_f64 (y);
+
+  uint64x2_t pred_xlt0 = vcltzq_f64 (x);
+  uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax);
+
+  /* Set up z for evaluation of atanpi.  */
+  float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+  float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax);
+  float64x2_t z = vdivq_f64 (num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-1.0))));
+  float64x2_t shift2 = vreinterpretq_f64_u64 (
+      vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (0.5))));
+  shift = vaddq_f64 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
+  float64x2_t z2 = vmulq_f64 (z, z);
+  float64x2_t z3 = vmulq_f64 (z2, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  float64x2_t c24 = vld1q_f64 (&d->c2);
+  float64x2_t c68 = vld1q_f64 (&d->c6);
+
+  /* Order-7 Estrin.  */
+  float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0);
+  float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1);
+  float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0);
+  float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1);
+
+  float64x2_t p14 = vfmaq_f64 (p12, z4, p34);
+  float64x2_t p58 = vfmaq_f64 (p56, z4, p78);
+  float64x2_t p18 = vfmaq_f64 (p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c1012 = vld1q_f64 (&d->c10);
+  float64x2_t c1416 = vld1q_f64 (&d->c14);
+  float64x2_t c1820 = vld1q_f64 (&d->c18);
+
+  float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0);
+  float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1);
+  float64x2_t p912 = vfmaq_f64 (p910, z4, p1112);
+
+  float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0);
+  float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1);
+  float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516);
+
+  float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0);
+  float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1);
+  float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920);
+
+  float64x2_t p916 = vfmaq_f64 (p912, z8, p1316);
+  float64x2_t p920 = vfmaq_f64 (p916, z16, p1720);
+
+  float64x2_t poly = vfmaq_f64 (p18, z16, p920);
+
+  /* ret = shift + c0 * z + z^3 * P(z^2).  */
+  float64x2_t ret = vfmaq_f64 (shift, z, d->c0);
+  ret = vfmaq_f64 (ret, z3, poly);
+
+  if (__glibc_unlikely (v_any_u64 (special_cases)))
+    return special_case (y, x, ret, sign_xy, special_cases);
+
+  /* Account for the sign of x and y.  */
+  return vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan2pi_sve.c b/sysdeps/aarch64/fpu/atan2pi_sve.c
new file mode 100644
index 0000000..f1d1f1c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pi_sve.c
@@ -0,0 +1,159 @@
+/* Double-Precision vector (SVE) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "math_config.h"
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+     [2^-1022, 1.0].  */
+  .c0 = 0x1.45f306dc9c883p-2,	.c1 = -0x1.b2995e7b7ba4ap-4,
+  .c2 = 0x1.04c26be3d2c1p-4,	.c3 = -0x1.7483759c17ea1p-5,
+  .c4 = 0x1.21bb95c315d57p-5,	.c5 = -0x1.da1bdc3d453f3p-6,
+  .c6 = 0x1.912d20459b4bfp-6,	.c7 = -0x1.5bbd4545cad1fp-6,
+  .c8 = 0x1.331b83bec30a1p-6,	.c9 = -0x1.13d6457f44de3p-6,
+  .c10 = 0x1.f8e802974db94p-7,	.c11 = -0x1.d7e173ab04a1ap-7,
+  .c12 = 0x1.bdfa47d6a4f28p-7,	.c13 = -0x1.9ba78f3232ceep-7,
+  .c14 = 0x1.5e6044590ab4fp-7,	.c15 = -0x1.01ccfdeb9f77fp-7,
+  .c16 = 0x1.345cf0d4eb1c1p-8,	.c17 = -0x1.19e5f00f67e3ap-9,
+  .c18 = 0x1.6d3035ac7625bp-11, .c19 = -0x1.286bb9ae4ed79p-13,
+  .c20 = 0x1.c37ec36da0e1ap-17, .shift_val = 0.5,
+};
+
+#define OneOverPi sv_f64 (0x1.45f306dc9c883p-2)
+
+/* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
+static svfloat64_t NOINLINE
+special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
+	      const svbool_t cmp)
+{
+  ret = sv_call2_f64 (atan2, y, x, ret, cmp);
+  return svmul_f64_m (cmp, ret, OneOverPi);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+   of 0, infinity or nan.  */
+static inline svbool_t
+zeroinfnan (svuint64_t i, const svbool_t pg)
+{
+  return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+		  sv_u64 (2 * asuint64 (INFINITY) - 1));
+}
+
+/* Fast implementation of SVE atan2pi.
+   Maximum observed error is 3.11 ulps:
+   _ZGVsMxvv_atan2pi (0x1.ef284a877f6b5p+6, 0x1.03fdde8242b17p+7)
+   got 0x1.f00f800163079p-3 want 0x1.f00f800163076p-3.  */
+svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
+				  const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b64 ();
+
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t iy = svreinterpret_u64 (y);
+
+  svbool_t cmp_x = zeroinfnan (ix, pg);
+  svbool_t cmp_y = zeroinfnan (iy, pg);
+  svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+  svfloat64_t ax = svabs_x (pg, x);
+  svfloat64_t ay = svabs_x (pg, y);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t iay = svreinterpret_u64 (ay);
+
+  svuint64_t sign_x = sveor_x (pg, ix, iax);
+  svuint64_t sign_y = sveor_x (pg, iy, iay);
+  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+  svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+  /* Set up z for evaluation of atanpi.  */
+  svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat64_t den = svsel (pred_aygtax, ay, ax);
+  svfloat64_t z = svdiv_x (pg, num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  shift = svmul_x (ptrue, shift, sv_f64 (d->shift_val));
+  shift = svsel (pred_aygtax, sv_f64 (d->shift_val), shift);
+  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
+
+  /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
+  svfloat64_t z2 = svmul_x (pg, z, z);
+  svfloat64_t z3 = svmul_x (pg, z2, z);
+  svfloat64_t z4 = svmul_x (pg, z2, z2);
+  svfloat64_t z8 = svmul_x (pg, z4, z4);
+  svfloat64_t z16 = svmul_x (pg, z8, z8);
+
+  /* Order-7 Estrin.  */
+  svfloat64_t c24 = svld1rq (ptrue, &d->c2);
+  svfloat64_t c68 = svld1rq (ptrue, &d->c6);
+
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1);
+  svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0);
+  svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1);
+
+  svfloat64_t p14 = svmla_x (pg, p12, z4, p34);
+  svfloat64_t p58 = svmla_x (pg, p56, z4, p78);
+  svfloat64_t p18 = svmla_x (pg, p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c1012 = svld1rq (ptrue, &d->c10);
+  svfloat64_t c1416 = svld1rq (ptrue, &d->c14);
+  svfloat64_t c1820 = svld1rq (ptrue, &d->c18);
+
+  svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0);
+  svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1);
+  svfloat64_t p912 = svmla_x (pg, p910, z4, p1112);
+
+  svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0);
+  svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1);
+  svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516);
+
+  svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0);
+  svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1);
+  svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920);
+
+  svfloat64_t p916 = svmla_x (pg, p912, z8, p1316);
+  svfloat64_t p920 = svmla_x (pg, p916, z16, p1720);
+
+  svfloat64_t poly = svmla_x (pg, p18, z16, p920);
+
+  svfloat64_t ret = svmla_x (pg, shift, z, sv_f64 (d->c0));
+  ret = svmla_x (pg, ret, z3, poly);
+
+  /* Account for the sign of x and y.  */
+  if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+    return special_case (
+	y, x,
+	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
+	cmp_xy);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan2pif_advsimd.c b/sysdeps/aarch64/fpu/atan2pif_advsimd.c
new file mode 100644
index 0000000..f1f542b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pif_advsimd.c
@@ -0,0 +1,138 @@
+/* Single-Precision vector (Advanced SIMD) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  float32x4_t c1, c3, c5, c7;
+  float c2, c4, c6, c8;
+  float32x4_t c0;
+  uint32x4_t comp_const;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+     [2^-128, 1.0].
+     Generated using fpminimax between FLT_MIN and 1.  */
+  .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4),
+  .c2 = 0x1.0490e4p-4,	    .c3 = V4 (-0x1.70c272p-5),
+  .c4 = 0x1.0eef52p-5,	    .c5 = V4 (-0x1.6abbbap-6),
+  .c6 = 0x1.78157p-7,	    .c7 = V4 (-0x1.f0b406p-9),
+  .c8 = 0x1.2ae7fep-11,	    .comp_const = V4 (2 * 0x7f800000lu - 1),
+};
+
+#define SignMask v_u32 (0x80000000)
+#define OneOverPi v_f32 (0x1.45f307p-2)
+
+/* Special cases i.e. 0, infinity and nan (fall back to scalar calls).  */
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t y, float32x4_t x, float32x4_t ret,
+	      uint32x4_t sign_xy, uint32x4_t cmp)
+{
+  /* Account for the sign of x and y.  */
+  ret = vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
+
+  /* Since we have no scalar fallback for atan2pif,
+     we can instead make a call to atan2f and divide by pi.  */
+  ret = v_call2_f32 (atan2f, y, x, ret, cmp);
+
+  /* Only divide the special cases by pi, and leave the rest unchanged.  */
+  return vbslq_f32 (cmp, vmulq_f32 (ret, OneOverPi), ret);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
+static inline uint32x4_t
+zeroinfnan (uint32x4_t i, const struct data *d)
+{
+  /* 2 * i - 1 >= 2 * 0x7f800000lu - 1.  */
+  return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const);
+}
+
+/* Fast implementation of vector atan2pif. Maximum observed error is 2.89 ULP:
+   _ZGVnN4vv_atan2pif (0x1.bd397p+54, 0x1.e79a4ap+54) got 0x1.e2678ep-3
+						     want 0x1.e26794p-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2pi) (float32x4_t y,
+						    float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t iy = vreinterpretq_u32_f32 (y);
+
+  uint32x4_t special_cases
+      = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d));
+
+  uint32x4_t sign_x = vandq_u32 (ix, SignMask);
+  uint32x4_t sign_y = vandq_u32 (iy, SignMask);
+  uint32x4_t sign_xy = veorq_u32 (sign_x, sign_y);
+
+  float32x4_t ax = vabsq_f32 (x);
+  float32x4_t ay = vabsq_f32 (y);
+
+  uint32x4_t pred_xlt0 = vcltzq_f32 (x);
+  uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax);
+
+  /* Set up z for evaluation of atanpif.  */
+  float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
+  float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax);
+  float32x4_t z = vdivq_f32 (num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  float32x4_t shift = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f))));
+  float32x4_t shift2 = vreinterpretq_f32_u32 (
+      vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f))));
+  shift = vaddq_f32 (shift, shift2);
+
+  /* Calculate the polynomial approximation.  */
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z2, z);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  float32x4_t c2468 = vld1q_f32 (&d->c2);
+
+  float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, c2468, 0);
+  float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, c2468, 1);
+  float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, c2468, 2);
+  float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, c2468, 3);
+  float32x4_t p14 = vfmaq_f32 (p12, z4, p34);
+  float32x4_t p58 = vfmaq_f32 (p56, z4, p78);
+
+  float32x4_t poly = vfmaq_f32 (p14, z8, p58);
+
+  /* ret = shift + c0 * z + z^3 * P(z^2).  */
+  float32x4_t ret = vfmaq_f32 (shift, z, d->c0);
+  ret = vfmaq_f32 (ret, z3, poly);
+
+  if (__glibc_unlikely (v_any_u32 (special_cases)))
+    {
+      return special_case (y, x, ret, sign_xy, special_cases);
+    }
+
+  /* Account for the sign of x and y.  */
+  return vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
+}
+libmvec_hidden_def (V_NAME_F2 (atan2pi))
+HALF_WIDTH_ALIAS_F2 (atan2pi)
diff --git a/sysdeps/aarch64/fpu/atan2pif_sve.c b/sysdeps/aarch64/fpu/atan2pif_sve.c
new file mode 100644
index 0000000..d5ac4b7
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atan2pif_sve.c
@@ -0,0 +1,137 @@
+/* Single-Precision vector (SVE) inverse tan2pi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c0, c1, c3, c5, c7;
+  float32_t c2, c4, c6, c8;
+  float32_t shift_val;
+  uint32_t comp_const;
+} data = {
+  /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+     [2**-128, 1.0].  */
+  .c0 = 0x1.45f306p-2,
+  .c1 = -0x1.b2975ep-4,
+  .c2 = 0x1.0490e4p-4,
+  .c3 = -0x1.70c272p-5,
+  .c4 = 0x1.0eef52p-5,
+  .c5 = -0x1.6abbbap-6,
+  .c6 = 0x1.78157p-7,
+  .c7 = -0x1.f0b406p-9,
+  .c8 = 0x1.2ae7fep-11,
+  .shift_val = 0.5f,
+  .comp_const = 2 * 0x7f800000lu - 1,
+};
+
+#define OneOverPi sv_f32 (0x1.45f307p-2)
+
+/* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  Only the
+   cmp lanes are scaled by 1/pi; merging keeps the other lanes of ret.  */
+static svfloat32_t NOINLINE
+special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
+	      const svbool_t cmp)
+{
+  return svmul_f32_m (cmp, sv_call2_f32 (atan2f, y, x, ret, cmp), OneOverPi);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+   of 0, infinity or nan.  */
+static inline svbool_t
+zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
+{
+  return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+		  sv_u32 (d->comp_const));
+}
+
+/* Fast implementation of SVE atan2pif based on atan(x) ~ shift + z + z^3 *
+   P(z^2) with reduction to [0,1] using z=1/x and shift = 1/2. Maximum
+   observed error is 2.90 ULP:
+   _ZGVsMxvv_atan2pif (0x1.a28542p+5, 0x1.adb7c6p+5) got 0x1.f76524p-3
+						    want 0x1.f7651ep-3.  */
+svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
+				  const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  svuint32_t ix = svreinterpret_u32 (x);
+  svuint32_t iy = svreinterpret_u32 (y);
+
+  svbool_t cmp_x = zeroinfnan (ix, pg, d);
+  svbool_t cmp_y = zeroinfnan (iy, pg, d);
+  svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+  svfloat32_t ax = svabs_x (pg, x);
+  svfloat32_t ay = svabs_x (pg, y);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  svuint32_t iay = svreinterpret_u32 (ay);
+
+  svuint32_t sign_x = sveor_x (pg, ix, iax);
+  svuint32_t sign_y = sveor_x (pg, iy, iay);
+  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+  svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+  /* Set up z for evaluation of atanpif.  */
+  svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+  svfloat32_t den = svsel (pred_aygtax, ay, ax);
+  svfloat32_t z = svdiv_x (ptrue, num, den);
+
+  /* Work out the correct shift for atan2pi:
+     -1.0 when x < 0  and ax >= ay
+     -0.5 when x < 0  and ax < ay
+      0   when x >= 0 and ax >= ay
+      0.5 when x >= 0 and ax < ay.  */
+  svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
+  shift = svmul_x (ptrue, shift, sv_f32 (d->shift_val));
+  shift = svsel (pred_aygtax, sv_f32 (d->shift_val), shift);
+  shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
+
+  /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
+  svfloat32_t z2 = svmul_x (pg, z, z);
+  svfloat32_t z4 = svmul_x (pg, z2, z2);
+  svfloat32_t z8 = svmul_x (pg, z4, z4);
+
+  svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2);
+
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1);
+  svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2);
+  svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3);
+
+  svfloat32_t p14 = svmad_x (pg, z4, p34, p12);
+  svfloat32_t p58 = svmad_x (pg, z4, p78, p56);
+
+  svfloat32_t p18 = svmad_x (pg, z8, p58, p14);
+
+  /* ret = shift + c0 * z + z^3 * P(z^2).  */
+  svfloat32_t poly = svmad_x (pg, z2, p18, d->c0);
+  svfloat32_t ret = svmad_x (pg, poly, z, shift);
+
+  if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
+    return special_case (
+	y, x,
+	svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
+	cmp_xy);
+
+  /* Account for the sign of x and y.  */
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
+}
diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c
index f024fd1..da0d371 100644
--- a/sysdeps/aarch64/fpu/atan_advsimd.c
+++ b/sysdeps/aarch64/fpu/atan_advsimd.c
@@ -18,7 +18,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "v_math.h"
-#include "poly_advsimd_f64.h"
 
 static const struct data
 {
@@ -28,16 +27,16 @@ static const struct data
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
 	      [2**-1022, 1.0].  */
-  .c0 = V2 (-0x1.5555555555555p-2),	  .c1 = 0x1.99999999996c1p-3,
-  .c2 = V2 (-0x1.2492492478f88p-3),	  .c3 = 0x1.c71c71bc3951cp-4,
-  .c4 = V2 (-0x1.745d160a7e368p-4),	  .c5 = 0x1.3b139b6a88ba1p-4,
-  .c6 = V2 (-0x1.11100ee084227p-4),	  .c7 = 0x1.e1d0f9696f63bp-5,
-  .c8 = V2 (-0x1.aebfe7b418581p-5),	  .c9 = 0x1.842dbe9b0d916p-5,
-  .c10 = V2 (-0x1.5d30140ae5e99p-5),	  .c11 = 0x1.338e31eb2fbbcp-5,
-  .c12 = V2 (-0x1.00e6eece7de8p-5),	  .c13 = 0x1.860897b29e5efp-6,
-  .c14 = V2 (-0x1.0051381722a59p-6),	  .c15 = 0x1.14e9dc19a4a4ep-7,
-  .c16 = V2 (-0x1.d0062b42fe3bfp-9),	  .c17 = 0x1.17739e210171ap-10,
-  .c18 = V2 (-0x1.ab24da7be7402p-13),	  .c19 = 0x1.358851160a528p-16,
+  .c0 = V2 (-0x1.555555555552ap-2),	  .c1 = 0x1.9999999995aebp-3,
+  .c2 = V2 (-0x1.24924923923f6p-3),	  .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = V2 (-0x1.745d11fb3d32bp-4),	  .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = V2 (-0x1.110e6d985f496p-4),	  .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = V2 (-0x1.ae644e28058c3p-5),	  .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = V2 (-0x1.59d7f901566cbp-5),	  .c11 = 0x1.2c982855ab069p-5,
+  .c12 = V2 (-0x1.eb49592998177p-6),	  .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = V2 (-0x1.ca980345c4204p-7),	  .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = V2 (-0x1.7ea70755b8eccp-9),	  .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = V2 (-0x1.44a4b059b6f67p-13),	  .c19 = 0x1.c4a45029e5a91p-17,
   .pi_over_2 = V2 (0x1.921fb54442d18p+0),
 };
 
@@ -47,9 +46,9 @@ static const struct data
 
 /* Fast implementation of vector atan.
    Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
-   z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
-   _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
-				       want 0x1.9225645bdd7c3p-1.  */
+   z=1/x and shift = pi/2. Maximum observed error is 2.45 ulps:
+   _ZGVnN2v_atan (0x1.0008d737eb3e6p+0) got 0x1.92288c551a4c1p-1
+				       want 0x1.92288c551a4c3p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -78,59 +77,53 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
      y := arctan(x) for x < 1
      y := pi/2 + arctan(-1/x) for x > 1
      Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0));
+  uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0));
   /* Avoid dependency in abs(x) in division (and comparison).  */
-  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x);
+  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x);
+
   float64x2_t shift = vreinterpretq_f64_u64 (
       vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2)));
-  /* Use absolute value only when needed (odd powers of z).  */
-  float64x2_t az = vbslq_f64 (
-      SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z);
-
-  /* Calculate the polynomial approximation.
-     Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
-     full scheme to avoid underflow in x^16.
-     The order 19 polynomial P approximates
-     (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
+
+  /* Reinsert sign bit from argument into the shift value.  */
+  shift = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (shift), sign));
+
+  /* Calculate polynomial approximation P(z^2) with deg(P)=19.  */
   float64x2_t z2 = vmulq_f64 (z, z);
-  float64x2_t x2 = vmulq_f64 (z2, z2);
-  float64x2_t x4 = vmulq_f64 (x2, x2);
-  float64x2_t x8 = vmulq_f64 (x4, x4);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
 
-  /* estrin_7.  */
+  /* Order-7 Estrin.  */
   float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0);
   float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1);
-  float64x2_t p03 = vfmaq_f64 (p01, x2, p23);
+  float64x2_t p03 = vfmaq_f64 (p01, z4, p23);
 
   float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0);
   float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1);
-  float64x2_t p47 = vfmaq_f64 (p45, x2, p67);
+  float64x2_t p47 = vfmaq_f64 (p45, z4, p67);
 
-  float64x2_t p07 = vfmaq_f64 (p03, x4, p47);
+  float64x2_t p07 = vfmaq_f64 (p03, z8, p47);
 
-  /* estrin_11.  */
+  /* Order-11 Estrin.  */
   float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0);
   float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1);
-  float64x2_t p811 = vfmaq_f64 (p89, x2, p1011);
+  float64x2_t p811 = vfmaq_f64 (p89, z4, p1011);
 
   float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0);
   float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1);
-  float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415);
+  float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415);
 
   float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0);
   float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1);
-  float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819);
+  float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819);
 
-  float64x2_t p815 = vfmaq_f64 (p811, x4, p1215);
-  float64x2_t p819 = vfmaq_f64 (p815, x8, p1619);
+  float64x2_t p815 = vfmaq_f64 (p811, z8, p1215);
+  float64x2_t p819 = vfmaq_f64 (p815, z16, p1619);
 
-  float64x2_t y = vfmaq_f64 (p07, p819, x8);
+  float64x2_t y = vfmaq_f64 (p07, p819, z16);
 
   /* Finalize. y = shift + z + z^3 * P(z^2).  */
-  y = vfmaq_f64 (az, y, vmulq_f64 (z2, az));
-  y = vaddq_f64 (y, shift);
-
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign));
-  return y;
+  y = vfmsq_f64 (v_f64 (-1.0), z2, y);
+  return vfmsq_f64 (shift, z, y);
 }
diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c
index 3880ced..a6b0489 100644
--- a/sysdeps/aarch64/fpu/atan_sve.c
+++ b/sysdeps/aarch64/fpu/atan_sve.c
@@ -18,23 +18,26 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[20];
-  float64_t pi_over_2;
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val, neg_one;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-1022, 1.0].  */
-  .poly = { -0x1.5555555555555p-2,  0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
-            0x1.c71c71bc3951cp-4,   -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
-            -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
-            0x1.842dbe9b0d916p-5,   -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
-            -0x1.00e6eece7de8p-5,   0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
-            0x1.14e9dc19a4a4ep-7,  -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
-            -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
-  .pi_over_2 = 0x1.921fb54442d18p+0,
+  .c0 = -0x1.555555555552ap-2,	     .c1 = 0x1.9999999995aebp-3,
+  .c2 = -0x1.24924923923f6p-3,	     .c3 = 0x1.c71c7184288a2p-4,
+  .c4 = -0x1.745d11fb3d32bp-4,	     .c5 = 0x1.3b136a18051b9p-4,
+  .c6 = -0x1.110e6d985f496p-4,	     .c7 = 0x1.e1bcf7f08801dp-5,
+  .c8 = -0x1.ae644e28058c3p-5,	     .c9 = 0x1.82eeb1fed85c6p-5,
+  .c10 = -0x1.59d7f901566cbp-5,	     .c11 = 0x1.2c982855ab069p-5,
+  .c12 = -0x1.eb49592998177p-6,	     .c13 = 0x1.69d8b396e3d38p-6,
+  .c14 = -0x1.ca980345c4204p-7,	     .c15 = 0x1.dc050eafde0b3p-8,
+  .c16 = -0x1.7ea70755b8eccp-9,	     .c17 = 0x1.ba3da3de903e8p-11,
+  .c18 = -0x1.44a4b059b6f67p-13,     .c19 = 0x1.c4a45029e5a91p-17,
+  .shift_val = 0x1.490fdaa22168cp+1, .neg_one = -1,
 };
 
 /* Useful constants.  */
@@ -43,15 +46,14 @@ static const struct data
 /* Fast implementation of SVE atan.
    Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
    z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed
-   error is 2.27 ulps:
-   _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
-				       want 0x1.9225645bdd7c3p-1.  */
+   error is 2.08 ulps:
+   _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1
+				       want 0x1.922a3163e15c4p-1.  */
 svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* No need to trigger special case. Small cases, infs and nans
-     are supported by our approximation technique.  */
+  svbool_t ptrue = svptrue_b64 ();
   svuint64_t ix = svreinterpret_u64 (x);
   svuint64_t sign = svand_x (pg, ix, SignMask);
 
@@ -59,32 +61,60 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
      y := arctan(x) for x < 1
      y := pi/2 + arctan(-1/x) for x > 1
      Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  svbool_t red = svacgt (pg, x, 1.0);
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x);
-  /* Use absolute value only when needed (odd powers of z).  */
-  svfloat64_t az = svabs_x (pg, z);
-  az = svneg_m (az, red, az);
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x);
+
+  /* Reuse of -1.0 to reduce constant loads.
+     We need a shift value of pi/2, which is created via -1 + (1 + pi/2).  */
+  svfloat64_t shift
+      = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign));
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
-  svfloat64_t z2 = svmul_x (pg, z, z);
-  svfloat64_t x2 = svmul_x (pg, z2, z2);
-  svfloat64_t x4 = svmul_x (pg, x2, x2);
-  svfloat64_t x8 = svmul_x (pg, x4, x4);
+  svfloat64_t z2 = svmul_x (ptrue, z, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+  svfloat64_t z16 = svmul_x (ptrue, z8, z8);
 
-  svfloat64_t y
-      = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly),
-		 sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8);
+  /* Order-7 Estrin.  */
+  svfloat64_t c13 = svld1rq (ptrue, &d->c1);
+  svfloat64_t c57 = svld1rq (ptrue, &d->c5);
 
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat64_t z3 = svmul_x (pg, z2, az);
-  y = svmla_x (pg, az, z3, y);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, z4, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, z8, p47);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c911 = svld1rq (ptrue, &d->c9);
+  svfloat64_t c1315 = svld1rq (ptrue, &d->c13);
+  svfloat64_t c1719 = svld1rq (ptrue, &d->c17);
 
-  /* Apply shift as indicated by `red` predicate.  */
-  y = svadd_m (red, y, d->pi_over_2);
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1);
+  svfloat64_t p811 = svmla_x (pg, p89, z4, p1011);
 
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1);
+  svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415);
 
-  return y;
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0);
+  svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1);
+  svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819);
+
+  svfloat64_t p815 = svmla_x (pg, p811, z8, p1215);
+  svfloat64_t p819 = svmla_x (pg, p815, z16, p1619);
+
+  svfloat64_t y = svmla_x (pg, p07, z16, p819);
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  shift = svadd_m (red, z, shift);
+  y = svmul_x (pg, z2, y);
+  return svmla_x (pg, shift, z, y);
 }
diff --git a/sysdeps/aarch64/fpu/atanf_advsimd.c b/sysdeps/aarch64/fpu/atanf_advsimd.c
index 472865e..817a47e 100644
--- a/sysdeps/aarch64/fpu/atanf_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanf_advsimd.c
@@ -22,26 +22,35 @@
 
 static const struct data
 {
+  uint32x4_t sign_mask, pi_over_2;
+  float32x4_t neg_one;
+#if WANT_SIMD_EXCEPT
   float32x4_t poly[8];
-  float32x4_t pi_over_2;
+} data = {
+  .poly = { V4 (-0x1.5554dcp-2), V4 (0x1.9978ecp-3), V4 (-0x1.230a94p-3),
+	    V4 (0x1.b4debp-4), V4 (-0x1.3550dap-4), V4 (0x1.61eebp-5),
+	    V4 (-0x1.0c17d4p-6), V4 (0x1.7ea694p-9) },
+#else
+  float32x4_t c0, c2, c4, c6;
+  float c1, c3, c5, c7;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
      [2**-128, 1.0].
      Generated using fpminimax between FLT_MIN and 1.  */
-  .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f),
-	    V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f),
-	    V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) },
-  .pi_over_2 = V4 (0x1.921fb6p+0f),
+  .c0 = V4 (-0x1.5554dcp-2),	.c1 = 0x1.9978ecp-3,
+  .c2 = V4 (-0x1.230a94p-3),	.c3 = 0x1.b4debp-4,
+  .c4 = V4 (-0x1.3550dap-4),	.c5 = 0x1.61eebp-5,
+  .c6 = V4 (-0x1.0c17d4p-6),	.c7 = 0x1.7ea694p-9,
+#endif
+  .pi_over_2 = V4 (0x3fc90fdb),
+  .neg_one = V4 (-1.0f),
+  .sign_mask = V4 (0x80000000),
 };
 
-#define SignMask v_u32 (0x80000000)
-
-#define P(i) d->poly[i]
-
+#if WANT_SIMD_EXCEPT
 #define TinyBound 0x30800000 /* asuint(0x1p-30).  */
 #define BigBound 0x4e800000  /* asuint(0x1p30).  */
 
-#if WANT_SIMD_EXCEPT
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 {
@@ -51,19 +60,20 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
 
 /* Fast implementation of vector atanf based on
    atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
-   using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps:
-   _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1.  */
+   using z=-1/x and shift = pi/2. Maximum observed error is 2.02 ulps:
+   _ZGVnN4v_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
+				 want 0x1.95ed36p-1.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Small cases, infs and nans are supported by our approximation technique,
-     but do not set fenv flags correctly. Only trigger special case if we need
-     fenv.  */
   uint32x4_t ix = vreinterpretq_u32_f32 (x);
-  uint32x4_t sign = vandq_u32 (ix, SignMask);
+  uint32x4_t sign = vandq_u32 (ix, d->sign_mask);
 
 #if WANT_SIMD_EXCEPT
+  /* Small cases, infs and nans are supported by our approximation technique,
+     but do not set fenv flags correctly. Only trigger special case if we need
+     fenv.  */
   uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000));
   uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)),
 				  v_u32 (BigBound - TinyBound));
@@ -71,41 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x)
   if (__glibc_unlikely (v_any_u32 (special)))
     return special_case (x, x, v_u32 (-1));
 #endif
-
   /* Argument reduction:
-     y := arctan(x) for x < 1
-     y := pi/2 + arctan(-1/x) for x > 1
-     Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0));
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x);
+     y := arctan(x) for |x| < 1
+     y := arctan(-1/x) + pi/2 for x > +1
+     y := arctan(-1/x) - pi/2 for x < -1
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  uint32x4_t red = vcagtq_f32 (x, d->neg_one);
+
+  float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x);
+
+  /* Shift is calculated as +-pi/2 or 0, depending on the argument case.  */
   float32x4_t shift = vreinterpretq_f32_u32 (
-      vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2)));
-  /* Use absolute value only when needed (odd powers of z).  */
-  float32x4_t az = vbslq_f32 (
-      SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z);
+      vandq_u32 (red, veorq_u32 (d->pi_over_2, sign)));
+
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z3 = vmulq_f32 (z, z2);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+#if WANT_SIMD_EXCEPT
 
   /* Calculate the polynomial approximation.
      Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
      a standard implementation using z8 creates spurious underflow
      in the very last fma (when z^8 is small enough).
-     Therefore, we split the last fma into a mul and an fma.
-     Horner and single-level Estrin have higher errors that exceed
-     threshold.  */
-  float32x4_t z2 = vmulq_f32 (z, z);
-  float32x4_t z4 = vmulq_f32 (z2, z2);
-
+     Therefore, we split the last fma into a mul and an fma.  */
   float32x4_t y = vfmaq_f32 (
       v_pairwise_poly_3_f32 (z2, z4, d->poly), z4,
       vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4)));
 
-  /* y = shift + z * P(z^2).  */
-  y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift);
+#else
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  /* Uses an Estrin scheme for polynomial approximation.  */
+  float32x4_t odd_coeffs = vld1q_f32 (&d->c1);
+
+  float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, odd_coeffs, 0);
+  float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, odd_coeffs, 1);
+  float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, odd_coeffs, 2);
+  float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, odd_coeffs, 3);
 
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign));
+  float32x4_t p03 = vfmaq_f32 (p01, z4, p23);
+  float32x4_t p47 = vfmaq_f32 (p45, z4, p67);
 
-  return y;
+  float32x4_t y = vfmaq_f32 (p03, z8, p47);
+#endif
+
+  /* y = shift + z + z^3 * P(z^2).  */
+  return vfmaq_f32 (vaddq_f32 (shift, z), z3, y);
 }
 libmvec_hidden_def (V_NAME_F1 (atan))
 HALF_WIDTH_ALIAS_F1 (atan)
diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c
index 3a98d70..6558223 100644
--- a/sysdeps/aarch64/fpu/atanf_sve.c
+++ b/sysdeps/aarch64/fpu/atanf_sve.c
@@ -18,18 +18,26 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 static const struct data
 {
-  float32_t poly[8];
-  float32_t pi_over_2;
+  float32_t c1, c3, c5, c7;
+  float32_t c0, c2, c4, c6;
+  float32_t shift_val, neg_one;
 } data = {
   /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
     [2**-128, 1.0].  */
-  .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
-	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
-  .pi_over_2 = 0x1.921fb6p+0f,
+  .c0 = -0x1.5554dcp-2,
+  .c1 = 0x1.9978ecp-3,
+  .c2 = -0x1.230a94p-3,
+  .c3 = 0x1.b4debp-4,
+  .c4 = -0x1.3550dap-4,
+  .c5 = 0x1.61eebp-5,
+  .c6 = -0x1.0c17d4p-6,
+  .c7 = 0x1.7ea694p-9,
+  /*  pi/2, used as a shift value after reduction.  */
+  .shift_val = 0x1.921fb54442d18p+0,
+  .neg_one = -1.0f,
 };
 
 #define SignMask (0x80000000)
@@ -37,43 +45,49 @@ static const struct data
 /* Fast implementation of SVE atanf based on
    atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
    z=-1/x and shift = pi/2.
-   Largest observed error is 2.9 ULP, close to +/-1.0:
-   _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1
-				 want -0x1.967fp-1.  */
+   Largest observed error is 2.12 ULP:
+   _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
+				 want 0x1.95ed36p-1.  */
 svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
 
   /* No need to trigger special case. Small cases, infs and nans
      are supported by our approximation technique.  */
   svuint32_t ix = svreinterpret_u32 (x);
-  svuint32_t sign = svand_x (pg, ix, SignMask);
+  svuint32_t sign = svand_x (ptrue, ix, SignMask);
 
   /* Argument reduction:
      y := arctan(x) for x < 1
-     y := pi/2 + arctan(-1/x) for x > 1
-     Hence, use z=-1/a if x>=1, otherwise z=a.  */
-  svbool_t red = svacgt (pg, x, 1.0f);
-  /* Avoid dependency in abs(x) in division (and comparison).  */
-  svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x);
-  /* Use absolute value only when needed (odd powers of z).  */
-  svfloat32_t az = svabs_x (pg, z);
-  az = svneg_m (az, red, az);
-
-  /* Use split Estrin scheme for P(z^2) with deg(P)=7.  */
-  svfloat32_t z2 = svmul_x (pg, z, z);
-  svfloat32_t z4 = svmul_x (pg, z2, z2);
-  svfloat32_t z8 = svmul_x (pg, z4, z4);
-
-  svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly);
-
-  /* y = shift + z + z^3 * P(z^2).  */
-  svfloat32_t z3 = svmul_x (pg, z2, az);
-  y = svmla_x (pg, az, z3, y);
-
-  /* Apply shift as indicated by 'red' predicate.  */
-  y = svadd_m (red, y, sv_f32 (d->pi_over_2));
-
-  /* y = atan(x) if x>0, -atan(-x) otherwise.  */
-  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+     y := arctan(-1/x) + pi/2 for x > +1
+     y := arctan(-1/x) - pi/2 for x < -1
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (d->neg_one), x), x);
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  svfloat32_t shift = svreinterpret_f32 (
+      sveor_x (red, svreinterpret_u32 (sv_f32 (d->shift_val)), sign));
+
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z3 = svmul_x (ptrue, z2, z);
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1);
+
+  svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
+  svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
+  svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
+  svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3);
+
+  svfloat32_t p03 = svmla_x (pg, p01, z4, p23);
+  svfloat32_t p47 = svmla_x (pg, p45, z4, p67);
+
+  svfloat32_t y = svmla_x (pg, p03, z8, p47);
+
+  /* shift + z + z^3 * P(z^2).  */
+  shift = svadd_m (red, z, shift);
+  return svmla_x (pg, shift, z3, y);
 }
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
index 16a7cf6..958d69a 100644
--- a/sysdeps/aarch64/fpu/atanh_sve.c
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -30,7 +30,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 }
 
 /* SVE approximation for double-precision atanh, based on log1p.
-   The greatest observed error is 2.81 ULP:
+   The greatest observed error is 3.3 ULP:
    _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
 				      want 0x1.ffd8ff31b501cp-6.  */
 svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
@@ -42,7 +42,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
   svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
 
   /* It is special if iax >= 1.  */
-//   svbool_t special = svcmpge (pg, iax, One);
   svbool_t special = svacge (pg, x, 1.0);
 
   /* Computation is performed based on the following sequence of equality:
diff --git a/sysdeps/aarch64/fpu/atanpi_advsimd.c b/sysdeps/aarch64/fpu/atanpi_advsimd.c
new file mode 100644
index 0000000..9101419
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpi_advsimd.c
@@ -0,0 +1,117 @@
+/* Double-Precision vector (Advanced SIMD) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  double c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64x2_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+     [2^-1022, 1.0].  Scalar c2..c20 are loaded pairwise for lane FMAs.  */
+  .c0 = V2 (0x1.45f306dc9c883p-2), .c1 = V2 (-0x1.b2995e7b7ba4ap-4),
+  .c2 = 0x1.04c26be3d2c1p-4,	   .c3 = V2 (-0x1.7483759c17ea1p-5),
+  .c4 = 0x1.21bb95c315d57p-5,	   .c5 = V2 (-0x1.da1bdc3d453f3p-6),
+  .c6 = 0x1.912d20459b4bfp-6,	   .c7 = V2 (-0x1.5bbd4545cad1fp-6),
+  .c8 = 0x1.331b83bec30a1p-6,	   .c9 = V2 (-0x1.13d6457f44de3p-6),
+  .c10 = 0x1.f8e802974db94p-7,	   .c11 = V2 (-0x1.d7e173ab04a1ap-7),
+  .c12 = 0x1.bdfa47d6a4f28p-7,	   .c13 = V2 (-0x1.9ba78f3232ceep-7),
+  .c14 = 0x1.5e6044590ab4fp-7,	   .c15 = V2 (-0x1.01ccfdeb9f77fp-7),
+  .c16 = 0x1.345cf0d4eb1c1p-8,	   .c17 = V2 (-0x1.19e5f00f67e3ap-9),
+  .c18 = 0x1.6d3035ac7625bp-11,	   .c19 = V2 (-0x1.286bb9ae4ed79p-13),
+  .c20 = 0x1.c37ec36da0e1ap-17,
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+
+/* Fast implementation of vector atanpi.
+   atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using
+   z=-1/x and shift = +-1/2. Maximum observed error is 2.76 ulps:
+   _ZGVnN2v_atanpi(0x1.fa2d6912cd64fp-1) got 0x1.fc45a51bd497fp-3
+					want 0x1.fc45a51bd497cp-3.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (atanpi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint64x2_t ix = vreinterpretq_u64_f64 (x);
+  uint64x2_t sign = vandq_u64 (ix, SignMask);
+
+  /* Argument Reduction:
+     y := arctanpi(x) for |x| < 1
+     y := arctanpi(-1/x) + 1/2 for x > 1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0));
+  float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x);
+
+  /* Shift is calculated as +1/2 or 0, depending on the argument case.  */
+  float64x2_t shift = vreinterpretq_f64_u64 (
+      vandq_u64 (red, vreinterpretq_u64_f64 (v_f64 (0.5))));
+
+  /* Reinsert sign bit from argument into the shift value.  */
+  shift = vreinterpretq_f64_u64 (
+      veorq_u64 (vreinterpretq_u64_f64 (shift), sign));
+
+  /* Evaluate the c1..c20 part of P(z^2) (degree 19 in z^2) via Estrin.  */
+  float64x2_t z2 = vmulq_f64 (z, z);
+  float64x2_t z4 = vmulq_f64 (z2, z2);
+  float64x2_t z8 = vmulq_f64 (z4, z4);
+  float64x2_t z16 = vmulq_f64 (z8, z8);
+
+  float64x2_t c24 = vld1q_f64 (&d->c2);
+  float64x2_t c68 = vld1q_f64 (&d->c6);
+
+  /* Order-7 Estrin.  */
+  float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0);
+  float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1);
+  float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0);
+  float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1);
+
+  float64x2_t p14 = vfmaq_f64 (p12, z4, p34);
+  float64x2_t p58 = vfmaq_f64 (p56, z4, p78);
+  float64x2_t p18 = vfmaq_f64 (p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  float64x2_t c1012 = vld1q_f64 (&d->c10);
+  float64x2_t c1416 = vld1q_f64 (&d->c14);
+  float64x2_t c1820 = vld1q_f64 (&d->c18);
+
+  float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0);
+  float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1);
+  float64x2_t p912 = vfmaq_f64 (p910, z4, p1112);
+
+  float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0);
+  float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1);
+  float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516);
+
+  float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0);
+  float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1);
+  float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920);
+
+  float64x2_t p916 = vfmaq_f64 (p912, z8, p1316);
+  float64x2_t p920 = vfmaq_f64 (p916, z16, p1720);
+
+  float64x2_t y = vfmaq_f64 (p18, p920, z16);
+
+  y = vfmaq_f64 (d->c0, z2, y);
+
+  /* y = shift + z * P(z^2).  */
+  return vfmaq_f64 (shift, z, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanpi_sve.c b/sysdeps/aarch64/fpu/atanpi_sve.c
new file mode 100644
index 0000000..3f8f277
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpi_sve.c
@@ -0,0 +1,127 @@
+/* Double-Precision vector (SVE) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20;
+  float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19;
+  float64_t shift_val, neg_one;
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+     [2^-1022, 1.0].  */
+  .c0 = 0x1.45f306dc9c883p-2,
+  .c1 = -0x1.b2995e7b7ba4ap-4,
+  .c2 = 0x1.04c26be3d2c1p-4,
+  .c3 = -0x1.7483759c17ea1p-5,
+  .c4 = 0x1.21bb95c315d57p-5,
+  .c5 = -0x1.da1bdc3d453f3p-6,
+  .c6 = 0x1.912d20459b4bfp-6,
+  .c7 = -0x1.5bbd4545cad1fp-6,
+  .c8 = 0x1.331b83bec30a1p-6,
+  .c9 = -0x1.13d6457f44de3p-6,
+  .c10 = 0x1.f8e802974db94p-7,
+  .c11 = -0x1.d7e173ab04a1ap-7,
+  .c12 = 0x1.bdfa47d6a4f28p-7,
+  .c13 = -0x1.9ba78f3232ceep-7,
+  .c14 = 0x1.5e6044590ab4fp-7,
+  .c15 = -0x1.01ccfdeb9f77fp-7,
+  .c16 = 0x1.345cf0d4eb1c1p-8,
+  .c17 = -0x1.19e5f00f67e3ap-9,
+  .c18 = 0x1.6d3035ac7625bp-11,
+  .c19 = -0x1.286bb9ae4ed79p-13,
+  .c20 = 0x1.c37ec36da0e1ap-17,
+  .shift_val = 1.5,
+  .neg_one = -1,
+};
+
+/* Fast implementation of SVE atanpi.
+   Based on atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using
+   z=-1/x and shift = +-1/2. Largest errors are close to 1. The maximum
+   observed error is 2.80 ulps:
+   _ZGVsMxv_atanpi(0x1.f19587d63c76fp-1) got 0x1.f6b1304817d02p-3
+					want 0x1.f6b1304817d05p-3.  */
+svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svbool_t ptrue = svptrue_b64 ();
+  svuint64_t ix = svreinterpret_u64 (x);
+  svuint64_t sign = svand_x (pg, ix, 0x8000000000000000);
+
+  /* Argument reduction:
+     y := arctanpi(x) for |x| < 1
+     y := arctanpi(-1/x) +- 1/2 for |x| > 1, taking the sign of x
+     Hence, use z=-1/a if |x|>=|-1|, otherwise z=a.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x);
+
+  /* Reuse of -1.0 to reduce constant loads.
+     We need a shift value of 1/2, which is created via -1 + (1 + 1/2).  */
+  svfloat64_t shift
+      = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign));
+
+  /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
+  svfloat64_t z2 = svmul_x (ptrue, z, z);
+  svfloat64_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat64_t z8 = svmul_x (ptrue, z4, z4);
+  svfloat64_t z16 = svmul_x (ptrue, z8, z8);
+
+  /* Order-7 Estrin.  */
+  svfloat64_t c24 = svld1rq (ptrue, &d->c2);
+  svfloat64_t c68 = svld1rq (ptrue, &d->c6);
+
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1);
+  svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0);
+  svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1);
+
+  svfloat64_t p14 = svmla_x (pg, p12, z4, p34);
+  svfloat64_t p58 = svmla_x (pg, p56, z4, p78);
+  svfloat64_t p18 = svmla_x (pg, p14, z8, p58);
+
+  /* Order-11 Estrin.  */
+  svfloat64_t c1012 = svld1rq (ptrue, &d->c10);
+  svfloat64_t c1416 = svld1rq (ptrue, &d->c14);
+  svfloat64_t c1820 = svld1rq (ptrue, &d->c18);
+
+  svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0);
+  svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1);
+  svfloat64_t p912 = svmla_x (pg, p910, z4, p1112);
+
+  svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0);
+  svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1);
+  svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516);
+
+  svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0);
+  svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1);
+  svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920);
+
+  svfloat64_t p916 = svmla_x (pg, p912, z8, p1316);
+  svfloat64_t p920 = svmla_x (pg, p916, z16, p1720);
+
+  svfloat64_t y = svmla_x (pg, p18, z16, p920);
+
+  y = svmla_x (pg, sv_f64 (d->c0), z2, y);
+  return svmla_x (pg, shift, z, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanpif_advsimd.c b/sysdeps/aarch64/fpu/atanpif_advsimd.c
new file mode 100644
index 0000000..9295156
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpif_advsimd.c
@@ -0,0 +1,92 @@
+/* Single-Precision vector (Advanced SIMD) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+
+static const struct data
+{
+  uint32x4_t half; /* Bit pattern of 0.5f in every lane.  */
+  float32x4_t neg_one;
+  float32x4_t c0, c1, c3, c5, c7;
+  float c2, c4, c6, c8; /* Contiguous, loaded as one vector.  */
+} data = {
+  /* Coefficients of P in atanpi(x) ~ x * P(x^2), generated using Remez
+     algorithm, see atanpi.sollya for details.  */
+  .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4),
+  .c2 = 0x1.0490e4p-4,	    .c3 = V4 (-0x1.70c272p-5),
+  .c4 = 0x1.0eef52p-5,	    .c5 = V4 (-0x1.6abbbap-6),
+  .c6 = 0x1.78157p-7,	    .c7 = V4 (-0x1.f0b406p-9),
+  .c8 = 0x1.2ae7fep-11,	    .half = V4 (0x3f000000),
+  .neg_one = V4 (-1.0f),
+};
+
+#define SignMask v_u32 (0x80000000)
+
+/* Fast implementation of vector atanpif based on
+   atanpi(x) ~ shift + z * P(z^2) with reduction to |z| <= 1
+   using z=-1/x and shift = +-1/2.
+   Maximum observed error is 2.59ulps:
+   _ZGVnN4v_atanpif (0x1.f2a89cp-1) got 0x1.f76524p-3
+				   want 0x1.f7651ep-3.  */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanpi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t sign = vandq_u32 (ix, SignMask);
+
+  /* Argument Reduction:
+     y := arctanpi(x) for |x| <= 1
+     y := arctanpi(-1/x) + 1/2 for x > 1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/x if |x|>|-1|, otherwise z=x.  */
+  uint32x4_t red = vcagtq_f32 (x, d->neg_one);
+
+  float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x);
+
+  /* Shift is 1/2 where red is set (mask AND bits of 0.5f), otherwise 0.  */
+  float32x4_t shift = vreinterpretq_f32_u32 (vandq_u32 (red, d->half));
+
+  /* Give shift the sign of x, so that it is -1/2 for x < -1.  */
+  shift = vreinterpretq_f32_u32 (
+      veorq_u32 (vreinterpretq_u32_f32 (shift), sign));
+
+  /* Evaluate P(z^2), of degree 8 in z^2, using an Estrin scheme.  */
+  float32x4_t z2 = vmulq_f32 (z, z);
+  float32x4_t z4 = vmulq_f32 (z2, z2);
+  float32x4_t z8 = vmulq_f32 (z4, z4);
+
+  float32x4_t even_coeffs = vld1q_f32 (&d->c2); /* c2, c4, c6, c8.  */
+
+  float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, even_coeffs, 0);
+  float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, even_coeffs, 1);
+  float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, even_coeffs, 2);
+  float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, even_coeffs, 3);
+
+  float32x4_t p14 = vfmaq_f32 (p12, z4, p34);
+  float32x4_t p58 = vfmaq_f32 (p56, z4, p78);
+
+  float32x4_t y = vfmaq_f32 (p14, z8, p58);
+  y = vfmaq_f32 (d->c0, z2, y);
+
+  /* y = shift + z * P(z^2).  */
+  return vfmaq_f32 (shift, z, y);
+}
+libmvec_hidden_def (V_NAME_F1 (atanpi))
+HALF_WIDTH_ALIAS_F1 (atanpi)
diff --git a/sysdeps/aarch64/fpu/atanpif_sve.c b/sysdeps/aarch64/fpu/atanpif_sve.c
new file mode 100644
index 0000000..2abd788
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanpif_sve.c
@@ -0,0 +1,89 @@
+/* Single-Precision vector (SVE) inverse tanpi function
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+
+static const struct data
+{
+  float32_t c2, c4, c6, c8; /* Contiguous, loaded as one quad-word.  */
+  float32_t c0, c1, c3, c5, c7;
+  float32_t shift_val, neg_one; /* shift_val + neg_one == 1/2.  */
+} data = {
+  /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on
+     [2**-128, 1.0].  */
+  .c0 = 0x1.45f306p-2,	.c1 = -0x1.b2975ep-4, .c2 = 0x1.0490e4p-4,
+  .c3 = -0x1.70c272p-5, .c4 = 0x1.0eef52p-5,  .c5 = -0x1.6abbbap-6,
+  .c6 = 0x1.78157p-7,	.c7 = -0x1.f0b406p-9, .c8 = 0x1.2ae7fep-11,
+  .shift_val = 1.5f,	.neg_one = -1.0f,
+};
+
+#define SignMask (0x80000000)
+
+/* Fast implementation of SVE atanpif based on
+   atanpi(x) ~ shift + z * P(z^2) with reduction to |z| <= 1 using
+   z=-1/x and shift = +-1/2.
+   Largest observed error is 2.59 ULP, close to +/-1.0:
+   _ZGVsMxv_atanpif(0x1.f2a89cp-1) got 0x1.f76524p-3
+				  want 0x1.f7651ep-3.  */
+svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+  svbool_t ptrue = svptrue_b32 ();
+
+  /* No need to trigger special case.  Small cases, infs and nans
+     are supported by our approximation technique.  */
+  svuint32_t ix = svreinterpret_u32 (x);
+  svuint32_t sign = svand_x (pg, ix, SignMask);
+
+  /* Argument reduction:
+     y := arctanpi(x) for |x| <= 1
+     y := arctanpi(-1/x) + 1/2 for x > +1
+     y := arctanpi(-1/x) - 1/2 for x < -1
+     Hence, use z=-1/x if |x|>|-1|, otherwise z=x.  */
+  svbool_t red = svacgt (pg, x, d->neg_one);
+  svfloat32_t z = svsel (red, svdiv_x (ptrue, sv_f32 (d->neg_one), x), x);
+
+  /* Reuse -1.0f to reduce constant loads: shift is -1 + (1 + 1/2) = 1/2 in
+     reduced lanes; svadd_z zeroes the rest, giving a shift of 0 there.  */
+  svfloat32_t shift
+      = svadd_z (red, sv_f32 (d->neg_one), sv_f32 (d->shift_val));
+
+  /* Reinserts the sign bit of the argument to handle the case of x < -1.  */
+  shift = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (shift), sign));
+
+  svfloat32_t z2 = svmul_x (ptrue, z, z);
+  svfloat32_t z4 = svmul_x (ptrue, z2, z2);
+  svfloat32_t z8 = svmul_x (ptrue, z4, z4);
+
+  svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2); /* c2, c4, c6, c8.  */
+
+  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0);
+  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1);
+  svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2);
+  svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3);
+
+  svfloat32_t p14 = svmad_x (pg, z4, p34, p12);
+  svfloat32_t p58 = svmad_x (pg, z4, p78, p56);
+
+  svfloat32_t p18 = svmad_x (pg, z8, p58, p14);
+  svfloat32_t y = svmad_x (pg, z2, p18, d->c0);
+
+  /* shift + z * P(z^2).  */
+  return svmad_x (pg, y, z, shift);
+}
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 5152c0d..77ae10d 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -37,6 +37,10 @@
 # define __DECL_SIMD_acosh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_acoshf
 # define __DECL_SIMD_acoshf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acospi
+# define __DECL_SIMD_acospi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acospif
+# define __DECL_SIMD_acospif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asin
 # define __DECL_SIMD_asin __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asinf
@@ -45,6 +49,10 @@
 # define __DECL_SIMD_asinh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_asinhf
 # define __DECL_SIMD_asinhf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinpi
+# define __DECL_SIMD_asinpi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinpif
+# define __DECL_SIMD_asinpif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan
 # define __DECL_SIMD_atan __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atanf
@@ -53,10 +61,18 @@
 # define __DECL_SIMD_atanh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atanhf
 # define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanpi
+# define __DECL_SIMD_atanpi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanpif
+# define __DECL_SIMD_atanpif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2
 # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
 # undef __DECL_SIMD_atan2f
 # define __DECL_SIMD_atan2f __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atan2pi
+# define __DECL_SIMD_atan2pi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atan2pif
+# define __DECL_SIMD_atan2pif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_cbrt
 # define __DECL_SIMD_cbrt __DECL_SIMD_aarch64
 # undef __DECL_SIMD_cbrtf
@@ -176,12 +192,16 @@ typedef __SVBool_t __sv_bool_t;
 #  define __vpcs __attribute__ ((__aarch64_vector_pcs__))
 
 __vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_atan2pif (__f32x4_t, __f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_acospif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_asinpif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_atanpif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
@@ -207,12 +227,16 @@ __vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_tanpif (__f32x4_t);
 
 __vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_atan2pi (__f64x2_t, __f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_acospi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_asinpi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_atanpi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
@@ -243,12 +267,16 @@ __vpcs __f64x2_t _ZGVnN2v_tanpi (__f64x2_t);
 #ifdef __SVE_VEC_MATH_SUPPORTED
 
 __sv_f32_t _ZGVsMxvv_atan2f (__sv_f32_t, __sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxvv_atan2pif (__sv_f32_t, __sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_acospif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_asinpif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_atanpif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
@@ -274,12 +302,16 @@ __sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_tanpif (__sv_f32_t, __sv_bool_t);
 
 __sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxvv_atan2pi (__sv_f64_t, __sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_acospi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_asinpi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_atanpi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
index 77e58e1..f5a163b 100644
--- a/sysdeps/aarch64/fpu/cosh_sve.c
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -21,71 +21,99 @@
 
 static const struct data
 {
-  float64_t poly[3];
-  float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
+  double c0, c2;
+  double c1, c3;
+  float64_t inv_ln2, ln2_hi, ln2_lo, shift;
   uint64_t special_bound;
 } data = {
-  .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
-	    0x1.5555576a59599p-5, },
-
-  .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2.  */
-  /* -ln2/N.  */
-  .ln2_hi = -0x1.62e42fefa39efp-9,
-  .ln2_lo = -0x1.abc9e3b39803f3p-64,
-  .shift = 0x1.8p+52,
-  .thres = 704.0,
-
-  /* 0x1.6p9, above which exp overflows.  */
-  .special_bound = 0x4086000000000000,
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1.fffffffffdbcdp-2,
+  .c1 = 0x1.555555555444cp-3,
+  .c2 = 0x1.555573c6a9f7dp-5,
+  .c3 = 0x1.1111266d28935p-7,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  /* 1/ln2.  */
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ff80p+46, /* 1.5*2^46+1022.  */
+
+  /* asuint(ln(2^(1024 - 1/128))), the value above which exp overflows.  */
+  .special_bound = 0x40862e37e7d8ba72,
 };
 
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special)
-{
-  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
-  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
-  svfloat64_t y = svadd_x (pg, half_t, half_over_t);
-  return sv_call_f64 (cosh, x, y, special);
-}
-
-/* Helper for approximating exp(x). Copied from sv_exp_tail, with no
-   special-case handling or tail.  */
+/* Helper for approximating exp(x)/2.
+   Functionally identical to FEXPA exp(x), but an adjustment in
+   the shift value which leads to a reduction in the exponent of scale by 1,
+   thus halving the result at no cost.  */
 static inline svfloat64_t
-exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
   /* Calculate exp(x).  */
   svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
   svfloat64_t n = svsub_x (pg, z, d->shift);
 
-  svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi);
-  r = svmla_x (pg, r, n, d->ln2_lo);
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
 
-  svuint64_t u = svreinterpret_u64 (z);
-  svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
-  svuint64_t i = svand_x (svptrue_b64 (), u, 0xff);
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
 
-  svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
-  y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
-  y = svmla_x (pg, sv_f64 (1.0), r, y);
-  y = svmul_x (svptrue_b64 (), r, y);
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
+  svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
+  svfloat64_t p = svmla_x (pg, r, p04, r2);
 
-  /* s = 2^(n/N).  */
-  u = svld1_gather_index (pg, __v_exp_tail_data, i);
-  svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e));
+  svfloat64_t scale = svexpa (u);
 
-  return svmla_x (pg, s, s, y);
+  return svmla_x (pg, scale, scale, p);
+}
+
+/* Vectorised special case to handle values past where exp_over_two_inline
+   overflows.  Halves the input and uses the identity exp(x) = exp(x/2)^2 to
+   double the valid range of inputs, and returns inf for anything past that.  */
+static svfloat64_t NOINLINE
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax, svfloat64_t t,
+	      const struct data *d)
+{
+  /* Finish fast path to compute values for non-special cases.  */
+  svfloat64_t inv_twoexp = svdivr_x (pg, t, 0.25);
+  svfloat64_t y = svadd_x (pg, t, inv_twoexp);
+
+  /* Halves input value, and then check if any cases
+     are still going to overflow.  */
+  ax = svmul_x (special, ax, 0.5);
+  svbool_t is_safe
+      = svcmplt (special, svreinterpret_u64 (ax), d->special_bound);
+
+  /* Computes exp(x/2)/2, and sets any overflowing lanes to inf.  */
+  svfloat64_t half_exp = exp_over_two_inline (special, ax, d);
+  half_exp = svsel (is_safe, half_exp, sv_f64 (INFINITY));
+
+  /* Construct special case cosh(x) = (exp(x/2)^2)/2.  */
+  svfloat64_t exp = svmul_x (svptrue_b64 (), half_exp, 2);
+  svfloat64_t special_y = svmul_x (special, exp, half_exp);
+
+  /* Select correct return values for special and non-special cases.  */
+  special_y = svsel (special, special_y, y);
+
+  /* Ensure an input of nan is correctly propagated.  */
+  svbool_t is_nan
+      = svcmpgt (special, svreinterpret_u64 (ax), sv_u64 (0x7ff0000000000000));
+  return svsel (is_nan, ax, svsel (special, special_y, y));
 }
 
 /* Approximation for SVE double-precision cosh(x) using exp_inline.
    cosh(x) = (exp(x) + exp(-x)) / 2.
-   The greatest observed error is in the scalar fall-back region, so is the
-   same as the scalar routine, 1.93 ULP:
-   _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021
-				       want 0x1.fd774e958236fp+1021.
-
-   The greatest observed error in the non-special region is 1.54 ULP:
-   _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8
-				       want 0x1.f5e2bb8d5c991p+8.  */
+   The greatest observed error in special case region is 2.66 + 0.5 ULP:
+   _ZGVsMxv_cosh (0x1.633b532ffbc1ap+9) got 0x1.f9b2d3d22399ep+1023
+				       want 0x1.f9b2d3d22399bp+1023
+
+  The greatest observed error in the non-special region is 1.01 + 0.5 ULP:
+  _ZGVsMxv_cosh (0x1.998ecbb3c1f81p+1) got 0x1.890b225657f84p+3
+				      want 0x1.890b225657f82p+3.  */
 svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
@@ -94,14 +122,13 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
   svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound);
 
   /* Up to the point that exp overflows, we can use it to calculate cosh by
-     exp(|x|) / 2 + 1 / (2 * exp(|x|)).  */
-  svfloat64_t t = exp_inline (ax, pg, d);
+     exp(|x|)/2 + 1 / (2 * exp(|x|)).  */
+  svfloat64_t half_exp = exp_over_two_inline (pg, ax, d);
 
-  /* Fall back to scalar for any special cases.  */
+  /* Falls back to entirely standalone vectorized special case.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, pg, t, special);
+    return special_case (pg, special, ax, half_exp, d);
 
-  svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
-  svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
-  return svadd_x (pg, half_t, half_over_t);
+  svfloat64_t inv_twoexp = svdivr_x (pg, half_exp, 0.25);
+  return svadd_x (pg, half_exp, inv_twoexp);
 }
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index 1a74db2..f3e7f8b 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -19,26 +19,19 @@
 
 #include "sv_math.h"
 
-/* For x < -Thres, the result is subnormal and not handled correctly by
-   FEXPA.  */
-#define Thres 37.9
+/* For x < -Thres (-log10(2^126)), the result is subnormal and not handled
+   correctly by FEXPA.  */
+#define Thres 0x1.2f702p+5
 
 static const struct data
 {
-  float log2_10_lo, c0, c2, c4;
-  float c1, c3, log10_2;
-  float shift, log2_10_hi, thres;
+  float log10_2, log2_10_hi, log2_10_lo, c1;
+  float c0, shift, thres;
 } data = {
   /* Coefficients generated using Remez algorithm with minimisation of relative
-     error.
-     rel error: 0x1.89dafa3p-24
-     abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
-     maxerr: 0.52 +0.5 ulp.  */
-  .c0 = 0x1.26bb16p+1f,
-  .c1 = 0x1.5350d2p+1f,
-  .c2 = 0x1.04744ap+1f,
-  .c3 = 0x1.2d8176p+0f,
-  .c4 = 0x1.12b41ap-1f,
+     error.  */
+  .c0 = 0x1.26bb62p1,
+  .c1 = 0x1.53524cp1,
   /* 1.5*2^17 + 127, a shift value suitable for FEXPA.  */
   .shift = 0x1.803f8p17f,
   .log10_2 = 0x1.a934fp+1,
@@ -53,28 +46,23 @@ sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
   /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
      with poly(r) in [1/sqrt(2), sqrt(2)] and
      x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N].  */
-
-  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
+  svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log10_2);
 
   /* n = round(x/(log10(2)/N)).  */
   svfloat32_t shift = sv_f32 (d->shift);
-  svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
-  svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
+  svfloat32_t z = svmla_lane (shift, x, lane_consts, 0);
+  svfloat32_t n = svsub_x (pg, z, shift);
 
   /* r = x - n*log10(2)/N.  */
-  svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
-  r = svmls_lane (r, n, lane_consts, 0);
+  svfloat32_t r = x;
+  r = svmls_lane (r, n, lane_consts, 1);
+  r = svmls_lane (r, n, lane_consts, 2);
 
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
   /* Polynomial evaluation: poly(r) ~ exp10(r)-1.  */
-  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
-  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
-  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
-  svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
-  svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
+  svfloat32_t poly = svmla_lane (sv_f32 (d->c0), r, lane_consts, 3);
+  poly = svmul_x (pg, poly, r);
   return svmla_x (pg, scale, scale, poly);
 }
 
@@ -85,11 +73,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
 		      special);
 }
 
-/* Single-precision SVE exp10f routine. Implements the same algorithm
-   as AdvSIMD exp10f.
-   Worst case error is 1.02 ULPs.
-   _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
-				  want 0x1.ba5f9cp-1.  */
+/* Single-precision SVE exp10f routine. Based on the FEXPA instruction.
+   Worst case error is 1.10 ULP.
+   _ZGVsMxv_exp10f (0x1.cc76dep+3) got 0x1.be0172p+47
+				  want 0x1.be017p+47.  */
 svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c
index 6db8526..c135852 100644
--- a/sysdeps/aarch64/fpu/exp2_sve.c
+++ b/sysdeps/aarch64/fpu/exp2_sve.c
@@ -19,23 +19,21 @@
 
 #include "sv_math.h"
 
-#define N (1 << V_EXP_TABLE_BITS)
-
 #define BigBound 1022
 #define UOFlowBound 1280
 
 static const struct data
 {
-  double c0, c2;
-  double c1, c3;
+  double c2, c4;
+  double c0, c1, c3;
   double shift, big_bound, uoflow_bound;
 } data = {
   /* Coefficients are computed using Remez algorithm with
      minimisation of the absolute error.  */
-  .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3,
-  .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7,
-  .shift = 0x1.8p52 / N,      .uoflow_bound = UOFlowBound,
-  .big_bound = BigBound,
+  .c0 = 0x1.62e42fefa39efp-1,  .c1 = 0x1.ebfbdff82a31bp-3,
+  .c2 = 0x1.c6b08d706c8a5p-5,  .c3 = 0x1.3b2ad2ff7d2f3p-7,
+  .c4 = 0x1.5d8761184beb3p-10, .shift = 0x1.800000000ffc0p+46,
+  .uoflow_bound = UOFlowBound, .big_bound = BigBound,
 };
 
 #define SpecialOffset 0x6000000000000000 /* 0x1p513.  */
@@ -64,50 +62,52 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
       svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
 
   /* |n| > 1280 => 2^(n) overflows.  */
-  svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
+  svbool_t p_cmp = svacle (pg, n, d->uoflow_bound);
 
   svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
   svfloat64_t r2 = svmla_x (pg, s2, s2, y);
   svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
 
-  return svsel (p_cmp, r1, r0);
+  return svsel (p_cmp, r0, r1);
 }
 
 /* Fast vector implementation of exp2.
-   Maximum measured error is 1.65 ulp.
-   _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
-				       want 0x1.f8db0d4df721dp-1.  */
+   Maximum measured error is 0.52 + 0.5 ulp.
+   _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0
+				       want 0x1.8861641b49e07p+0.  */
 svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
-  svbool_t no_big_scale = svacle (pg, x, d->big_bound);
-  svbool_t special = svnot_z (pg, no_big_scale);
-
-  /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N].  */
-  svfloat64_t shift = sv_f64 (d->shift);
-  svfloat64_t kd = svadd_x (pg, x, shift);
-  svuint64_t ki = svreinterpret_u64 (kd);
-  /* kd = k/N.  */
-  kd = svsub_x (pg, kd, shift);
-  svfloat64_t r = svsub_x (pg, x, kd);
-
-  /* scale ~= 2^(k/N).  */
-  svuint64_t idx = svand_x (pg, ki, N - 1);
-  svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx);
-  /* This is only a valid scale when -1023*N < k < 1024*N.  */
-  svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
-  svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
-
-  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
-  /* Approximate exp2(r) using polynomial.  */
-  /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4.  */
+  svbool_t special = svacge (pg, x, d->big_bound);
+
+  svfloat64_t z = svadd_x (svptrue_b64 (), x, d->shift);
+  svfloat64_t n = svsub_x (svptrue_b64 (), z, d->shift);
+  svfloat64_t r = svsub_x (svptrue_b64 (), x, n);
+
+  svfloat64_t scale = svexpa (svreinterpret_u64 (z));
+
   svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
-  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
-  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
-  svfloat64_t p = svmla_x (pg, p01, p23, r2);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  /* Approximate exp2(r) using polynomial.  */
+  /* y = exp2(r) - 1 ~= r * (C0 + C1 r + C2 r^2 + C3 r^3 + C4 r^4).  */
+  svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  svfloat64_t p = svmla_x (pg, p12, p34, r2);
+  p = svmad_x (pg, p, r, d->c0);
   svfloat64_t y = svmul_x (svptrue_b64 (), r, p);
+
   /* Assemble exp2(x) = exp2(r) * scale.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (pg, scale, y, kd, d);
+    {
+      /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+         special case function so needs to be copied.
+         e = sign bit of (asuint64 (z) << 46).  */
+      svuint64_t e = svand_x (pg, svlsl_x (pg, svreinterpret_u64 (z), 46),
+            0x8000000000000000);
+      scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+      return special_case (pg, scale, y, n, d);
+    }
+
   return svmla_x (pg, scale, scale, y);
 }
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index fcd7830..989cefb 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -18,21 +18,17 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f32.h"
 
 #define Thres 0x1.5d5e2ap+6f
 
 static const struct data
 {
-  float c0, c2, c4, c1, c3;
-  float shift, thres;
+  float c0, c1, shift, thres;
 } data = {
-  /* Coefficients copied from the polynomial in AdvSIMD variant.  */
-  .c0 = 0x1.62e422p-1f,
-  .c1 = 0x1.ebf9bcp-3f,
-  .c2 = 0x1.c6bd32p-5f,
-  .c3 = 0x1.3ce9e4p-7f,
-  .c4 = 0x1.59977ap-10f,
+  /* Coefficients generated using Remez algorithm with minimisation of relative
+     error.  */
+  .c0 = 0x1.62e485p-1,
+  .c1 = 0x1.ebfbe0p-3,
   /* 1.5*2^17 + 127.  */
   .shift = 0x1.803f8p17f,
   /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
@@ -51,16 +47,8 @@ sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 
   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
 
-  /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
-     Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
-     coefficients 1 to 4, and apply most significant coefficient directly.  */
-  svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
-  svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
-  svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
-  svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
-  svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
-  svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
-  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+  svfloat32_t poly = svmla_x (pg, sv_f32 (d->c0), r, sv_f32 (d->c1));
+  poly = svmul_x (svptrue_b32 (), poly, r);
 
   return svmla_x (pg, scale, scale, poly);
 }
@@ -72,11 +60,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
 		      special);
 }
 
-/* Single-precision SVE exp2f routine. Implements the same algorithm
-   as AdvSIMD exp2f.
-   Worst case error is 1.04 ULPs.
-   _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
-				 want 0x1.ba6a64p-1.  */
+/* Single-precision SVE exp2f routine, based on the FEXPA instruction.
+   Worst case error is 1.09 ULPs.
+   _ZGVsMxv_exp2f (0x1.9a2a94p-1) got 0x1.be1054p+0
+				 want 0x1.be1052p+0.  */
 svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c
index d4ba8cc..b1d940b 100644
--- a/sysdeps/aarch64/fpu/expm1_sve.c
+++ b/sysdeps/aarch64/fpu/expm1_sve.c
@@ -18,82 +18,164 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
-#define SpecialBound 0x1.62b7d369a5aa9p+9
-#define ExponentBias 0x3ff0000000000000
+#define FexpaBound 0x1.4cb5ecef28adap-3 /* 15*ln2/64.  */
+#define SpecialBound 0x1.628c2855bfaddp+9 /* ln(2^(1023 + 1/128)).  */
 
 static const struct data
 {
-  double poly[11];
-  double shift, inv_ln2, special_bound;
-  /* To be loaded in one quad-word.  */
+  double c2, c4;
+  double inv_ln2;
   double ln2_hi, ln2_lo;
+  double c0, c1, c3;
+  double shift, thres;
+  uint64_t expm1_data[32];
 } data = {
-  /* Generated using fpminimax.  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-            0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13,
-            0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-            0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .special_bound = SpecialBound,
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .ln2_hi = 0x1.62e42fefa39efp-1,
-  .ln2_lo = 0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
+  /* Table emulating FEXPA - 1, for values of FEXPA close to 1.
+  The table holds values of 2^(i/64) - 1, computed in arbitrary precision.
+  The first half of the table stores values associated to i from 0 to 15.
+  The second half stores i = 0 followed by i from -15 up to -1.  */
+  .expm1_data = {
+      0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+      0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+      0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+		  0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+      0x0000000000000000, 0xbfc331751ec3a814, 0xbfc20224341286e4, 0xbfc0cf85bed0f8b7,
+      0xbfbf332113d56b1f, 0xbfbcc0768d4175a6, 0xbfba46f918837cb7, 0xbfb7c695afc3b424,
+		  0xbfb53f391822dbc7, 0xbfb2b0cfe1266bd4, 0xbfb01b466423250a, 0xbfaafd11874c009e,
+      0xbfa5b505d5b6f268, 0xbfa05e4119ea5d89, 0xbf95f134923757f3, 0xbf860f9f985bc9f4,
+    },
+
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023.  */
+  .thres = SpecialBound,
 };
 
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t pg)
+#define SpecialOffset 0x6000000000000000 /* 0x1p513.  */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0).  */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769.  */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254.  */
+
+static NOINLINE svfloat64_t
+special_case (svbool_t pg, svfloat64_t y, svfloat64_t s, svfloat64_t p,
+	      svfloat64_t n)
 {
-  return sv_call_f64 (expm1, x, y, pg);
+  /* s=2^n may overflow, break it up into s=s1*s2,
+     such that exp = s + s*y can be computed as s1*(s2+s2*y)
+     and s1*s1 overflows only if n>0.  */
+
+  /* If n<=0 then set b to SpecialOffset (0x6...0), 0 otherwise.  */
+  svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0.  */
+  svuint64_t b
+      = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0.  */
+
+  /* Set s1 to generate overflow depending on sign of exponent n,
+     ie. s1 = 0x70...0 - b.  */
+  svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+  /* Offset s to avoid overflow in final result if n is below threshold.
+     ie. s2 = as_u64 (s) - 0x3010...0 + b.  */
+  svfloat64_t s2 = svreinterpret_f64 (
+      svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
+
+  /* |n| > 1280 => 2^(n) overflows.  */
+  svbool_t p_cmp = svacgt (pg, n, 1280.0);
+
+  svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
+  svfloat64_t r2 = svmla_x (pg, s2, s2, p);
+  svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
+
+  svbool_t is_safe = svacle (pg, n, 1023); /* Only correct special lanes.  */
+  return svsel (is_safe, y, svsub_x (pg, svsel (p_cmp, r1, r0), 1.0));
 }
 
-/* Double-precision vector exp(x) - 1 function.
-   The maximum error observed error is 2.18 ULP:
-   _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
-				       want 0x1.a8b9ea8d66e2p-2.  */
+/* FEXPA based SVE expm1 algorithm.
+   Maximum measured error is 2.81 + 0.5 ULP:
+   _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3
+				       want 0x1.c290e5858bb5p-3.  */
 svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Large, Nan/Inf.  */
-  svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
-
-  /* Reduce argument to smaller range:
-     Let i = round(x / ln2)
-     and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where 2^i is exact because i is an integer.  */
-  svfloat64_t shift = sv_f64 (d->shift);
-  svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift);
-  svint64_t i = svcvt_s64_x (pg, n);
-  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
-  svfloat64_t f = svmls_lane (x, n, ln2, 0);
-  f = svmls_lane (f, n, ln2, 1);
-
-  /* Approximate expm1(f) using polynomial.
-     Taylor expansion for expm1(x) has the form:
-	 x + ax^2 + bx^3 + cx^4 ....
-     So we calculate the polynomial P(f) = a + bf + cf^2 + ...
-     and assemble the approximation expm1(f) ~= f + f^2 * P(f).  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t f8 = svmul_x (pg, f4, f4);
-  svfloat64_t p
-      = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
-
-  /* Assemble the result.
-   expm1(x) ~= 2^i * (p + 1) - 1
-   Let t = 2^i.  */
-  svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias);
-  svfloat64_t t = svreinterpret_f64 (u);
-
-  /* expm1(x) ~= p * t + (t - 1).  */
-  svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t);
+  svbool_t special = svacgt (pg, x, d->thres);
 
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, y, special);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
 
+  /* r = x - n * ln2, r is in [-ln2/128, ln2/128].  */
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+  svfloat64_t scalem1 = svsub_x (pg, scale, sv_f64 (1.0));
+
+  /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.
+
+     This bound is based upon the table size:
+	   Bound = ((TableSize - 1) / 64) * ln2.
+     The current bound is based upon a table size of 16.  */
+  svbool_t is_small = svaclt (pg, x, FexpaBound);
+
+  if (svptest_any (pg, is_small))
+    {
+      /* Index via the input of FEXPA, but we only care about the lower 4 bits.
+       */
+      svuint64_t base_idx = svand_x (pg, u, 0xf);
+
+      /* We can use the sign of x as a fifth bit to account for the asymmetry
+	 of e^x around 0.  */
+      svuint64_t signBit
+	  = svlsl_x (pg, svlsr_x (pg, svreinterpret_u64 (x), 63), 4);
+      svuint64_t idx = svorr_x (pg, base_idx, signBit);
+
+      /* Lookup values for scale - 1 for small x.  */
+      svfloat64_t lookup = svreinterpret_f64 (
+	  svld1_gather_index (is_small, d->expm1_data, idx));
+
+      /* Select the appropriate scale - 1 value based on x.  */
+      scalem1 = svsel (is_small, lookup, scalem1);
+    }
+
+  svfloat64_t y = svmla_x (pg, scalem1, scale, p);
+
+  /* FEXPA returns nan for large inputs so we special case those.  */
+  if (__glibc_unlikely (svptest_any (pg, special)))
+    {
+      /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+          special case function so needs to be copied.
+          e = sign bit of u << 46.  */
+      svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000);
+      /* Copy sign to s.  */
+      scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+      return special_case (pg, y, scale, p, n);
+    }
+
+  /* return expm1 = (scale - 1) + (scale * poly).  */
   return y;
 }
diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c
index 862c13f..821c078 100644
--- a/sysdeps/aarch64/fpu/log1p_sve.c
+++ b/sysdeps/aarch64/fpu/log1p_sve.c
@@ -22,19 +22,33 @@
 
 static const struct data
 {
-  double poly[19];
+  float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16;
+  float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
   double ln2_hi, ln2_lo;
   uint64_t hfrt2_top, onemhfrt2_top, inf, mone;
 } data = {
   /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20
-     polynomial, however first 2 coefficients are 0 and 1 so are not stored.  */
-  .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
-	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
-	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
-	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
-	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
-	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
-	    -0x1.cfa7385bdb37ep-6, },
+     polynomial, however first 2 coefficients are 0 and 1 so are not
+     stored.  */
+  .c0 = -0x1.ffffffffffffbp-2,
+  .c1 = 0x1.55555555551a9p-2,
+  .c2 = -0x1.00000000008e3p-2,
+  .c3 = 0x1.9999999a32797p-3,
+  .c4 = -0x1.555555552fecfp-3,
+  .c5 = 0x1.249248e071e5ap-3,
+  .c6 = -0x1.ffffff8bf8482p-4,
+  .c7 = 0x1.c71c8f07da57ap-4,
+  .c8 = -0x1.9999ca4ccb617p-4,
+  .c9 = 0x1.7459ad2e1dfa3p-4,
+  .c10 = -0x1.554d2680a3ff2p-4,
+  .c11 = 0x1.3b4c54d487455p-4,
+  .c12 = -0x1.2548a9ffe80e6p-4,
+  .c13 = 0x1.0f389a24b2e07p-4,
+  .c14 = -0x1.eee4db15db335p-5,
+  .c15 = 0x1.e95b494d4a5ddp-5,
+  .c16 = -0x1.15fdf07cb7c73p-4,
+  .c17 = 0x1.0310b70800fcfp-4,
+  .c18 = -0x1.cfa7385bdb37ep-6,
   .ln2_hi = 0x1.62e42fefa3800p-1,
   .ln2_lo = 0x1.ef35793c76730p-45,
   /* top32(asuint64(sqrt(2)/2)) << 32.  */
@@ -49,7 +63,7 @@ static const struct data
 #define BottomMask 0xffffffff
 
 static svfloat64_t NOINLINE
-special_case (svbool_t special, svfloat64_t x, svfloat64_t y)
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 {
   return sv_call_f64 (log1p, x, y, special);
 }
@@ -91,8 +105,9 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
   /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
   svuint64_t utop
       = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top);
-  svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask));
-  svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1);
+  svuint64_t u_red
+      = svorr_x (pg, utop, svand_x (svptrue_b64 (), mi, BottomMask));
+  svfloat64_t f = svsub_x (svptrue_b64 (), svreinterpret_f64 (u_red), 1);
 
   /* Correction term c/m.  */
   svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m);
@@ -103,18 +118,49 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
      Hence approximation has the form f + f^2 * P(f)
      where P(x) = C0 + C1*x + C2x^2 + ...
      Assembling this all correctly is dealt with at the final step.  */
-  svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2),
-	      f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8);
-  svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly);
+  svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f),
+	      f4 = svmul_x (svptrue_b64 (), f2, f2),
+	      f8 = svmul_x (svptrue_b64 (), f4, f4),
+	      f16 = svmul_x (svptrue_b64 (), f8, f8);
+
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+  svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17);
+
+  /* Order-18 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, f2, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, f2, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, f4, p47);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1);
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1);
+
+  svfloat64_t p811 = svmla_x (pg, p89, f2, p1011);
+  svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415);
+  svfloat64_t p815 = svmla_x (pg, p811, f4, p1215);
+
+  svfloat64_t p015 = svmla_x (pg, p07, f8, p815);
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0);
+  svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1);
+  svfloat64_t p = svmla_x (pg, p015, f16, p1618);
 
   svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo);
   svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi);
-  svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
 
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (special, x, y);
-
-  return y;
+    return special_case (
+	x, svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p),
+	special);
+  return svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p);
 }
 
 strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1))
diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
index 963453f..072ba8f 100644
--- a/sysdeps/aarch64/fpu/sinh_sve.c
+++ b/sysdeps/aarch64/fpu/sinh_sve.c
@@ -18,90 +18,153 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[11];
-  float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift;
   uint64_t halff;
-  int64_t onef;
-  uint64_t large_bound;
+  double c2, c4;
+  double inv_ln2;
+  double ln2_hi, ln2_lo;
+  double c0, c1, c3;
+  double shift, special_bound, bound;
+  uint64_t expm1_data[20];
 } data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-	    0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
-	    0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
-	    0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-	    0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .m_ln2_hi = -0x1.62e42fefa39efp-1,
-  .m_ln2_lo = -0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
-
+  /* Table lookup of 2^(i/64) - 1, for values of i from 0..19.  */
+  .expm1_data = {
+    0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+    0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+    0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+    0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+    0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+  },
+
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .inv_ln2 = 0x1.71547652b82fep+0,
+  .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023.  */
   .halff = 0x3fe0000000000000,
-  .onef = 0x3ff0000000000000,
-  /* 2^9. expm1 helper overflows for large input.  */
-  .large_bound = 0x4080000000000000,
+  .special_bound = 0x1.62e37e7d8ba72p+9,	/* ln(2^(1024 - 1/128)).  */
+  .bound = 0x1.a56ef8ec924ccp-3 /* 19*ln2/64.  */
 };
 
+/* A specialised FEXPA expm1 that is only valid for positive inputs and
+   has no special cases. Based off the full FEXPA expm1 implemented for
+   _ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP.  */
 static inline svfloat64_t
-expm1_inline (svfloat64_t x, svbool_t pg)
+expm1_inline (svbool_t pg, svfloat64_t x)
 {
   const struct data *d = ptr_barrier (&data);
 
-  /* Reduce argument:
-     exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
-     where i = round(x / ln2)
-     and   f = x - i * ln2 (f in [-ln2/2, ln2/2]).  */
-  svfloat64_t j
-      = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
-  svint64_t i = svcvt_s64_x (pg, j);
-  svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi);
-  f = svmla_x (pg, f, j, d->m_ln2_lo);
-  /* Approximate expm1(f) using polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t f8 = svmul_x (pg, f4, f4);
-  svfloat64_t p
-      = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
-  /* t = 2^i.  */
-  svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
-  /* expm1(x) ~= p * t + (t - 1).  */
-  return svmla_x (pg, svsub_x (pg, t, 1.0), p, t);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
+
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t r = x;
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+
+  /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.  */
+  svbool_t is_small = svaclt (pg, x, d->bound);
+
+  /* Index via the input of FEXPA, but we only care about the lower 5 bits.  */
+  svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+  /* Compute scale - 1 from FEXPA, and lookup values where this fails.  */
+  svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+  svuint64_t scalem1_lookup
+      = svld1_gather_index (is_small, d->expm1_data, base_idx);
+
+  /* Select the appropriate scale - 1 value based on x.  */
+  svfloat64_t scalem1
+      = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+
+  /* return expm1 = scale - 1 + (scale * poly).  */
+  return svmla_x (pg, scalem1, scale, p);
 }
 
+/* Vectorised special case to handle values past where expm1_inline overflows.
+   Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double
+   the valid range of inputs, and returns inf for anything past that.  */
 static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg)
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax,
+	      svfloat64_t halfsign, const struct data *d)
 {
-  return sv_call_f64 (sinh, x, x, pg);
+  /* Halves input value, and then check if any cases
+     are still going to overflow.  */
+  ax = svmul_x (special, ax, 0.5);
+  svbool_t is_safe = svaclt (special, ax, d->special_bound);
+
+  svfloat64_t t = expm1_inline (pg, ax);
+
+  /* Finish fast path to compute values for non-special cases.  */
+  svfloat64_t y = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+  y = svmul_x (pg, y, halfsign);
+
+  /* Computes special lane, and set remaining overflow lanes to inf.  */
+  svfloat64_t half_special_y = svmul_x (svptrue_b64 (), t, halfsign);
+  svfloat64_t special_y = svmul_x (svptrue_b64 (), half_special_y, t);
+
+  svuint64_t signed_inf
+      = svorr_x (svptrue_b64 (), svreinterpret_u64 (halfsign),
+		 sv_u64 (0x7ff0000000000000));
+  special_y = svsel (is_safe, special_y, svreinterpret_f64 (signed_inf));
+
+  /* Join resulting vectors together and return.  */
+  return svsel (special, special_y, y);
 }
 
-/* Approximation for SVE double-precision sinh(x) using expm1.
-   sinh(x) = (exp(x) - exp(-x)) / 2.
-   The greatest observed error is 2.57 ULP:
-   _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2
-				       want 0x1.ab929fc64bd63p-2.  */
+/* Approximation for SVE double-precision sinh(x) using FEXPA expm1.
+   Uses sinh(x) = (e^(2x) - 1) / (2e^x), rewritten for accuracy.
+   The greatest observed error in the non-special region is 2.63 + 0.5 ULP:
+   _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2
+				       want 0x1.c3587faf97b09p-2
+
+   The greatest observed error in the special region is 2.65 + 0.5 ULP:
+   _ZGVsMxv_sinh (0x1.633ce847dab1ap+9) got 0x1.fffd30eea0066p+1023
+				       want 0x1.fffd30eea0063p+1023.  */
 svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
+  svbool_t special = svacge (pg, x, d->special_bound);
   svfloat64_t ax = svabs_x (pg, x);
   svuint64_t sign
       = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
   svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff));
 
-  svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound);
-
   /* Fall back to scalar variant for all lanes if any are special.  */
   if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, pg);
+    return special_case (pg, special, ax, halfsign, d);
 
   /* Up to the point that expm1 overflows, we can use it to calculate sinh
      using a slight rearrangement of the definition of sinh. This allows us to
      retain acceptable accuracy for very small inputs.  */
-  svfloat64_t t = expm1_inline (ax, pg);
+  svfloat64_t t = expm1_inline (pg, ax);
   t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
   return svmul_x (pg, t, halfsign);
 }
diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h
index 71f88e0..c2b196f 100644
--- a/sysdeps/aarch64/fpu/sv_log1p_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h
@@ -21,11 +21,12 @@
 #define AARCH64_FPU_SV_LOG1P_INLINE_H
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct sv_log1p_data
 {
-  double poly[19], ln2[2];
+  double c0, c2, c4, c6, c8, c10, c12, c14, c16;
+  double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
+  double ln2_lo, ln2_hi;
   uint64_t hf_rt2_top;
   uint64_t one_m_hf_rt2_top;
   uint32_t bottom_mask;
@@ -33,15 +34,30 @@ static const struct sv_log1p_data
 } sv_log1p_data = {
   /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].
    */
-  .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
-	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
-	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
-	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
-	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
-	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
-	    -0x1.cfa7385bdb37ep-6 },
-  .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 },
+  .c0 = -0x1.ffffffffffffbp-2,
+  .c1 = 0x1.55555555551a9p-2,
+  .c2 = -0x1.00000000008e3p-2,
+  .c3 = 0x1.9999999a32797p-3,
+  .c4 = -0x1.555555552fecfp-3,
+  .c5 = 0x1.249248e071e5ap-3,
+  .c6 = -0x1.ffffff8bf8482p-4,
+  .c7 = 0x1.c71c8f07da57ap-4,
+  .c8 = -0x1.9999ca4ccb617p-4,
+  .c9 = 0x1.7459ad2e1dfa3p-4,
+  .c10 = -0x1.554d2680a3ff2p-4,
+  .c11 = 0x1.3b4c54d487455p-4,
+  .c12 = -0x1.2548a9ffe80e6p-4,
+  .c13 = 0x1.0f389a24b2e07p-4,
+  .c14 = -0x1.eee4db15db335p-5,
+  .c15 = 0x1.e95b494d4a5ddp-5,
+  .c16 = -0x1.15fdf07cb7c73p-4,
+  .c17 = 0x1.0310b70800fcfp-4,
+  .c18 = -0x1.cfa7385bdb37ep-6,
+  .ln2_lo = 0x1.62e42fefa3800p-1,
+  .ln2_hi = 0x1.ef35793c76730p-45,
+  /* top32(asuint64(sqrt(2)/2)) << 32.  */
   .hf_rt2_top = 0x3fe6a09e00000000,
+  /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32.  */
   .one_m_hf_rt2_top = 0x00095f6200000000,
   .bottom_mask = 0xffffffff,
   .one_top = 0x3ff
@@ -51,14 +67,14 @@ static inline svfloat64_t
 sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 {
   /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
-     differs from v_log1p_2u5.c by:
+     differs from advsimd/log1p.c by:
      - No special-case handling - this should be dealt with by the caller.
      - Pairwise Horner polynomial evaluation for improved accuracy.
      - Optionally simulate the shortcut for k=0, used in the scalar routine,
        using svsel, for improved accuracy when the argument to log1p is close
      to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1
      in the source of the caller before including this file.
-     See sv_log1p_2u1.c for details of the algorithm.  */
+     See sve/log1p.c for details of the algorithm.  */
   const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data);
   svfloat64_t m = svadd_x (pg, x, 1);
   svuint64_t mi = svreinterpret_u64 (m);
@@ -79,7 +95,7 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
   svfloat64_t cm;
 
 #ifndef WANT_SV_LOG1P_K0_SHORTCUT
-#error                                                                         \
+#error                                                                       \
   "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
 #elif WANT_SV_LOG1P_K0_SHORTCUT
   /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
@@ -96,14 +112,46 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 #endif
 
   /* Approximate log1p(f) on the reduced input using a polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly);
+  svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f),
+	      f4 = svmul_x (svptrue_b64 (), f2, f2),
+	      f8 = svmul_x (svptrue_b64 (), f4, f4),
+	      f16 = svmul_x (svptrue_b64 (), f8, f8);
+
+  svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+  svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+  svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+  svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+  svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17);
+
+  /* Order-18 Estrin scheme.  */
+  svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0);
+  svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1);
+  svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0);
+  svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1);
+
+  svfloat64_t p03 = svmla_x (pg, p01, f2, p23);
+  svfloat64_t p47 = svmla_x (pg, p45, f2, p67);
+  svfloat64_t p07 = svmla_x (pg, p03, f4, p47);
+
+  svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0);
+  svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1);
+  svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0);
+  svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1);
+
+  svfloat64_t p811 = svmla_x (pg, p89, f2, p1011);
+  svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415);
+  svfloat64_t p815 = svmla_x (pg, p811, f4, p1215);
+
+  svfloat64_t p015 = svmla_x (pg, p07, f8, p815);
+  svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0);
+  svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1);
+  svfloat64_t p = svmla_x (pg, p015, f16, p1618);
 
   /* Assemble log1p(x) = k * log2 + log1p(f) + c/m.  */
-  svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]);
-  svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]);
+  svfloat64_t ln2_lo_hi = svld1rq (svptrue_b64 (), &d->ln2_lo);
+  svfloat64_t ylo = svmla_lane (cm, k, ln2_lo_hi, 0);
+  svfloat64_t yhi = svmla_lane (f, k, ln2_lo_hi, 1);
 
-  return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+  return svmad_x (pg, p, f2, svadd_x (pg, ylo, yhi));
 }
-
 #endif
diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
index 789cc68..5869419 100644
--- a/sysdeps/aarch64/fpu/tanh_sve.c
+++ b/sysdeps/aarch64/fpu/tanh_sve.c
@@ -18,83 +18,117 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include "sv_math.h"
-#include "poly_sve_f64.h"
 
 static const struct data
 {
-  float64_t poly[11];
-  float64_t inv_ln2, ln2_hi, ln2_lo, shift;
-  uint64_t thresh, tiny_bound;
+  double ln2_hi, ln2_lo;
+  double c2, c4;
+  double c0, c1, c3;
+  double two_over_ln2, shift;
+  uint64_t tiny_bound;
+  double large_bound, fexpa_bound;
+  uint64_t e2xm1_data[20];
 } data = {
-  /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2].  */
-  .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
-	    0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
-	    0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
-	    0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
-	    0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
-  .inv_ln2 = 0x1.71547652b82fep0,
-  .ln2_hi = -0x1.62e42fefa39efp-1,
-  .ln2_lo = -0x1.abc9e3b39803fp-56,
-  .shift = 0x1.8p52,
-
+  /* Generated using Remez, in [-log(2)/128, log(2)/128].  */
+  .c0 = 0x1p-1,
+  .c1 = 0x1.55555555548f9p-3,
+  .c2 = 0x1.5555555554c22p-5,
+  .c3 = 0x1.111123aaa2fb2p-7,
+  .c4 = 0x1.6c16d77d98e5bp-10,
+  .ln2_hi = 0x1.62e42fefa3800p-1,
+  .ln2_lo = 0x1.ef35793c76730p-45,
+  .two_over_ln2 = 0x1.71547652b82fep+1,
+  .shift = 0x1.800000000ffc0p+46,   /* 1.5*2^46+1023.  */
   .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27).  */
-  /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound).  */
-  .thresh = 0x01f241bf835f9d5f,
+  .large_bound = 0x1.30fc1931f09cap+4, /* arctanh(1 - 2^-54).  */
+  .fexpa_bound = 0x1.a56ef8ec924ccp-4,	  /* 19/64 * ln2/2.  */
+  /* Table lookup of 2^(i/64) - 1, for values of i from 0..19.  */
+  .e2xm1_data = {
+    0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+    0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+    0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+    0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+    0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+  },
 };
 
+/* An expm1-inspired, FEXPA-based helper function that returns an
+   accurate estimate for e^(2x) - 1, with no special-case handling and
+   no support for negative inputs of x.  */
 static inline svfloat64_t
-expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
-{
-  /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
-     the scalar variant of tanh.  */
-
-  /* Reduce argument: f in [-ln2/2, ln2/2], i is exact.  */
-  svfloat64_t j
-      = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
-  svint64_t i = svcvt_s64_x (pg, j);
-  svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi);
-  f = svmla_x (pg, f, j, d->ln2_lo);
-
-  /* Approximate expm1(f) using polynomial.  */
-  svfloat64_t f2 = svmul_x (pg, f, f);
-  svfloat64_t f4 = svmul_x (pg, f2, f2);
-  svfloat64_t p = svmla_x (
-      pg, f, f2,
-      sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly));
-
-  /* t = 2 ^ i.  */
-  svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
-  /* expm1(x) = p * t + (t - 1).  */
-  return svmla_x (pg, svsub_x (pg, t, 1), p, t);
-}
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
-  return sv_call_f64 (tanh, x, y, special);
+  svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2);
+  svuint64_t u = svreinterpret_u64 (z);
+  svfloat64_t n = svsub_x (pg, z, d->shift);
+
+  /* r = 2x - n * ln2, r is in [-ln2/(2N), ln2/(2N)].  */
+  svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+  svfloat64_t r = svadd_x (pg, x, x);
+  r = svmls_lane (r, n, ln2, 0);
+  r = svmls_lane (r, n, ln2, 1);
+
+  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6.  */
+  svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+  svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+  svfloat64_t p;
+  svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+  svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+  p = svmad_x (pg, c34, r2, c12);
+  p = svmad_x (pg, p, r, sv_f64 (d->c0));
+  p = svmad_x (pg, p, r2, r);
+
+  svfloat64_t scale = svexpa (u);
+
+  /* We want to construct e2xm1(x) = (scale - 1) + scale * poly.
+     However, for values of scale close to 1, scale-1 causes large ULP errors
+     due to cancellation.
+
+     This can be circumvented by using a small lookup for scale-1
+     when our input is below a certain bound, otherwise we can use FEXPA.  */
+  svbool_t is_small = svaclt (pg, x, d->fexpa_bound);
+
+  /* Index via the input of FEXPA, but we only care about the lower 5 bits.  */
+  svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+  /* Compute scale - 1 from FEXPA, and lookup values where this fails.  */
+  svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+  svuint64_t scalem1_lookup
+      = svld1_gather_index (is_small, d->e2xm1_data, base_idx);
+
+  /* Select the appropriate scale - 1 value based on x.  */
+  svfloat64_t scalem1
+      = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+  return svmla_x (pg, scalem1, scale, p);
 }
 
-/* SVE approximation for double-precision tanh(x), using a simplified
-   version of expm1. The greatest observed error is 2.77 ULP:
-   _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
-				       want -0x1.bd6a21a163624p-3.  */
+/* SVE approximation for double-precision tanh(x), using a modified version of
+   FEXPA expm1 to calculate e^2x - 1.
+   The greatest observed error is 2.79 + 0.5 ULP:
+   _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9
+				       want 0x1.fff7be486cae9p-9.  */
 svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
-  svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x));
+  svbool_t large = svacge (pg, x, d->large_bound);
 
-  /* Trigger special-cases for tiny, boring and infinity/NaN.  */
-  svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh);
+  /* We can use tanh(x) = (e^2x - 1) / (e^2x + 1) to approximate tanh.
+  As an additional optimisation, we can ensure more accurate values of e^x
+  by only using positive inputs. So we calculate tanh(|x|), and restore the
+  sign of the input before returning.  */
+  svfloat64_t ax = svabs_x (pg, x);
+  svuint64_t sign_bit
+      = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
 
-  svfloat64_t u = svadd_x (pg, x, x);
+  svfloat64_t p = e2xm1_inline (pg, ax, d);
+  svfloat64_t q = svadd_x (pg, p, 2);
 
-  /* tanh(x) = (e^2x - 1) / (e^2x + 1).  */
-  svfloat64_t q = expm1_inline (u, pg, d);
-  svfloat64_t qp2 = svadd_x (pg, q, 2);
+  /* For sufficiently high inputs, the result of tanh(|x|) is 1 when correctly
+     rounded, at this point we can return 1 directly, with sign correction.
+     This will also act as a guard against our approximation overflowing.  */
+  svfloat64_t y = svsel (large, sv_f64 (1.0), svdiv_x (pg, p, q));
 
-  if (__glibc_unlikely (svptest_any (pg, special)))
-    return special_case (x, svdiv_x (pg, q, qp2), special);
-  return svdiv_x (pg, q, qp2);
+  return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y)));
 }
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 07133eb..a3fef22 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -25,11 +25,15 @@
 
 VPCS_VECTOR_WRAPPER (acos_advsimd, _ZGVnN2v_acos)
 VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
+VPCS_VECTOR_WRAPPER (acospi_advsimd, _ZGVnN2v_acospi)
 VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
 VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
+VPCS_VECTOR_WRAPPER (asinpi_advsimd, _ZGVnN2v_asinpi)
 VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
 VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
+VPCS_VECTOR_WRAPPER (atanpi_advsimd, _ZGVnN2v_atanpi)
 VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
+VPCS_VECTOR_WRAPPER_ff (atan2pi_advsimd, _ZGVnN2vv_atan2pi)
 VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt)
 VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
 VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 02953cb..f4a5ae8 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -44,11 +44,15 @@
 
 SVE_VECTOR_WRAPPER (acos_sve, _ZGVsMxv_acos)
 SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
+SVE_VECTOR_WRAPPER (acospi_sve, _ZGVsMxv_acospi)
 SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
 SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
+SVE_VECTOR_WRAPPER (asinpi_sve, _ZGVsMxv_asinpi)
 SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
 SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
+SVE_VECTOR_WRAPPER (atanpi_sve, _ZGVsMxv_atanpi)
 SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
+SVE_VECTOR_WRAPPER_ff (atan2pi_sve, _ZGVsMxvv_atan2pi)
 SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt)
 SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
 SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 118bbb0..bc22956 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -25,11 +25,15 @@
 
 VPCS_VECTOR_WRAPPER (acosf_advsimd, _ZGVnN4v_acosf)
 VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
+VPCS_VECTOR_WRAPPER (acospif_advsimd, _ZGVnN4v_acospif)
 VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
 VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
+VPCS_VECTOR_WRAPPER (asinpif_advsimd, _ZGVnN4v_asinpif)
 VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
 VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
+VPCS_VECTOR_WRAPPER (atanpif_advsimd, _ZGVnN4v_atanpif)
 VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
+VPCS_VECTOR_WRAPPER_ff (atan2pif_advsimd, _ZGVnN4vv_atan2pif)
 VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf)
 VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
 VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index f5e7c8c..ad0d6ad 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -44,11 +44,15 @@
 
 SVE_VECTOR_WRAPPER (acosf_sve, _ZGVsMxv_acosf)
 SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
+SVE_VECTOR_WRAPPER (acospif_sve, _ZGVsMxv_acospif)
 SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
 SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
+SVE_VECTOR_WRAPPER (asinpif_sve, _ZGVsMxv_asinpif)
 SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
 SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
+SVE_VECTOR_WRAPPER (atanpif_sve, _ZGVsMxv_atanpif)
 SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
+SVE_VECTOR_WRAPPER_ff (atan2pif_sve, _ZGVsMxvv_atan2pif)
 SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf)
 SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
 SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
diff --git a/sysdeps/aarch64/machine-gmon.h b/sysdeps/aarch64/machine-gmon.h
index eba7c24..05323c9 100644
--- a/sysdeps/aarch64/machine-gmon.h
+++ b/sysdeps/aarch64/machine-gmon.h
@@ -27,9 +27,8 @@ static void mcount_internal (u_long frompc, u_long selfpc);
 #define _MCOUNT_DECL(frompc, selfpc) \
 static inline void mcount_internal (u_long frompc, u_long selfpc)
 
-/* Note: strip_pac is needed for frompc because of gcc PR target/94791.  */
 #define MCOUNT                                                    \
 void __mcount (void *frompc)                                      \
 {                                                                 \
-  mcount_internal ((u_long) strip_pac (frompc), (u_long) RETURN_ADDRESS (0)); \
+  mcount_internal ((u_long) frompc, (u_long) RETURN_ADDRESS (0)); \
 }
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 8dc314b..0e26171 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -36,18 +36,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/aarch64/multiarch/memcpy.c, memmove.c and memset.c.  */
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_oryon1)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memcpy, sve && !bti, __memcpy_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, mops, __memcpy_mops)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
   IFUNC_IMPL (i, name, memmove,
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_oryon1)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memmove, sve && !bti, __memmove_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
   IFUNC_IMPL (i, name, memset,
@@ -55,10 +51,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_oryon1)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
-#if HAVE_AARCH64_SVE_ASM
-	      IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx)
+	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx)
 	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
   IFUNC_IMPL (i, name, memchr,
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index 63c24e7..75b3e08 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -31,7 +31,7 @@
   unsigned __attribute__((unused)) zva_size =				      \
     GLRO(dl_aarch64_cpu_features).zva_size;				      \
   bool __attribute__((unused)) bti =					      \
-    HAVE_AARCH64_BTI && GLRO(dl_aarch64_cpu_features).bti;		      \
+    GLRO(dl_aarch64_cpu_features).bti;					      \
   bool __attribute__((unused)) mte =					      \
     MTE_ENABLED ();							      \
   bool __attribute__((unused)) sve =					      \
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 0e33d19..894dabe 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -43,7 +43,7 @@ select_memcpy_ifunc (void)
   if (mops)
     return __memcpy_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr))
 	return __memcpy_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
index ed18682..acad6e8 100644
--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -19,9 +19,6 @@
 
 #include <sysdep.h>
 
-#undef BTI_C
-#define BTI_C
-
 /* Assumptions:
  *
  * ARMv8.2-a, AArch64, unaligned accesses, sve
@@ -38,8 +35,6 @@
 #define vlen	x7
 #define vlen8	x8
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 	.macro ld1b_unroll8
@@ -91,9 +86,6 @@
 	st1b	z7.b, p0, [dst, 7, mul vl]
 	.endm
 
-#undef BTI_C
-#define BTI_C
-
 ENTRY (__memcpy_a64fx)
 
 	cntb	vlen
@@ -296,4 +288,3 @@ L(full_overlap):
 	b	L(last_bytes)
 
 END (__memmove_a64fx)
-#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
index 26d4890..0ba6358 100644
--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
+++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -56,8 +56,6 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 ENTRY (__memcpy_sve)
@@ -199,4 +197,3 @@ L(return):
 	ret
 
 END (__memmove_sve)
-#endif
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index 47b7268..6b0d0ce 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -41,7 +41,7 @@ select_memmove_ifunc (void)
   if (mops)
     return __memmove_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr))
 	return __memmove_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index 872f39f..2b0a58b 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -46,7 +46,7 @@ select_memset_ifunc (void)
   if (mops)
     return __memset_mops;
 
-  if (sve && HAVE_AARCH64_SVE_ASM)
+  if (sve)
     {
       if (IS_A64FX (midr) && zva_size == 256)
 	return __memset_a64fx;
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index ea60b78..e921240 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -31,8 +31,6 @@
 #define PF_DIST_L1	(CACHE_LINE_SIZE * 16)	// Prefetch distance L1
 #define vector_length	x9
 
-#if HAVE_AARCH64_SVE_ASM
-
 	.arch armv8.2-a+sve
 
 #define dstin   x0
@@ -50,10 +48,6 @@
 	.endif
 	.endm
 
-
-#undef BTI_C
-#define BTI_C
-
 ENTRY (__memset_a64fx)
 
 	cntb	vector_length
@@ -170,5 +164,3 @@ L(L2):
 	b	L(last)
 
 END (__memset_a64fx)
-
-#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
index 7fb40fd..c385e1a 100644
--- a/sysdeps/aarch64/multiarch/memset_sve_zva64.S
+++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
@@ -25,8 +25,6 @@
  * ZVA size is 64.
  */
 
-#if HAVE_AARCH64_SVE_ASM
-
 .arch armv8.2-a+sve
 
 #define dstin	x0
@@ -120,4 +118,3 @@ L(no_zva_loop):
 	ret
 
 END (__memset_sve_zva64)
-#endif
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
index 19657b6..e1b772c 100644
--- a/sysdeps/aarch64/preconfigure
+++ b/sysdeps/aarch64/preconfigure
@@ -3,5 +3,6 @@ aarch64*)
 	base_machine=aarch64
 	machine=aarch64
 	mtls_descriptor=desc
+	mtls_traditional=trad
 	;;
 esac
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index d82d62c..53c5e7d 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -35,6 +35,20 @@ libc_hidden_def (_setjmp)
 
 ENTRY_ALIGN (__sigsetjmp, 2)
 1:
+
+#if IS_IN(libc)
+	/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so.
+	   The calling convention of __libc_arm_za_disable lets us keep the
+	   return address in a register across the call, so it is never
+	   saved to or reloaded from the stack.  As a result we also don't
+	   need to sign the return address or check it after returning.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
+	bl	__libc_arm_za_disable
+	mov	x30, x13
+	cfi_register (x13, x30)
+#endif
+
 	stp	x19, x20, [x0, #JB_X19<<3]
 	stp	x21, x22, [x0, #JB_X21<<3]
 	stp	x23, x24, [x0, #JB_X23<<3]
@@ -73,7 +87,7 @@ L(gcs_done):
 #if IS_IN (rtld)
 	/* In ld.so we never save the signal mask */
 	mov	w0, #0
-	RET
+	ret
 #else
 	b	C_SYMBOL_NAME(__sigjmp_save)
 #endif
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
index 544e397..694c338 100644
--- a/sysdeps/aarch64/start.S
+++ b/sysdeps/aarch64/start.S
@@ -108,7 +108,7 @@ ENTRY(_start)
 	   because crt1.o and rcrt1.o share code and the later must avoid the
 	   use of GOT relocations before __libc_start_main is called.  */
 __wrap_main:
-	BTI_C
+	bti	c
 	b	main
 #endif
 END(_start)
diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h
index 7781b37..a3322a9 100644
--- a/sysdeps/aarch64/sys/ifunc.h
+++ b/sysdeps/aarch64/sys/ifunc.h
@@ -19,24 +19,77 @@
 #ifndef _SYS_IFUNC_H
 #define _SYS_IFUNC_H
 
+#include <sys/cdefs.h>
+
 /* A second argument is passed to the ifunc resolver.  */
 #define _IFUNC_ARG_HWCAP	(1ULL << 62)
 
-/* The prototype of a gnu indirect function resolver on AArch64 is
+/* Maximum number of HWCAP elements that are currently supported.  */
+#define _IFUNC_HWCAP_MAX	4
+
+/* The prototype of a GNU indirect function resolver on AArch64 is
+
+     ElfW(Addr) ifunc_resolver (uint64_t, const uint64_t *);
+
+   The following prototype is also compatible:
 
      ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *);
 
-   the first argument should have the _IFUNC_ARG_HWCAP bit set and
-   the remaining bits should match the AT_HWCAP settings.  */
+   The first argument might have the _IFUNC_ARG_HWCAP bit set and
+   the remaining bits should match the AT_HWCAP settings.
+
+   If the _IFUNC_ARG_HWCAP bit is set in the first argument, then
+   the second argument is passed to the resolver function.  In
+   this case, the second argument is a const pointer to a buffer
+   that gives access to all available HWCAP elements.
+
+   This buffer has its size in bytes at offset 0.  The HWCAP elements
+   are available at offsets 8, 16, 24, 32... respectively for AT_HWCAP,
+   AT_HWCAP2, AT_HWCAP3, AT_HWCAP4...  (these offsets are multiples of
+   sizeof (unsigned long)).
+
+   Indirect function resolvers must check availability of HWCAP
+   elements at runtime before accessing them using the size of the
+   buffer.  */
 
-/* Second argument to an ifunc resolver.  */
 struct __ifunc_arg_t
 {
-  unsigned long _size; /* Size of the struct, so it can grow.  */
+  unsigned long _size;    /* Size of the struct, so it can grow.  */
   unsigned long _hwcap;
-  unsigned long _hwcap2;
+  unsigned long _hwcap2;  /* End of 1st published struct.  */
+  unsigned long _hwcap3;
+  unsigned long _hwcap4;  /* End of 2nd published struct.  */
 };
 
 typedef struct __ifunc_arg_t __ifunc_arg_t;
 
+/* Constants for IDs of HWCAP elements to be used with the
+   __ifunc_hwcap function below.  */
+enum
+{
+  _IFUNC_ARG_AT_HWCAP = 1,
+  _IFUNC_ARG_AT_HWCAP2 = 2,
+  _IFUNC_ARG_AT_HWCAP3 = 3,
+  _IFUNC_ARG_AT_HWCAP4 = 4,
+};
+
+/* A helper function to obtain HWCAP element by its ID from the
+   parameters ARG0 and ARG1 passed to the ifunc resolver.  Note that
+   ID 1 corresponds to AT_HWCAP, ID 2 corresponds to AT_HWCAP2, etc.
+   If there is no element available for the requested ID then 0 is
+   returned.  If ID doesn't match any supported AT_HWCAP{,2,...} value,
+   then 0 is also returned.  */
+static __inline unsigned long __attribute__ ((unused, always_inline))
+__ifunc_hwcap (unsigned long __id,
+	       unsigned long __arg0, const unsigned long *__arg1)
+{
+  if (__glibc_likely (__arg0 & _IFUNC_ARG_HWCAP))
+    {
+      const unsigned long size = __arg1[0];
+      const unsigned long offset = __id * sizeof (unsigned long);
+      return offset < size && __id > 0 ? __arg1[__id] : 0;
+    }
+  return __id == 1 ? __arg0 : 0;
+}
+
 #endif
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index 9424115..f5e28cb 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -21,43 +21,15 @@
 
 #include <sysdeps/generic/sysdep.h>
 
-#ifndef __ASSEMBLER__
-/* Strip pointer authentication code from pointer p.  */
-static inline void *
-strip_pac (void *p)
-{
-  register void *ra asm ("x30") = (p);
-  asm ("hint 7 // xpaclri" : "+r"(ra));
-  return ra;
-}
-
-/* This is needed when glibc is built with -mbranch-protection=pac-ret
-   with a gcc that is affected by PR target/94891.  */
-# if HAVE_AARCH64_PAC_RET
-#  undef RETURN_ADDRESS
-#  define RETURN_ADDRESS(n) strip_pac (__builtin_return_address (n))
-# endif
-#endif
-
 #ifdef	__ASSEMBLER__
 
+/* CFI directive for return address.  */
+#define cfi_negate_ra_state	.cfi_negate_ra_state
+
 /* Syntactic details of assembler.  */
 
 #define ASM_SIZE_DIRECTIVE(name) .size name,.-name
 
-/* Branch Target Identitication support.  */
-#if HAVE_AARCH64_BTI
-# define BTI_C		hint	34
-# define BTI_J		hint	36
-#else
-# define BTI_C		nop
-# define BTI_J		nop
-#endif
-
-/* Return address signing support (pac-ret).  */
-#define PACIASP		hint	25
-#define AUTIASP		hint	29
-
 /* Guarded Control Stack support.  */
 #define CHKFEAT_X16	hint	40
 #define MRS_GCSPR(x)	mrs	x, s3_3_c2_c5_1
@@ -87,11 +59,7 @@ strip_pac (void *p)
 
 /* Add GNU property note with the supported features to all asm code
    where sysdep.h is included.  */
-#if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET
 GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS)
-#elif HAVE_AARCH64_BTI
-GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
-#endif
 
 /* Define an entry point visible from C.  */
 #define ENTRY(name)						\
@@ -100,7 +68,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .p2align 6;							\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 /* Define an entry point visible from C.  */
@@ -110,7 +78,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .p2align align;						\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 /* Define an entry point visible from C with a specified alignment and
@@ -127,7 +95,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
   .endr;							\
   C_LABEL(name)							\
   cfi_startproc;						\
-  BTI_C;							\
+  bti	c;							\
   CALL_MCOUNT
 
 #undef	END
diff --git a/sysdeps/aarch64/tst-ifunc-arg-1.c b/sysdeps/aarch64/tst-ifunc-arg-1.c
index b90c836..292c5ae 100644
--- a/sysdeps/aarch64/tst-ifunc-arg-1.c
+++ b/sysdeps/aarch64/tst-ifunc-arg-1.c
@@ -57,6 +57,21 @@ do_test (void)
   TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t));
   TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP));
   TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2));
+  TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3));
+  TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4));
+
+  const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2;
+
+  TEST_COMPARE (__ifunc_hwcap (1, saved_arg1, saved_arg2_ptr),
+		getauxval (AT_HWCAP));
+  TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP2));
+  TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP3));
+  TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP4));
+
+
   return 0;
 }
 
diff --git a/sysdeps/aarch64/tst-ifunc-arg-2.c b/sysdeps/aarch64/tst-ifunc-arg-2.c
index dac144d..c05129a 100644
--- a/sysdeps/aarch64/tst-ifunc-arg-2.c
+++ b/sysdeps/aarch64/tst-ifunc-arg-2.c
@@ -60,6 +60,20 @@ do_test (void)
   TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t));
   TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP));
   TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2));
+  TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3));
+  TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4));
+
+  const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2;
+
+  TEST_COMPARE (__ifunc_hwcap (1, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP));
+  TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP2));
+  TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP3));
+  TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr),
+                getauxval (AT_HWCAP4));
+
   return 0;
 }
 
diff --git a/sysdeps/aarch64/tst-ifunc-arg-3.c b/sysdeps/aarch64/tst-ifunc-arg-3.c
new file mode 100644
index 0000000..49d8866
--- /dev/null
+++ b/sysdeps/aarch64/tst-ifunc-arg-3.c
@@ -0,0 +1,97 @@
+/* Tests for __ifunc_hwcap helper function.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include <sys/ifunc.h>
+#include <support/check.h>
+
+#define CHECK_VALUES_WITH_ARG(p1, p2, p3, p4) \
+  ({ \
+    TEST_COMPARE (__ifunc_hwcap (0, _IFUNC_ARG_HWCAP, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, _IFUNC_ARG_HWCAP, arg), p1); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, _IFUNC_ARG_HWCAP, arg), p2); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, _IFUNC_ARG_HWCAP, arg), p3); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, _IFUNC_ARG_HWCAP, arg), p4); \
+    TEST_COMPARE (__ifunc_hwcap (5, _IFUNC_ARG_HWCAP, arg), 0); \
+  })
+
+#define CHECK_VALUES_WITHOUT_ARG(p1) \
+  ({ \
+    TEST_COMPARE (__ifunc_hwcap (0, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, p1, arg), p1); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, p1, arg), 0); \
+    TEST_COMPARE (__ifunc_hwcap (5, p1, arg), 0); \
+  })
+
+static void
+test_one (const unsigned long *arg)
+{
+  uint64_t size = arg[0] / sizeof (uint64_t);
+
+  switch (size)
+    {
+      case 1:
+	CHECK_VALUES_WITH_ARG (0, 0, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (0);
+	break;
+      case 2:
+	CHECK_VALUES_WITH_ARG (1, 0, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 3:
+	CHECK_VALUES_WITH_ARG (1, 2, 0, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 4:
+	CHECK_VALUES_WITH_ARG (1, 2, 3, 0);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      case 5:
+	CHECK_VALUES_WITH_ARG (1, 2, 3, 4);
+	CHECK_VALUES_WITHOUT_ARG (1);
+	break;
+      default:
+	TEST_VERIFY (0); // unexpected size
+	break;
+    }
+}
+
+static int
+do_test (void)
+{
+  uint64_t arg[_IFUNC_HWCAP_MAX + 1] = {
+    0, /* Placeholder for size */
+    _IFUNC_ARG_AT_HWCAP, /* AT_HWCAP */
+    _IFUNC_ARG_AT_HWCAP2, /* AT_HWCAP2 */
+    _IFUNC_ARG_AT_HWCAP3, /* AT_HWCAP3 */
+    _IFUNC_ARG_AT_HWCAP4, /* AT_HWCAP4 */
+  };
+
+  for (int k = 0; k <= _IFUNC_HWCAP_MAX; k++)
+    {
+      /* Update size */
+      arg[0] = (k + 1) * sizeof (uint64_t);
+      test_one (arg);
+    }
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-ifunc-arg-4.c b/sysdeps/aarch64/tst-ifunc-arg-4.c
new file mode 100644
index 0000000..c95ef9e
--- /dev/null
+++ b/sysdeps/aarch64/tst-ifunc-arg-4.c
@@ -0,0 +1,67 @@
+/* Test for ifunc resolver that uses __ifunc_hwcap helper function.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include <sys/ifunc.h>
+#include <support/check.h>
+
+static int
+one (void)
+{
+  return 1;
+}
+
+static int
+two (void)
+{
+  return 2;
+}
+
+/* Resolver function: pick ONE when AT_HWCAP2 reports POE, else TWO.  */
+static void *
+resolver (uint64_t arg0, const uint64_t arg1[])
+{
+  /* Query AT_HWCAP2 through the helper under test.  */
+  const uint64_t caps = __ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, arg0, arg1);
+  int (*impl) (void) = (caps & HWCAP2_POE) != 0 ? one : two;
+
+  return (void *) impl;
+}
+
+/* An extern visible ifunc symbol.  */
+int fun (void) __attribute__((ifunc ("resolver")));
+
+static int
+do_test (void)
+{
+  if (getauxval (AT_HWCAP2) & HWCAP2_POE)
+    {
+      printf ("using 1st implementation\n");
+      TEST_VERIFY (fun () == 1);
+    }
+  else
+    {
+      printf ("using 2nd implementation\n");
+      TEST_VERIFY (fun () == 2);
+    }
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h
new file mode 100644
index 0000000..f049416
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-helper.h
@@ -0,0 +1,97 @@
+/* Utility functions for SME tests.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Streaming SVE vector register size.  */
+static unsigned long svl;
+
+struct blk {
+  void *za_save_buffer;
+  uint16_t num_za_save_slices;
+  char __reserved[6];
+};
+
+/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
+static unsigned long
+get_svcr (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
+    : "=r" (x0));
+  return x0;
+}
+
+/* Returns tpidr2.  */
+static void *
+get_tpidr2 (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
+    : "=r"(x0) :: "memory");
+  return (void *) x0;
+}
+
+/* Obtains current streaming SVE vector register size.  */
+static unsigned long
+get_svl (void)
+{
+  register unsigned long x0 asm ("x0");
+  asm volatile (
+    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
+    : "=r" (x0));
+  return x0;
+}
+
+/* PSTATE.ZA = 1, set ZA state to active.  */
+static void
+start_za (void)
+{
+  asm volatile (
+    ".inst   0xd503457f  /* smstart za  */");
+}
+
+/* Load ZA from the buffer at P, one vector of SVL bytes per iteration.  */
+static void __attribute__ ((noinline))
+load_za (const void *p)
+{
+  register unsigned long x15 asm ("x15") = 0;
+  register unsigned long x16 asm ("x16") = (unsigned long)p;
+  register unsigned long x17 asm ("x17") = svl;
+  /* The asm reads *P and CMP writes the flags: declare both clobbers.  */
+  asm volatile (
+    ".inst   0xd503437f  /* smstart sm  */\n"
+    ".L_ldr_loop:\n"
+    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
+    "add     w15, w15, 1\n"
+    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
+    "cmp     w15, w17\n"
+    "bne     .L_ldr_loop\n"
+    ".inst   0xd503427f  /* smstop  sm  */\n"
+    : "+r"(x15), "+r"(x16), "+r"(x17) :: "cc", "memory");
+}
+
+/* Set tpidr2 to BLK.  */
+static void
+set_tpidr2 (struct blk *blk)
+{
+  register unsigned long x0 asm ("x0") = (unsigned long)blk;
+  asm volatile (
+    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
+    :: "r"(x0) : "memory");
+}
diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c
index 62c419f..103897a 100644
--- a/sysdeps/aarch64/tst-sme-jmp.c
+++ b/sysdeps/aarch64/tst-sme-jmp.c
@@ -27,87 +27,12 @@
 #include <support/support.h>
 #include <support/test-driver.h>
 
-struct blk {
-  void *za_save_buffer;
-  uint16_t num_za_save_slices;
-  char __reserved[6];
-};
+#include "tst-sme-helper.h"
 
-static unsigned long svl;
 static uint8_t *za_orig;
 static uint8_t *za_dump;
 static uint8_t *za_save;
 
-static unsigned long
-get_svl (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0x04bf5820  /* rdsvl   x0, 1  */\n"
-    : "=r" (x0));
-  return x0;
-}
-
-/* PSTATE.ZA = 1, set ZA state to active.  */
-static void
-start_za (void)
-{
-  asm volatile (
-    ".inst   0xd503457f  /* smstart za  */");
-}
-
-/* Read SVCR to get SM (bit0) and ZA (bit1) state.  */
-static unsigned long
-get_svcr (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0xd53b4240  /* mrs     x0, svcr  */\n"
-    : "=r" (x0));
-  return x0;
-}
-
-/* Load data into ZA byte by byte from p.  */
-static void __attribute__ ((noinline))
-load_za (const void *p)
-{
-  register unsigned long x15 asm ("x15") = 0;
-  register unsigned long x16 asm ("x16") = (unsigned long)p;
-  register unsigned long x17 asm ("x17") = svl;
-
-  asm volatile (
-    ".inst   0xd503437f  /* smstart sm  */\n"
-    ".L_ldr_loop:\n"
-    ".inst   0xe1006200  /* ldr     za[w15, 0], [x16]  */\n"
-    "add     w15, w15, 1\n"
-    ".inst   0x04305030  /* addvl   x16, x16, 1  */\n"
-    "cmp     w15, w17\n"
-    "bne     .L_ldr_loop\n"
-    ".inst   0xd503427f  /* smstop  sm  */\n"
-    : "+r"(x15), "+r"(x16), "+r"(x17));
-}
-
-/* Set tpidr2 to BLK.  */
-static void
-set_tpidr2 (struct blk *blk)
-{
-  register unsigned long x0 asm ("x0") = (unsigned long)blk;
-  asm volatile (
-    ".inst   0xd51bd0a0  /* msr     tpidr2_el0, x0  */\n"
-    :: "r"(x0) : "memory");
-}
-
-/* Returns tpidr2.  */
-static void *
-get_tpidr2 (void)
-{
-  register unsigned long x0 asm ("x0");
-  asm volatile (
-    ".inst   0xd53bd0a0  /* mrs     x0, tpidr2_el0  */\n"
-    : "=r"(x0) :: "memory");
-  return (void *) x0;
-}
-
 static void
 print_data(const char *msg, void *p)
 {
@@ -168,8 +93,8 @@ longjmp_test (void)
     {
       p = get_tpidr2 ();
       printf ("before longjmp: tp2 = %p\n", p);
-      if (p != &blk)
-	FAIL_EXIT1 ("tpidr2 is clobbered");
+      if (p != NULL)
+	FAIL_EXIT1 ("tpidr2 has not been reset to null");
       do_longjmp (env);
       FAIL_EXIT1 ("longjmp returned");
     }
diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c
new file mode 100644
index 0000000..63f6eeb
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-za-state.c
@@ -0,0 +1,119 @@
+/* Test for SME ZA state being cleared on setjmp and longjmp.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+
+#include "tst-sme-helper.h"
+
+static uint8_t *state;
+
+static void
+enable_sme_za_state (struct blk *ptr)
+{
+  set_tpidr2 (ptr);
+  start_za ();
+  load_za (state);
+}
+
+static void
+check_sme_za_state (const char msg[], bool clear)
+{
+  unsigned long svcr = get_svcr ();
+  void *tpidr2 = get_tpidr2 ();
+  printf ("[%s]\n", msg);
+  printf ("svcr = %016lx\n", svcr);
+  printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
+  if (clear)
+    {
+      TEST_VERIFY (svcr == 0);
+      TEST_VERIFY (tpidr2 == NULL);
+    }
+  else
+    {
+      TEST_VERIFY (svcr != 0);
+      TEST_VERIFY (tpidr2 != NULL);
+    }
+}
+
+static void
+run (struct blk *ptr)
+{
+  jmp_buf buf;
+  int ret;
+
+  check_sme_za_state ("initial state", /* Clear.  */ true);
+
+  /* Enable ZA state so that the effect of disabling it is observable.  */
+  enable_sme_za_state (ptr);
+  check_sme_za_state ("before setjmp", /* Clear.  */ false);
+
+  if ((ret = setjmp (buf)) == 0)
+    {
+      check_sme_za_state ("after setjmp", /* Clear.  */ true);
+
+      /* Enable ZA state so that the effect of disabling it is observable.  */
+      enable_sme_za_state (ptr);
+      check_sme_za_state ("before longjmp", /* Clear.  */ false);
+
+      longjmp (buf, 42);
+
+      /* Unreachable.  */
+      TEST_VERIFY (false);
+      __builtin_unreachable ();
+    }
+
+  TEST_COMPARE (ret, 42);
+  check_sme_za_state ("after longjmp", /* Clear.  */ true);
+}
+
+static int
+do_test (void)
+{
+  unsigned long hwcap2 = getauxval (AT_HWCAP2);
+  if ((hwcap2 & HWCAP2_SME) == 0)
+    return EXIT_UNSUPPORTED;
+
+  /* Get current streaming SVE vector register size.  */
+  svl = get_svl ();
+  printf ("svl: %lu\n", svl);
+  TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
+
+  /* Initialise buffer for ZA state of SME.  */
+  state = xmalloc (svl * svl);
+  memset (state, 1, svl * svl);
+  struct blk blk = {
+    .za_save_buffer = state,
+    .num_za_save_slices = svl,
+    .__reserved = {0},
+  };
+
+  run (&blk);
+
+  free (state);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
index 3ed75dd..1be63b7 100644
--- a/sysdeps/generic/Makefile
+++ b/sysdeps/generic/Makefile
@@ -21,6 +21,9 @@ CFLAGS-wordcopy.c += -Wno-uninitialized
 endif
 
 ifeq ($(subdir),elf)
+ifeq ($(enable-gsframe),yes)
+sysdep_routines += sframe-read sframe
+endif
 ifeq (yes:yes,$(build-shared):$(unwind-find-fde))
 # This is needed to support g++ v2 and v3.
 sysdep_routines += framestate unwind-pe
diff --git a/sysdeps/generic/getrandom-internal.h b/sysdeps/generic/getrandom-internal.h
index 7c54194..4872598 100644
--- a/sysdeps/generic/getrandom-internal.h
+++ b/sysdeps/generic/getrandom-internal.h
@@ -19,7 +19,7 @@
 #ifndef _GETRANDOM_INTERNAL_H
 #define _GETRANDOM_INTERNAL_H
 
-static inline void __getrandom_early_init (_Bool)
+static inline void __getrandom_early_init (_Bool initial)
 {
 }
 
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index fc4a3de..74025f1 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -368,8 +368,6 @@ struct rtld_global
       size_t n_elements;
       void (*free) (void *);
     } _ns_unique_sym_table;
-    /* Keep track of changes to each namespace' list.  */
-    struct r_debug_extended _ns_debug;
   } _dl_ns[DL_NNS];
   /* One higher than index of last used namespace.  */
   EXTERN size_t _dl_nns;
@@ -1089,15 +1087,29 @@ extern void _dl_debug_state (void);
 rtld_hidden_proto (_dl_debug_state)
 
 /* Initialize `struct r_debug_extended' for the namespace NS.  LDBASE
-   is the run-time load address of the dynamic linker, to be put in the
-   `r_ldbase' member.  Return the address of the structure.  */
+   is the run-time load address of the dynamic linker, to be put in
+   the `r_ldbase' member.
+
+   Return the address of the r_debug structure for the namespace.
+   This is not merely a convenience or optimization, but it is
+   necessary for the LIBC_PROBE Systemtap/debugger probes to work
+   reliably: direct variable access can create probes that tools
+   cannot consume.  */
 extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
      attribute_hidden;
 
+/* This is called after relocation processing to handle a potential
+   copy relocation for _r_debug.  */
+void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden;
+
 /* Update the `r_map' member and return the address of `struct r_debug'
    of the namespace NS.  */
 extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden;
 
+/* Update R->r_state to STATE and notify the debugger by calling
+   _dl_debug_state.  */
+void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden;
+
 /* Initialize the basic data structure for the search paths.  SOURCE
    is either "LD_LIBRARY_PATH" or "--library-path".
    GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to
diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h
deleted file mode 100644
index b95e409..0000000
--- a/sysdeps/generic/libc-tsd.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* libc-internal interface for thread-specific data.  Stub or TLS version.
-   Copyright (C) 1998-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _GENERIC_LIBC_TSD_H
-#define _GENERIC_LIBC_TSD_H 1
-
-/* This file defines the following macros for accessing a small fixed
-   set of thread-specific `void *' data used only internally by libc.
-
-   __libc_tsd_define(CLASS, TYPE, KEY)	-- Define or declare a datum with TYPE
-					   for KEY.  CLASS can be `static' for
-					   keys used in only one source file,
-					   empty for global definitions, or
-					   `extern' for global declarations.
-   __libc_tsd_address(TYPE, KEY)	-- Return the `TYPE *' pointing to
-					   the current thread's datum for KEY.
-   __libc_tsd_get(TYPE, KEY)		-- Return the `TYPE' datum for KEY.
-   __libc_tsd_set(TYPE, KEY, VALUE)	-- Set the datum for KEY to VALUE.
-
-   The set of available KEY's will usually be provided as an enum,
-   and contains (at least):
-		_LIBC_TSD_KEY_MALLOC
-		_LIBC_TSD_KEY_DL_ERROR
-		_LIBC_TSD_KEY_RPC_VARS
-   All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros.
-   Some implementations may not provide any enum at all and instead
-   using string pasting in the macros.  */
-
-#include <tls.h>
-
-/* When full support for __thread variables is available, this interface is
-   just a trivial wrapper for it.  Without TLS, this is the generic/stub
-   implementation for wholly single-threaded systems.
-
-   We don't define an enum for the possible key values, because the KEYs
-   translate directly into variables by macro magic.  */
-
-#define __libc_tsd_define(CLASS, TYPE, KEY)	\
-  CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie;
-
-#define __libc_tsd_address(TYPE, KEY)		(&__libc_tsd_##KEY)
-#define __libc_tsd_get(TYPE, KEY)		(__libc_tsd_##KEY)
-#define __libc_tsd_set(TYPE, KEY, VALUE)	(__libc_tsd_##KEY = (VALUE))
-
-#endif	/* libc-tsd.h */
diff --git a/sysdeps/generic/sframe-read.c b/sysdeps/generic/sframe-read.c
new file mode 100644
index 0000000..a6ebc42
--- /dev/null
+++ b/sysdeps/generic/sframe-read.c
@@ -0,0 +1,636 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <sframe-read.h>
+
+/* Get the SFrame header size.  */
+
+static inline uint32_t
+sframe_get_hdr_size (sframe_header *sfh)
+{
+  return SFRAME_V1_HDR_SIZE (*sfh);
+}
+
+/* Access functions for frame row entry data.  */
+
+static inline uint8_t
+sframe_fre_get_offset_count (uint8_t fre_info)
+{
+  return SFRAME_V1_FRE_OFFSET_COUNT (fre_info);
+}
+
+static inline uint8_t
+sframe_fre_get_offset_size (uint8_t fre_info)
+{
+  return SFRAME_V1_FRE_OFFSET_SIZE (fre_info);
+}
+
+static inline bool
+sframe_get_fre_ra_mangled_p (uint8_t fre_info)
+{
+  return SFRAME_V1_FRE_MANGLED_RA_P (fre_info);
+}
+
+/* Access functions for info from function descriptor entry.  */
+
+static uint32_t
+sframe_get_fre_type (sframe_func_desc_entry *fdep)
+{
+  /* A missing descriptor is reported as FRE type 0.  */
+  if (fdep == NULL)
+    return 0;
+  return SFRAME_V1_FUNC_FRE_TYPE (fdep->sfde_func_info);
+}
+
+static uint32_t
+sframe_get_fde_type (sframe_func_desc_entry *fdep)
+{
+  /* A missing descriptor is reported as FDE type 0.  */
+  if (fdep == NULL)
+    return 0;
+  return SFRAME_V1_FUNC_FDE_TYPE (fdep->sfde_func_info);
+}
+
+/* Check if the SFrame header has valid data.  Only SFrame version 2 is
+   accepted.  */
+
+static bool
+sframe_header_sanity_check_p (sframe_header *hp)
+{
+  /* Check preamble is valid.  */
+  if ((hp->sfh_preamble.sfp_magic != SFRAME_MAGIC)
+      || (hp->sfh_preamble.sfp_version != SFRAME_VERSION_2)
+      || (hp->sfh_preamble.sfp_flags & ~SFRAME_V2_F_ALL_FLAGS))
+    return false;
+
+  /* Check offsets are valid.  */
+  if (hp->sfh_fdeoff > hp->sfh_freoff)
+    return false;
+
+  return true;
+}
+
+/* Get the FRE start address size.  */
+
+static size_t
+sframe_fre_start_addr_size (uint32_t fre_type)
+{
+  /* Map each known FRE type to the width of its start address field;
+     any other value yields 0.  */
+  switch (fre_type)
+    {
+    case SFRAME_FRE_TYPE_ADDR1:
+      return 1;
+
+    case SFRAME_FRE_TYPE_ADDR2:
+      return 2;
+
+    case SFRAME_FRE_TYPE_ADDR4:
+      return 4;
+
+    default:
+      return 0;
+    }
+}
+
+/* Check if the FREP has valid data.  */
+
+static bool
+sframe_fre_sanity_check_p (sframe_frame_row_entry *frep)
+{
+  uint8_t offset_size, offset_cnt;
+  uint8_t fre_info;
+
+  if (frep == NULL)
+    return false;
+
+  fre_info = frep->fre_info;
+  offset_size = sframe_fre_get_offset_size (fre_info);
+
+  if (offset_size != SFRAME_FRE_OFFSET_1B
+      && offset_size != SFRAME_FRE_OFFSET_2B
+      && offset_size != SFRAME_FRE_OFFSET_4B)
+    return false;
+
+  offset_cnt = sframe_fre_get_offset_count (fre_info);
+  if (offset_cnt > MAX_NUM_STACK_OFFSETS)
+    return false;
+
+  return true;
+}
+
+/* Get FRE_INFO's offset size in bytes.  */
+
+static size_t
+sframe_fre_offset_bytes_size (uint8_t fre_info)
+{
+  uint8_t offset_size, offset_cnt;
+
+  offset_size = sframe_fre_get_offset_size (fre_info);
+
+  offset_cnt = sframe_fre_get_offset_count (fre_info);
+
+  if (offset_size == SFRAME_FRE_OFFSET_2B
+      || offset_size == SFRAME_FRE_OFFSET_4B)	/* 2 or 4 bytes.  */
+    return (offset_cnt * (offset_size * 2));
+
+  return (offset_cnt);
+}
+
+/* Get total size in bytes to represent FREP in the binary format.  This
+   includes the starting address, FRE info, and all the offsets.  */
+
+static size_t
+sframe_fre_entry_size (sframe_frame_row_entry *frep, size_t addr_size)
+{
+  if (frep == NULL)
+    return 0;
+
+  uint8_t fre_info = frep->fre_info;
+
+  return (addr_size + sizeof (frep->fre_info)
+	  + sframe_fre_offset_bytes_size (fre_info));
+}
+
+/* Get SFrame header from the given decoder context DCTX.  */
+
+static inline sframe_header *
+sframe_decoder_get_header (sframe_decoder_ctx *dctx)
+{
+  /* There is no header to hand out without a decoder context.  */
+  if (dctx == NULL)
+    return NULL;
+  return &dctx->sfd_header;
+}
+
+/* Get the offset of the sfde_func_start_address field (from the start of the
+   on-disk layout of the SFrame section) of the FDE at FUNC_IDX in the decoder
+   context DCTX.  */
+
+static uint32_t
+sframe_decoder_get_offsetof_fde_start_addr (sframe_decoder_ctx *dctx,
+					    uint32_t func_idx,
+					    _Unwind_Reason_Code *errp)
+{
+  sframe_header *dhp;
+
+  dhp = sframe_decoder_get_header (dctx);
+  if (dhp == NULL)
+    {
+      if (errp != NULL)
+	*errp = _URC_END_OF_STACK;
+      return 0;
+    }
+
+  if (func_idx >= dhp->sfh_num_fdes)
+    {
+      if (errp != NULL)
+	*errp = _URC_END_OF_STACK;
+      return 0;
+    }
+  else if (errp != NULL)
+    *errp = _URC_NO_REASON;
+
+  return (sframe_get_hdr_size (dhp)
+	  + func_idx * sizeof (sframe_func_desc_entry)
+	  + offsetof (sframe_func_desc_entry, sfde_func_start_address));
+}
+
+
+/* Get the offset of the start PC of the SFrame FDE at FUNC_IDX from
+   the start of the SFrame section.  If the flag
+   SFRAME_F_FDE_FUNC_START_PCREL is set, sfde_func_start_address is
+   the offset of the start PC of the function from the field itself.
+
+   If FUNC_IDX is not a valid index in the given decoder object, returns 0.  */
+
+static int32_t
+sframe_decoder_get_secrel_func_start_addr (sframe_decoder_ctx *dctx,
+					   uint32_t func_idx)
+{
+  _Unwind_Reason_Code err = _URC_NO_REASON;
+  sframe_header *sh = &dctx->sfd_header;
+  int32_t offsetof_fde_in_sec;
+
+  /* Validate FUNC_IDX for every section, not only for PC-relative ones,
+     so that the FDE table is never indexed out of bounds.  */
+  offsetof_fde_in_sec =
+    sframe_decoder_get_offsetof_fde_start_addr (dctx, func_idx, &err);
+  if (err == _URC_END_OF_STACK)
+    return 0;
+
+  /* Without SFRAME_F_FDE_FUNC_START_PCREL the stored start address is
+     used as is, with no field offset added.  */
+  if ((sh->sfh_preamble.sfp_flags & SFRAME_F_FDE_FUNC_START_PCREL) == 0)
+    offsetof_fde_in_sec = 0;
+
+  return (dctx->sfd_funcdesc[func_idx].sfde_func_start_address
+	  + offsetof_fde_in_sec);
+}
+
+/* Check whether PC lies within the SFrame Frame Row Entry identified
+   via the START_IP_OFFSET and the END_IP_OFFSET (both inclusive) of
+   the SFrame FDE at FUNC_IDX.  */
+
+static bool
+sframe_fre_check_range_p (sframe_decoder_ctx *dctx, uint32_t func_idx,
+			  uint32_t start_ip_offset, uint32_t end_ip_offset,
+			  int32_t pc)
+{
+  sframe_func_desc_entry *fdep;
+  int32_t func_start_addr;
+  uint8_t rep_block_size;
+  uint32_t pc_offset;
+  bool mask_p;
+
+  fdep = &dctx->sfd_funcdesc[func_idx];
+  if (fdep == NULL)
+    return false;
+
+  func_start_addr = sframe_decoder_get_secrel_func_start_addr (dctx, func_idx);
+  mask_p = (sframe_get_fde_type (fdep) == SFRAME_FDE_TYPE_PCMASK);
+  rep_block_size = fdep->sfde_func_rep_size;
+
+  /* A PCMASK FDE with an empty block is malformed: reject it instead of
+     dividing by zero below.  */
+  if (func_start_addr > pc || (mask_p && rep_block_size == 0))
+    return false;
+
+  /* func_start_addr <= pc; subtract as unsigned to avoid overflow.  */
+  pc_offset = (uint32_t) pc - (uint32_t) func_start_addr;
+  /* For SFrame FDEs encoding information for repetitive pattern of insns,
+     masking with the rep_block_size is necessary to find the matching FRE.  */
+  if (mask_p)
+    pc_offset = pc_offset % rep_block_size;
+
+  return (start_ip_offset <= pc_offset) && (end_ip_offset >= pc_offset);
+}
+
+/* Get IDX'th offset from FRE.  Set ERRP as applicable.  */
+
+static int32_t
+sframe_get_fre_offset (sframe_frame_row_entry *fre,
+		       int idx,
+		       _Unwind_Reason_Code *errp)
+{
+  uint8_t offset_cnt, offset_size;
+
+  if (!sframe_fre_sanity_check_p (fre))
+    {
+      *errp = _URC_END_OF_STACK;
+      return 0;
+    }
+
+  offset_cnt = sframe_fre_get_offset_count (fre->fre_info);
+  offset_size = sframe_fre_get_offset_size (fre->fre_info);
+
+  if (idx < 0 || offset_cnt < (idx + 1))
+    {
+      *errp = _URC_END_OF_STACK;
+      return 0;
+    }
+  *errp = _URC_NO_REASON;
+
+  /* FRE_OFFSETS may be misaligned (packed format): copy with memcpy.  */
+  if (offset_size == SFRAME_FRE_OFFSET_1B)
+    return ((const int8_t *) fre->fre_offsets)[idx];
+  else if (offset_size == SFRAME_FRE_OFFSET_2B)
+    {
+      int16_t off;
+      memcpy (&off, fre->fre_offsets + idx * sizeof (off), sizeof (off));
+      return off;
+    }
+  else
+    {
+      int32_t off;
+      memcpy (&off, fre->fre_offsets + idx * sizeof (off), sizeof (off));
+      return off;
+    }
+}
+
+/* Decode the SFrame FRE start address offset value from FRE_BUF in on-disk
+   binary format, given the FRE_TYPE.  Updates the FRE_START_ADDR.  */
+
+static void
+sframe_decode_fre_start_address (const char *fre_buf,
+				 uint32_t *fre_start_addr,
+				 uint32_t fre_type)
+{
+  uint32_t saddr = 0;
+
+  /* SFrame is an unaligned on-disk format, so FRE_BUF may be misaligned
+     for the wider types: read those with memcpy, not a pointer cast.  */
+  if (fre_type == SFRAME_FRE_TYPE_ADDR1)
+    saddr = *(const uint8_t *) fre_buf;
+  else if (fre_type == SFRAME_FRE_TYPE_ADDR2)
+    {
+      uint16_t addr2;
+      memcpy (&addr2, fre_buf, sizeof (addr2));
+      saddr = addr2;
+    }
+  else if (fre_type == SFRAME_FRE_TYPE_ADDR4)
+    {
+      memcpy (&saddr, fre_buf, sizeof (saddr));
+    }
+  else
+    /* Unknown FRE type: leave *FRE_START_ADDR untouched.  */
+    return;
+
+  *fre_start_addr = saddr;
+}
+
+/* Find the function descriptor entry which contains the specified address
+   ADDR and store its index in *FUNC_IDX.  Return NULL if none is found.  */
+
+static sframe_func_desc_entry *
+sframe_get_funcdesc_with_addr_internal (sframe_decoder_ctx *ctx, int32_t addr,
+					int *errp, uint32_t *func_idx)
+{
+  sframe_header *dhp;
+  sframe_func_desc_entry *fdp;
+  int low, high;
+
+  if (ctx == NULL)
+    return NULL;
+
+  dhp = sframe_decoder_get_header (ctx);
+
+  if (dhp == NULL || dhp->sfh_num_fdes == 0 || ctx->sfd_funcdesc == NULL)
+    return NULL;
+  /* If the FDE sub-section is not sorted on PCs, skip the lookup because
+     binary search cannot be used.  */
+  if ((dhp->sfh_preamble.sfp_flags & SFRAME_F_FDE_SORTED) == 0)
+    return NULL;
+
+  /* Do the binary search.  */
+  fdp = (sframe_func_desc_entry *) ctx->sfd_funcdesc;
+  low = 0;
+  high = dhp->sfh_num_fdes - 1;
+  while (low <= high)
+    {
+      int mid = low + (high - low) / 2;
+      /* Compute the start address of the candidate FDE only once per
+	 iteration; it is used by both comparisons below.  */
+      int32_t start = sframe_decoder_get_secrel_func_start_addr (ctx, mid);
+
+      /* Given start <= addr, addr - start must be positive.  */
+      if (start <= addr
+	  && (uint32_t) (addr - start) < fdp[mid].sfde_func_size)
+	{
+	  *func_idx = mid;
+	  return fdp + mid;
+	}
+
+      if (start < addr)
+	low = mid + 1;
+      else
+	high = mid - 1;
+    }
+
+  return NULL;
+}
+
+/* Get the end IP offset for the FRE at index I in the FDEP.  The buffer
+   FRES is the starting location of the next FRE, if there is one.  */
+
+static uint32_t
+sframe_fre_get_end_ip_offset (sframe_func_desc_entry *fdep, unsigned int i,
+			      const char *fres)
+{
+  uint32_t end_ip_offset = 0;
+  uint32_t fre_type;
+
+  fre_type = sframe_get_fre_type (fdep);
+
+  /* Get the start address of the next FRE in sequence.  */
+  if (i < fdep->sfde_func_num_fres - 1)
+    {
+      sframe_decode_fre_start_address (fres, &end_ip_offset, fre_type);
+      end_ip_offset -= 1;
+    }
+  else
+    /* The end IP offset for the FRE needs to be deduced from the function
+       size.  */
+    end_ip_offset = fdep->sfde_func_size - 1;
+
+  return end_ip_offset;
+}
+
+/* Get the SFrame's fixed FP offset given the decoder context CTX.  */
+
+static int8_t
+sframe_decoder_get_fixed_fp_offset (sframe_decoder_ctx *ctx)
+{
+  sframe_header *dhp;
+  dhp = sframe_decoder_get_header (ctx);
+  return dhp->sfh_cfa_fixed_fp_offset;
+}
+
+/* Get the SFrame's fixed RA offset given the decoder context CTX.  */
+
+static int8_t
+sframe_decoder_get_fixed_ra_offset (sframe_decoder_ctx *ctx)
+{
+  sframe_header *dhp;
+  dhp = sframe_decoder_get_header (ctx);
+  return dhp->sfh_cfa_fixed_ra_offset;
+}
+
+/* Get the CFA base register id from the FRE info of FRE.  */
+
+uint8_t
+__sframe_fre_get_base_reg_id (sframe_frame_row_entry *fre)
+{
+  uint8_t fre_info = fre->fre_info;
+  return SFRAME_V1_FRE_CFA_BASE_REG_ID (fre_info);
+}
+
+/* Get the CFA offset from the FRE.  If the offset is unavailable,
+   sets errp.  */
+
+int32_t
+__sframe_fre_get_cfa_offset (sframe_decoder_ctx *dctx __attribute__ ((__unused__)),
+			     sframe_frame_row_entry *fre,
+			     _Unwind_Reason_Code *errp)
+{
+  return sframe_get_fre_offset (fre, SFRAME_FRE_CFA_OFFSET_IDX, errp);
+}
+
+/* Get the FP offset from the FRE.  If the offset is unavailable, sets
+   errp.  */
+
+int32_t
+__sframe_fre_get_fp_offset (sframe_decoder_ctx *dctx,
+			    sframe_frame_row_entry *fre,
+			    _Unwind_Reason_Code *errp)
+{
+  uint32_t fp_offset_idx = 0;
+  int8_t fp_offset = sframe_decoder_get_fixed_fp_offset (dctx);
+
+  *errp = _URC_NO_REASON;
+  /* If the FP offset is not being tracked, return the fixed FP offset
+     from the SFrame header.  */
+  if (fp_offset != SFRAME_CFA_FIXED_FP_INVALID)
+    return fp_offset;
+
+  /* In some ABIs, the stack offset to recover RA (using the CFA) from is
+     fixed (like AMD64).  In such cases, the stack offset to recover FP will
+     appear at the second index.  */
+  fp_offset_idx = ((sframe_decoder_get_fixed_ra_offset (dctx)
+		    != SFRAME_CFA_FIXED_RA_INVALID)
+		   ? SFRAME_FRE_RA_OFFSET_IDX
+		   : SFRAME_FRE_FP_OFFSET_IDX);
+  return sframe_get_fre_offset (fre, fp_offset_idx, errp);
+}
+
+/* Get the RA offset from the FRE.  If the offset is unavailable, sets
+   errp.  */
+
+int32_t
+__sframe_fre_get_ra_offset (sframe_decoder_ctx *dctx,
+			    sframe_frame_row_entry *fre,
+			    _Unwind_Reason_Code *errp)
+{
+  int8_t ra_offset = sframe_decoder_get_fixed_ra_offset (dctx);
+  *errp = _URC_NO_REASON;
+
+  /* If the RA offset was not being tracked, return the fixed RA offset
+     from the SFrame header.  */
+  if (ra_offset != SFRAME_CFA_FIXED_RA_INVALID)
+    return ra_offset;
+
+  /* Otherwise, get the RA offset from the FRE.  */
+  return sframe_get_fre_offset (fre, SFRAME_FRE_RA_OFFSET_IDX, errp);
+}
+
+/* Decode the SFrame section in the buffer SF_BUF into the decoder context
+   DCTX.  Return _URC_NO_REASON on success, _URC_END_OF_STACK otherwise.  */
+
+_Unwind_Reason_Code
+__sframe_decode (sframe_decoder_ctx *dctx, const char *sf_buf)
+{
+  const sframe_preamble *sfp;
+  size_t hdrsz;
+  sframe_header *sfheaderp;
+  char *frame_buf;
+
+  int fidx_size;
+  uint32_t fre_bytes;
+
+  if (sf_buf == NULL)
+    return _URC_END_OF_STACK;
+
+  sfp = (const sframe_preamble *) sf_buf;
+
+  /* Check for foreign endianness.  */
+  if (sfp->sfp_magic != SFRAME_MAGIC)
+    return _URC_END_OF_STACK;
+
+  frame_buf = (char *)sf_buf;
+
+  /* Handle the SFrame header.  */
+  dctx->sfd_header = *(sframe_header *) frame_buf;
+
+  /* Validate the contents of SFrame header.  */
+  sfheaderp = &dctx->sfd_header;
+  if (!sframe_header_sanity_check_p (sfheaderp))
+    return _URC_END_OF_STACK;
+
+  hdrsz = sframe_get_hdr_size (sfheaderp);
+  frame_buf += hdrsz;
+
+  /* Handle the SFrame Function Descriptor Entry section.  */
+  fidx_size
+    = sfheaderp->sfh_num_fdes * sizeof (sframe_func_desc_entry);
+  dctx->sfd_funcdesc = (sframe_func_desc_entry *)frame_buf;
+  frame_buf += (fidx_size);
+
+  dctx->sfd_fres = frame_buf;
+  fre_bytes = sfheaderp->sfh_fre_len;
+  dctx->sfd_fre_nbytes = fre_bytes;
+
+  return _URC_NO_REASON;
+}
+
+/* Find the SFrame Row Entry which contains the PC.  Returns
+   _URC_END_OF_STACK if failure.  */
+
+_Unwind_Reason_Code
+__sframe_find_fre (sframe_decoder_ctx *ctx, int32_t pc,
+		   sframe_frame_row_entry *frep)
+{
+  sframe_func_desc_entry *fdep;
+  uint32_t func_idx;
+  uint32_t fre_type, i;
+  uint32_t start_ip_offset;
+  int32_t func_start_addr;
+  uint32_t end_ip_offset;
+  const char *fres;
+  size_t size = 0;
+  int err = 0;
+
+  if ((ctx == NULL) || (frep == NULL))
+    return _URC_END_OF_STACK;
+
+  /* Find the FDE which contains the PC, then scan its fre entries.  */
+  fdep = sframe_get_funcdesc_with_addr_internal (ctx, pc, &err, &func_idx);
+  if (fdep == NULL || ctx->sfd_fres == NULL)
+    return _URC_END_OF_STACK;
+
+  fre_type = sframe_get_fre_type (fdep);
+
+  fres = ctx->sfd_fres + fdep->sfde_func_start_fre_off;
+  func_start_addr = sframe_decoder_get_secrel_func_start_addr (ctx, func_idx);
+
+  for (i = 0; i < fdep->sfde_func_num_fres; i++)
+    {
+      size_t addr_size;
+
+      /* Partially decode the FRE.  */
+      sframe_decode_fre_start_address (fres, &frep->fre_start_addr, fre_type);
+
+      addr_size = sframe_fre_start_addr_size (fre_type);
+      if (addr_size == 0)
+	return _URC_END_OF_STACK;
+
+      frep->fre_info = *(uint8_t *)(fres + addr_size);
+      size = sframe_fre_entry_size (frep, addr_size);
+
+      start_ip_offset = frep->fre_start_addr;
+      end_ip_offset = sframe_fre_get_end_ip_offset (fdep, i, fres + size);
+
+      /* Stop search if FRE's start_ip is greater than pc.  Given
+	func_start_addr <= pc, pc - func_start_addr must be positive.  */
+      if (start_ip_offset > (uint32_t) (pc - func_start_addr))
+	return _URC_END_OF_STACK;
+
+      if (sframe_fre_check_range_p (ctx, func_idx, start_ip_offset,
+				    end_ip_offset, pc))
+	{
+	  /* Decode last FRE bits: offsets size.  */
+	  frep->fre_offsets = fres + addr_size + sizeof (frep->fre_info);
+	  return _URC_NO_REASON;
+	}
+
+      fres += size;
+    }
+  return _URC_END_OF_STACK;
+}
diff --git a/sysdeps/generic/sframe-read.h b/sysdeps/generic/sframe-read.h
new file mode 100644
index 0000000..1461421
--- /dev/null
+++ b/sysdeps/generic/sframe-read.h
@@ -0,0 +1,112 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SFRAME_API_H
+#define _SFRAME_API_H
+
+#include <sframe.h>
+#include <stdbool.h>
+#include <unwind.h>
+
+#ifdef	__cplusplus
+extern "C"
+{
+#endif
+
+typedef struct sframe_decoder_ctx
+{
+
+  sframe_header sfd_header;
+
+  sframe_func_desc_entry *sfd_funcdesc;
+  /* SFrame FRE table.  */
+  char *sfd_fres;
+  /* Number of bytes needed for SFrame FREs.  */
+  int sfd_fre_nbytes;
+} sframe_decoder_ctx;
+
+#define MAX_NUM_STACK_OFFSETS	3
+
+/* User interfacing SFrame Row Entry.
+   An abstraction provided by libsframe so the consumer is decoupled from
+   the binary format representation of the same.
+
+   The members are best ordered such that they are aligned at their natural
+   boundaries.  This helps avoid usage of undesirable misaligned memory
+   accesses.  See PR libsframe/29856.  */
+
+typedef struct sframe_frame_row_entry
+{
+  uint32_t fre_start_addr;
+  const char *fre_offsets;
+  unsigned char fre_info;
+} sframe_frame_row_entry;
+
+/* The SFrame Decoder.  */
+
+/* Decode the specified SFrame buffer CF_BUF into the decoder context
+   DCTX.  Returns _URC_NO_REASON on success.  */
+
+extern _Unwind_Reason_Code
+__sframe_decode (sframe_decoder_ctx *dctx, const char *cf_buf);
+
+/* Find the SFrame Frame Row Entry which contains the PC.  Returns
+   _URC_END_OF_STACK if failure.  */
+
+extern _Unwind_Reason_Code
+__sframe_find_fre (sframe_decoder_ctx *ctx, int32_t pc,
+		   sframe_frame_row_entry *frep);
+
+/* Get the base reg id from the FRE info.  */
+
+extern uint8_t
+__sframe_fre_get_base_reg_id (sframe_frame_row_entry *fre);
+
+/* Get the CFA offset from the FRE.  Sets ERRP if an error is
+   detected.  */
+
+extern int32_t
+__sframe_fre_get_cfa_offset (sframe_decoder_ctx *dctx,
+			     sframe_frame_row_entry *fre,
+			     _Unwind_Reason_Code *errp);
+
+/* Get the FP offset from the FRE.  If the offset is unavailable, sets
+   ERRP.  */
+
+extern int32_t
+__sframe_fre_get_fp_offset (sframe_decoder_ctx *dctx,
+			    sframe_frame_row_entry *fre,
+			    _Unwind_Reason_Code *errp);
+
+/* Get the RA offset from the FRE.  Sets ERRP if ra offset is
+   unavailable.  */
+
+extern int32_t
+__sframe_fre_get_ra_offset (sframe_decoder_ctx *dctx,
+			    sframe_frame_row_entry *fre,
+			    _Unwind_Reason_Code *errp);
+
+/* Get the offset of the sfde_func_start_address field.  */
+
+extern uint32_t
+__sframe_decoder_get_offsetof_fde_start_addr (sframe_decoder_ctx *dctx,
+					      uint32_t func_idx,
+					      _Unwind_Reason_Code *errp);
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SFRAME_API_H */
diff --git a/sysdeps/generic/sframe.c b/sysdeps/generic/sframe.c
new file mode 100644
index 0000000..ba0830d
--- /dev/null
+++ b/sysdeps/generic/sframe.c
@@ -0,0 +1,187 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <sframe-read.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <unwind.h>
+#include <uw-sigframe.h>
+#include <ldsodefs.h>
+
+/* Some arches like s390x needs an offset to correct the value where
+   SP is located in relation to CFA.  */
+#ifndef SFRAME_SP_VAL_OFFSET
+#define SFRAME_SP_VAL_OFFSET 0
+#endif
+
+static inline _Unwind_Ptr
+read_stack_value (_Unwind_Ptr loc)
+{
+  _Unwind_Ptr value = *((_Unwind_Ptr *) loc);
+  return value;
+}
+
+/* Helper to avoid PLT call in libc.  Fixes elf/check-localplt
+   errors.  */
+
+static int
+_dl_find_object_helper (void *address, struct dl_find_object *result)
+{
+  return GLRO (dl_find_object) (address, result);
+}
+
+/* Backtrace the stack and collect the stacktrace given SFrame info.
+   If successful, store the return addresses in RA_LST.  COUNT is the
+   maximum number of return addresses that can be stored in RA_LST.
+   Return the number of addresses collected; 0 tells the caller to
+   fall back to the DWARF stacktracer.  */
+
+int
+__stacktrace_sframe (void **ra_lst, int count, frame *frame)
+{
+  _Unwind_Ptr sframe_vma, cfa, return_addr, ra_stack_loc, fp_stack_loc, pc,
+    frame_ptr;
+  int cfa_offset, fp_offset, ra_offset, i;
+  sframe_frame_row_entry fred, *frep = &fred;
+
+  if (!ra_lst || !count)
+    return 0;
+
+  for (i = 0; i < count; i++)
+    {
+      _Unwind_Reason_Code err;
+      struct dl_find_object data;
+      sframe_decoder_ctx decoder_context, *dctx = &decoder_context;
+
+      /* Clean decoder context.  */
+      memset (dctx, 0, sizeof (sframe_decoder_ctx));
+
+      /* Load and set up the SFrame stack trace info for pc.  */
+      if (_dl_find_object_helper ((void *) frame->pc, &data) < 0)
+	/* Force fallback to DWARF stacktracer.  */
+	return 0;
+
+      sframe_vma = (_Unwind_Ptr) data.dlfo_sframe;
+      if (!sframe_vma || !(data.dlfo_flags & DLFO_FLAG_SFRAME))
+	{
+#ifdef MD_DECODE_SIGNAL_FRAME
+	  /* If there is no valid SFrame section or SFrame section is
+	     corrupted then check if it is a signal frame.  */
+	  if (MD_DECODE_SIGNAL_FRAME (frame) == _URC_NO_REASON)
+	    {
+	      ra_lst[i] = (void *) frame->pc;
+	      continue;
+	    }
+#endif
+	  /* Force fallback to DWARF stacktracer.  */
+	  return 0;
+	}
+
+      /* Decode the specified SFrame buffer and populate sframe's
+	 decoder context.  */
+      if (__sframe_decode (dctx, (char *) data.dlfo_sframe) != _URC_NO_REASON)
+	/* Force fallback to DWARF stacktracer.  */
+	return 0;
+
+      pc = frame->pc - sframe_vma;
+      /* Find the SFrame Row Entry which contains the PC.  */
+      if (__sframe_find_fre (dctx, pc, frep) == _URC_END_OF_STACK)
+	{
+#ifdef MD_DECODE_SIGNAL_FRAME
+	  /* If there are no valid FREs, check if it is a signal
+	     frame, and if so decode it.  */
+	  if (MD_DECODE_SIGNAL_FRAME (frame) == _URC_NO_REASON)
+	    {
+	      ra_lst[i] = (void *) frame->pc;
+	      continue;
+	    }
+#endif
+#ifdef MD_DETECT_OUTERMOST_FRAME
+	  if (MD_DETECT_OUTERMOST_FRAME (frame) == _URC_END_OF_STACK)
+	    return i;
+#endif
+	  /* Force fallback to DWARF stacktracer.  */
+	  return 0;
+	}
+
+      /* Get the CFA offset from the FRE.  If offset is unavailable,
+	 sets err.  */
+      cfa_offset = __sframe_fre_get_cfa_offset (dctx, frep, &err);
+      if (err != _URC_NO_REASON)
+	/* Force fallback to DWARF stacktracer.  */
+	return 0;
+
+      /* Get CFA using base reg id from the FRE info.  */
+      cfa = ((__sframe_fre_get_base_reg_id (frep)
+	      == SFRAME_BASE_REG_SP) ? frame->sp : frame->fp) + cfa_offset;
+
+      /* Get the RA offset from the FRE.  If the offset is
+	 unavailable, sets err.  */
+      ra_offset = __sframe_fre_get_ra_offset (dctx, frep, &err);
+      if (err != _URC_NO_REASON)
+	/* Force fallback to DWARF stacktracer.  */
+	return 0;
+
+      /* RA offset is available, get the value stored in the stack
+	 location.  */
+      ra_stack_loc = cfa + ra_offset;
+      return_addr = read_stack_value (ra_stack_loc);
+
+      ra_lst[i] = (void *) return_addr;
+
+      /* Get the FP offset from the FRE.  If the offset is
+	 unavailable, sets err.  */
+      fp_offset = __sframe_fre_get_fp_offset (dctx, frep, &err);
+      frame_ptr = frame->fp;
+      if (err == _URC_NO_REASON)
+	{
+	  /* FP offset is available, get the value stored in the stack
+	     location.  */
+	  fp_stack_loc = cfa + fp_offset;
+	  frame_ptr = read_stack_value (fp_stack_loc);
+	}
+
+      /* Set up for the next frame.  */
+      frame->fp = frame_ptr;
+      frame->sp = cfa + SFRAME_SP_VAL_OFFSET;
+      frame->pc = return_addr;
+    }
+  return i;
+}
+
+libc_hidden_def (__stacktrace_sframe);
+
+/* A noinline helper used to obtain the caller's current PC.  */
+
+_Unwind_Ptr  __attribute__ ((noinline))
+__getPC (void)
+{
+  return (_Unwind_Ptr)
+    __builtin_extract_return_addr (__builtin_return_address (0));
+}
+
+libc_hidden_def (__getPC);
+
+/* A noinline helper used to obtain the caller's current SP.  It
+   mimics gcc14's __builtin_stack_address() functionality.  */
+
+_Unwind_Ptr  __attribute__ ((noinline))
+__getSP (void)
+{
+  return (_Unwind_Ptr) __builtin_dwarf_cfa() + SFRAME_SP_VAL_OFFSET;
+}
+
+libc_hidden_def (__getSP);
diff --git a/sysdeps/generic/sframe.h b/sysdeps/generic/sframe.h
new file mode 100644
index 0000000..e38adcf
--- /dev/null
+++ b/sysdeps/generic/sframe.h
@@ -0,0 +1,378 @@
+/* SFrame format description.
+   Copyright (C) 2022-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef	_SFRAME_H
+#define	_SFRAME_H
+
+#include <sys/types.h>
+#include <limits.h>
+#include <stdint.h>
+#include <unwind.h>
+
+#ifdef	__cplusplus
+extern "C"
+{
+#endif
+
+/* SFrame format.
+
+   SFrame format is a simple format to represent the information needed
+   for generating vanilla backtraces.  SFrame format keeps track of the
+   minimal necessary information needed for stack tracing:
+     - Canonical Frame Address (CFA)
+     - Frame Pointer (FP)
+     - Return Address (RA)
+
+   The SFrame section itself has the following structure:
+
+       +--------+------------+---------+
+       |  file  |  function  | frame   |
+       | header | descriptor |  row    |
+       |        |   entries  | entries |
+       +--------+------------+---------+
+
+   The file header stores a magic number and version information, flags, and
+   the byte offset of each of the sections relative to the end of the header
+   itself.  The file header also specifies the total number of Function
+   Descriptor Entries, Frame Row Entries and length of the FRE sub-section.
+
+   Following the header is a list of Function Descriptor Entries (FDEs).
+   This list may be sorted if the flags in the file header indicate it to be
+   so.  The sort order, if applicable, is the order of functions in the
+   .text.* sections in the resulting binary artifact.  Each Function
+   Descriptor Entry specifies the start PC of a function, the size in bytes
+   of the function and an offset to its first Frame Row Entry (FRE).  Each FDE
+   additionally also specifies the type of FRE it uses to encode the stack
+   trace information.
+
+   Next, the SFrame Frame Row Entry sub-section is a list of variable size
+   records.  Each entry represents stack trace information for a set of PCs
+   of the function.  A singular Frame Row Entry is a self-sufficient record
+   which contains information on how to generate stack trace from the
+   applicable set of PCs.
+
+   */
+
+
+/* SFrame format versions.  */
+#define SFRAME_VERSION_1	1
+#define SFRAME_VERSION_2	2
+/* SFrame magic number.  */
+#define SFRAME_MAGIC		0xdee2
+/* Current version of SFrame format.  */
+#define SFRAME_VERSION	SFRAME_VERSION_2
+
+/* Various flags for SFrame.  */
+
+/* Function Descriptor Entries are sorted on PC.  */
+#define SFRAME_F_FDE_SORTED		    0x1
+/* Functions preserve frame pointer.  */
+#define SFRAME_F_FRAME_POINTER		    0x2
+/* Function start address in SFrame FDE is encoded as the distance from the
+   location of the sfde_func_start_address to the start PC of the function.
+   If absent, the function start address in SFrame FDE is encoded as the
+   distance from the start of the SFrame FDE section to the start PC of the
+   function.  */
+#define SFRAME_F_FDE_FUNC_START_PCREL	    0x4
+
+/* Set of all defined flags in SFrame V2.  */
+#define SFRAME_V2_F_ALL_FLAGS \
+  (SFRAME_F_FDE_SORTED | SFRAME_F_FRAME_POINTER \
+   | SFRAME_F_FDE_FUNC_START_PCREL)
+
+#define SFRAME_CFA_FIXED_FP_INVALID 0
+#define SFRAME_CFA_FIXED_RA_INVALID 0
+
+/* Supported ABIs/Arch.  */
+#define SFRAME_ABI_AARCH64_ENDIAN_BIG      1 /* AARCH64 big endian.  */
+#define SFRAME_ABI_AARCH64_ENDIAN_LITTLE   2 /* AARCH64 little endian.  */
+#define SFRAME_ABI_AMD64_ENDIAN_LITTLE     3 /* AMD64 little endian.  */
+
+/* SFrame FRE types.  */
+#define SFRAME_FRE_TYPE_ADDR1	0
+#define SFRAME_FRE_TYPE_ADDR2	1
+#define SFRAME_FRE_TYPE_ADDR4	2
+
+/* SFrame Function Descriptor Entry types.
+
+   The SFrame format has two possible representations for functions.  The
+   choice of which type to use is made according to the instruction patterns
+   in the relevant program stub.
+
+   An SFrame FDE of type SFRAME_FDE_TYPE_PCINC is an indication
+   that the PCs in the FREs should be treated as increments in bytes.  This is
+   used for a bulk of the executable code of a program, which contains
+   instructions with no specific pattern.
+
+   An SFrame FDE of type SFRAME_FDE_TYPE_PCMASK is an indication
+   that the PCs in the FREs should be treated as masks.  This type is useful
+   for the cases when a small pattern of instructions in a program stub is
+   repeated to cover a specific functionality.  Typical use cases are pltN
+   entries, trampolines etc.  */
+
+/* Unwinders perform a (PC >= FRE_START_ADDR) to look up a matching FRE.  */
+#define SFRAME_FDE_TYPE_PCINC   0
+/* Unwinders perform a (PC % REP_BLOCK_SIZE >= FRE_START_ADDR) to look up a
+   matching FRE.  */
+#define SFRAME_FDE_TYPE_PCMASK  1
+
+typedef struct sframe_preamble
+{
+  uint16_t sfp_magic;	/* Magic number (SFRAME_MAGIC).  */
+  uint8_t sfp_version;	/* Data format version number (SFRAME_VERSION).  */
+  uint8_t sfp_flags;	/* Flags.  */
+} __attribute__ ((packed)) sframe_preamble;
+
+typedef struct sframe_header
+{
+  sframe_preamble sfh_preamble;
+  /* Information about the arch (endianness) and ABI.  */
+  uint8_t sfh_abi_arch;
+  /* Offset for the Frame Pointer (FP) from CFA may be fixed for some
+     ABIs (e.g, in AMD64 when -fno-omit-frame-pointer is used).  When fixed,
+     this field specifies the fixed stack frame offset and the individual
+     FREs do not need to track it.  When not fixed, it is set to
+     SFRAME_CFA_FIXED_FP_INVALID, and the individual FREs may provide
+     the applicable stack frame offset, if any.  */
+  int8_t sfh_cfa_fixed_fp_offset;
+  /* Offset for the Return Address from CFA is fixed for some ABIs
+     (e.g., AMD64 has it as CFA-8).  When fixed, the header specifies the
+     fixed stack frame offset and the individual FREs do not track it.  When
+     not fixed, it is set to SFRAME_CFA_FIXED_RA_INVALID, and individual
+     FREs provide the applicable stack frame offset, if any.  */
+  int8_t sfh_cfa_fixed_ra_offset;
+  /* Number of bytes making up the auxiliary header, if any.
+     Some ABI/arch, in the future, may use this space for extending the
+     information in SFrame header.  Auxiliary header is contained in
+     bytes sequentially following the sframe_header.  */
+  uint8_t sfh_auxhdr_len;
+  /* Number of SFrame FDEs in this SFrame section.  */
+  uint32_t sfh_num_fdes;
+  /* Number of SFrame Frame Row Entries.  */
+  uint32_t sfh_num_fres;
+  /* Number of bytes in the SFrame Frame Row Entry section.  */
+  uint32_t sfh_fre_len;
+  /* Offset of SFrame Function Descriptor Entry section.  */
+  uint32_t sfh_fdeoff;
+  /* Offset of SFrame Frame Row Entry section.  */
+  uint32_t sfh_freoff;
+} __attribute__ ((packed)) sframe_header;
+
+#define SFRAME_V1_HDR_SIZE(sframe_hdr)	\
+  ((sizeof (sframe_header) + (sframe_hdr).sfh_auxhdr_len))
+
+/* Two possible keys for executable (instruction) pointers signing.  */
+#define SFRAME_AARCH64_PAUTH_KEY_A    0 /* Key A.  */
+#define SFRAME_AARCH64_PAUTH_KEY_B    1 /* Key B.  */
+
+typedef struct sframe_func_desc_entry
+{
+  /* Function start address.  Encoded as a signed offset, relative to the
+     beginning of the current FDE.  */
+  int32_t sfde_func_start_address;
+  /* Size of the function in bytes.  */
+  uint32_t sfde_func_size;
+  /* Offset of the first SFrame Frame Row Entry of the function, relative to the
+     beginning of the SFrame Frame Row Entry sub-section.  */
+  uint32_t sfde_func_start_fre_off;
+  /* Number of frame row entries for the function.  */
+  uint32_t sfde_func_num_fres;
+  /* Additional information for stack tracing from the function:
+     - 4-bits: Identify the FRE type used for the function.
+     - 1-bit: Identify the FDE type of the function - mask or inc.
+     - 1-bit: PAC authorization A/B key (aarch64).
+     - 2-bits: Unused.
+     ------------------------------------------------------------------------
+     |     Unused    |  PAC auth A/B key (aarch64) |  FDE type |   FRE type   |
+     |               |        Unused (amd64)       |           |              |
+     ------------------------------------------------------------------------
+     8               6                             5           4              0     */
+  uint8_t sfde_func_info;
+  /* Size of the block of repeating insns.  Used for SFrame FDEs of type
+     SFRAME_FDE_TYPE_PCMASK.  */
+  uint8_t sfde_func_rep_size;
+  uint16_t sfde_func_padding2;
+} __attribute__ ((packed)) sframe_func_desc_entry;
+
+/* Macros to compose and decompose function info in FDE.  */
+
+/* Note: Set PAC auth key to SFRAME_AARCH64_PAUTH_KEY_A by default.  */
+#define SFRAME_V1_FUNC_INFO(fde_type, fre_enc_type) \
+  (((SFRAME_AARCH64_PAUTH_KEY_A & 0x1) << 5) | \
+   (((fde_type) & 0x1) << 4) | ((fre_enc_type) & 0xf))
+
+#define SFRAME_V1_FUNC_FRE_TYPE(data)	  ((data) & 0xf)
+#define SFRAME_V1_FUNC_FDE_TYPE(data)	  (((data) >> 4) & 0x1)
+#define SFRAME_V1_FUNC_PAUTH_KEY(data)	  (((data) >> 5) & 0x1)
+
+/* Set the pauth key as indicated.  */
+#define SFRAME_V1_FUNC_INFO_UPDATE_PAUTH_KEY(pauth_key, fde_info) \
+  ((((pauth_key) & 0x1) << 5) | ((fde_info) & 0xdf))
+
+/* Size of stack frame offsets in an SFrame Frame Row Entry.  A single
+   SFrame FRE has all offsets of the same size.  Offset size may vary
+   across frame row entries.  */
+#define SFRAME_FRE_OFFSET_1B	  0
+#define SFRAME_FRE_OFFSET_2B	  1
+#define SFRAME_FRE_OFFSET_4B	  2
+
+/* An SFrame Frame Row Entry can be SP or FP based.  */
+#define SFRAME_BASE_REG_FP	0
+#define SFRAME_BASE_REG_SP	1
+
+/* The index at which a specific offset is presented in the variable length
+   bytes of an FRE.  */
+#define SFRAME_FRE_CFA_OFFSET_IDX   0
+/* The RA stack offset, if present, will always be at index 1 in the variable
+   length bytes of the FRE.  */
+#define SFRAME_FRE_RA_OFFSET_IDX    1
+/* The FP stack offset may appear at offset 1 or 2, depending on the ABI as RA
+   may or may not be tracked.  */
+#define SFRAME_FRE_FP_OFFSET_IDX    2
+
+typedef struct sframe_fre_info
+{
+  /* Information about
+     - 1 bit: base reg for CFA
+     - 4 bits: Number of offsets (N).  A value of up to 3 is allowed to track
+     all three of CFA, FP and RA (fixed implicit order).
+     - 2 bits: information about size of the offsets (S) in bytes.
+     Valid values are SFRAME_FRE_OFFSET_1B, SFRAME_FRE_OFFSET_2B,
+     SFRAME_FRE_OFFSET_4B
+     - 1 bit: Mangled RA state bit (aarch64 only).
+     ----------------------------------------------------------------------------------
+     | Mangled-RA (aarch64) |  Size of offsets   |   Number of offsets    |   base_reg |
+     |  Unused (amd64)      |                    |                        |            |
+     ----------------------------------------------------------------------------------
+     8                     7                    5                        1            0
+
+     */
+  uint8_t fre_info;
+} sframe_fre_info;
+
+/* Macros to compose and decompose FRE info.  */
+
+/* Note: Set mangled_ra_p to zero by default.  */
+#define SFRAME_V1_FRE_INFO(base_reg_id, offset_num, offset_size) \
+  (((0 & 0x1) << 7) | (((offset_size) & 0x3) << 5) | \
+   (((offset_num) & 0xf) << 1) | ((base_reg_id) & 0x1))
+
+/* Set the mangled_ra_p bit as indicated.  */
+#define SFRAME_V1_FRE_INFO_UPDATE_MANGLED_RA_P(mangled_ra_p, fre_info) \
+  ((((mangled_ra_p) & 0x1) << 7) | ((fre_info) & 0x7f))
+
+#define SFRAME_V1_FRE_CFA_BASE_REG_ID(data)	  ((data) & 0x1)
+#define SFRAME_V1_FRE_OFFSET_COUNT(data)	  (((data) >> 1) & 0xf)
+#define SFRAME_V1_FRE_OFFSET_SIZE(data)		  (((data) >> 5) & 0x3)
+#define SFRAME_V1_FRE_MANGLED_RA_P(data)	  (((data) >> 7) & 0x1)
+
+/* SFrame Frame Row Entry definitions.
+
+   Used for both AMD64 and AARCH64.
+
+   An SFrame Frame Row Entry is a self-sufficient record which contains
+   information on how to generate the stack trace for the specified range of
+   PCs.  Each SFrame Frame Row Entry is followed by S*N bytes, where:
+     S is the size of the stack frame offset for the FRE, and
+     N is the number of stack frame offsets in the FRE
+
+   The interpretation of FRE stack offsets is ABI-specific:
+
+   AMD64:
+     offset1 (interpreted as CFA = BASE_REG + offset1)
+      if FP is being tracked
+	offset2 (interpreted as FP = CFA + offset2)
+      fi
+
+    AARCH64:
+     offset1 (interpreted as CFA = BASE_REG + offset1)
+     if FP is being tracked (in other words, if frame record created)
+       offset2 (interpreted as RA = CFA + offset2)
+       offset3 (interpreted as FP = CFA + offset3)
+     fi
+     Note that in AAPCS64, a frame record, if created, will save both FP and
+     LR on stack.
+*/
+
+/* Used when SFRAME_FRE_TYPE_ADDR1 is specified as FRE type.  */
+typedef struct sframe_frame_row_entry_addr1
+{
+  /* Start address of the frame row entry.  Encoded as a 1-byte unsigned
+     offset, relative to the start address of the function.  */
+  uint8_t sfre_start_address;
+  sframe_fre_info sfre_info;
+} __attribute__ ((packed)) sframe_frame_row_entry_addr1;
+
+/* Upper limit of start address in sframe_frame_row_entry_addr1
+   is 0x100 (not inclusive).  */
+#define SFRAME_FRE_TYPE_ADDR1_LIMIT   \
+  (1ULL << ((SFRAME_FRE_TYPE_ADDR1 + 1) * 8))
+
+/* Used when SFRAME_FRE_TYPE_ADDR2 is specified as FRE type.  */
+typedef struct sframe_frame_row_entry_addr2
+{
+  /* Start address of the frame row entry.  Encoded as a 2-byte unsigned
+     offset, relative to the start address of the function.  */
+  uint16_t sfre_start_address;
+  sframe_fre_info sfre_info;
+} __attribute__ ((packed)) sframe_frame_row_entry_addr2;
+
+/* Upper limit of start address in sframe_frame_row_entry_addr2
+   is 0x10000 (not inclusive).  */
+#define SFRAME_FRE_TYPE_ADDR2_LIMIT   \
+  (1ULL << ((SFRAME_FRE_TYPE_ADDR2 * 2) * 8))
+
+/* Used when SFRAME_FRE_TYPE_ADDR4 is specified as FRE type.  */
+typedef struct sframe_frame_row_entry_addr4
+{
+  /* Start address of the frame row entry.  Encoded as a 4-byte unsigned
+     offset, relative to the start address of the function.  */
+  uint32_t sfre_start_address;
+  sframe_fre_info sfre_info;
+} __attribute__ ((packed)) sframe_frame_row_entry_addr4;
+
+/* Upper limit of start address in sframe_frame_row_entry_addr4
+   is 0x100000000 (not inclusive).  */
+#define SFRAME_FRE_TYPE_ADDR4_LIMIT   \
+  (1ULL << ((SFRAME_FRE_TYPE_ADDR4 * 2) * 8))
+
+/* Used to pass frame information to stack trace routine.  */
+typedef struct cframe
+{
+  _Unwind_Ptr pc;
+  _Unwind_Ptr sp;
+  _Unwind_Ptr fp;
+} frame;
+
+/* SFrame stack tracing support.  */
+int __stacktrace_sframe (void **, int, frame *);
+libc_hidden_proto (__stacktrace_sframe);
+
+/* Helper used by SFrame tracing algorithm.  */
+_Unwind_Ptr __getPC (void);
+libc_hidden_proto (__getPC);
+
+/* Helper used by SFrame tracing algorithm.  */
+_Unwind_Ptr __getSP (void);
+libc_hidden_proto (__getSP);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif				/* _SFRAME_H */
diff --git a/sysdeps/generic/sysdep.h b/sysdeps/generic/sysdep.h
index 4c0dda4..ef5eba2 100644
--- a/sysdeps/generic/sysdep.h
+++ b/sysdeps/generic/sysdep.h
@@ -45,6 +45,7 @@
 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
 # define cfi_offset(reg, off)		.cfi_offset reg, off
 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+# define cfi_val_offset(reg, off)	.cfi_val_offset reg, off
 # define cfi_register(r1, r2)		.cfi_register r1, r2
 # define cfi_return_column(reg)	.cfi_return_column reg
 # define cfi_restore(reg)		.cfi_restore reg
@@ -74,6 +75,8 @@
    ".cfi_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
 # define CFI_REL_OFFSET(reg, off) \
    ".cfi_rel_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
+# define CFI_VAL_OFFSET(reg, off) \
+   ".cfi_val_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off)
 # define CFI_REGISTER(r1, r2) \
    ".cfi_register " CFI_STRINGIFY(r1) "," CFI_STRINGIFY(r2)
 # define CFI_RETURN_COLUMN(reg) \
diff --git a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h b/sysdeps/generic/uw-sigframe.h
index 401079c..b357f8a 100644
--- a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h
+++ b/sysdeps/generic/uw-sigframe.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
+/* Internal header file for handling signal frames.  Generic version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,26 +16,16 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
+/* Each architecture that supports SFrame may need to define several
+   macros to handle exceptional cases during stack backtracing.
 
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
+   MD_DECODE_SIGNAL_FRAME(frame) should recover frame information when
+   a signal-related exception occurs.  The input frame must contain a
+   valid program counter (PC) field.  On success, the macro should
+   return _URC_NO_REASON.
 
-#define __KERNEL_NCCS 17
+   MD_DETECT_OUTERMOST_FRAME(frame) is used to detect the outermost
+   stack frame.  It returns _URC_END_OF_STACK when FRAME is the
+   outermost frame; the generic tracer then stops unwinding.
 
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
+   The FRAME structure is defined in sysdeps/generic/sframe.h  */
diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h
index e841644..d7d907a 100644
--- a/sysdeps/gnu/errlist.h
+++ b/sysdeps/gnu/errlist.h
@@ -797,3 +797,12 @@ _S(ED, N_("?"))
 #ifdef EPROGUNAVAIL
 _S(EPROGUNAVAIL, N_("RPC program not available"))
 #endif
+#ifdef EINIT
+_S(EINIT, N_("Initialization error"))
+#endif
+#ifdef EREMDEV
+_S(EREMDEV, N_("Device is remote"))
+#endif
+#ifdef ERREMOTE
+_S(ERREMOTE, N_("Too many levels of remote in path"))
+#endif
diff --git a/sysdeps/gnu/netinet/tcp.h b/sysdeps/gnu/netinet/tcp.h
index b2acbb4..7a3500b 100644
--- a/sysdeps/gnu/netinet/tcp.h
+++ b/sysdeps/gnu/netinet/tcp.h
@@ -212,6 +212,9 @@ enum
 # define TCPI_OPT_ECN		8  /* ECN was negotiated at TCP session init */
 # define TCPI_OPT_ECN_SEEN	16 /* we received at least one packet with ECT */
 # define TCPI_OPT_SYN_DATA	32 /* SYN-ACK acked data in SYN sent or rcvd */
+# define TCPI_OPT_USEC_TS	64 /* usec timestamps */
+# define TCPI_OPT_TFO_CHILD	128 /* child from a Fast Open option on SYN */
+
 
 /* Values for tcpi_state.  */
 enum tcp_ca_state
diff --git a/sysdeps/htl/include/bits/cancelation.h b/sysdeps/htl/include/bits/cancelation.h
new file mode 100644
index 0000000..ef2cd70
--- /dev/null
+++ b/sysdeps/htl/include/bits/cancelation.h
@@ -0,0 +1,5 @@
+#include_next <bits/cancelation.h>
+
+#ifndef _ISOMAC
+#include <pthreadP.h>
+#endif
diff --git a/sysdeps/htl/libc-lock.h b/sysdeps/htl/libc-lock.h
index 66779b9..8e764a7 100644
--- a/sysdeps/htl/libc-lock.h
+++ b/sysdeps/htl/libc-lock.h
@@ -37,13 +37,10 @@
       {									      \
 	__handler.__handler = FCT;					      \
 	__handler.__arg = ARG;						      \
-	if (__pthread_get_cleanup_stack != NULL)			      \
-	  {								      \
-	    __handlers = __pthread_get_cleanup_stack ();		      \
-	    __handler.__next = *__handlers;				      \
-	    *__handlers = &__handler;					      \
-	    __registered = 1;						      \
-	  }								      \
+	__handlers = __pthread_get_cleanup_stack ();			      \
+	__handler.__next = *__handlers;					      \
+	*__handlers = &__handler;					      \
+	__registered = 1;						      \
       }									      \
 
 #define __libc_cleanup_end(DOIT) \
@@ -59,12 +56,4 @@
 #define __libc_cleanup_push(fct, arg) __libc_cleanup_region_start (1, fct, arg)
 #define __libc_cleanup_pop(execute) __libc_cleanup_region_end (execute)
 
-#if !IS_IN (libpthread)
-# ifdef weak_extern
-weak_extern (__pthread_get_cleanup_stack)
-# else
-#  pragma weak __pthread_get_cleanup_stack
-# endif
-#endif
-
 #endif
diff --git a/sysdeps/htl/libc-lockP.h b/sysdeps/htl/libc-lockP.h
index 092eb35..e9977e4 100644
--- a/sysdeps/htl/libc-lockP.h
+++ b/sysdeps/htl/libc-lockP.h
@@ -126,15 +126,9 @@ libc_hidden_proto (__pthread_setcancelstate)
    single-threaded processes.  */
 #if !defined(__NO_WEAK_PTHREAD_ALIASES) && !IS_IN (libpthread)
 # ifdef weak_extern
-weak_extern (__pthread_key_create)
-weak_extern (__pthread_setspecific)
-weak_extern (__pthread_getspecific)
 weak_extern (__pthread_initialize)
 weak_extern (__pthread_atfork)
 # else
-#  pragma weak __pthread_key_create
-#  pragma weak __pthread_setspecific
-#  pragma weak __pthread_getspecific
 #  pragma weak __pthread_initialize
 #  pragma weak __pthread_atfork
 # endif
diff --git a/sysdeps/htl/pt-destroy-specific.c b/sysdeps/htl/pt-destroy-specific.c
index e63b807..b5eb0ba 100644
--- a/sysdeps/htl/pt-destroy-specific.c
+++ b/sysdeps/htl/pt-destroy-specific.c
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 
 #include <pt-internal.h>
+#include <string.h>
 
 void
 __pthread_destroy_specific (struct __pthread *thread)
@@ -100,3 +101,4 @@ __pthread_destroy_specific (struct __pthread *thread)
   memset (&thread->static_thread_specifics, 0,
 	  sizeof (thread->static_thread_specifics));
 }
+libc_hidden_def (__pthread_destroy_specific)
diff --git a/sysdeps/htl/pt-getspecific.c b/sysdeps/htl/pt-getspecific.c
index 0052ce8..d3ebb31 100644
--- a/sysdeps/htl/pt-getspecific.c
+++ b/sysdeps/htl/pt-getspecific.c
@@ -19,6 +19,7 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
 
 void *
 __pthread_getspecific (pthread_key_t key)
@@ -42,5 +43,9 @@ __pthread_getspecific (pthread_key_t key)
 
   return self->thread_specifics[key];
 }
-weak_alias (__pthread_getspecific, pthread_getspecific);
-hidden_def (__pthread_getspecific)
+libc_hidden_def (__pthread_getspecific)
+versioned_symbol (libc, __pthread_getspecific, pthread_getspecific, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_getspecific, pthread_getspecific, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-key-create.c b/sysdeps/htl/pt-key-create.c
index cf8a8d1..92a9db8 100644
--- a/sysdeps/htl/pt-key-create.c
+++ b/sysdeps/htl/pt-key-create.c
@@ -22,6 +22,9 @@
 
 #include <pt-internal.h>
 #include <pthreadP.h>
+#include <shlib-compat.h>
+#include <ldsodefs.h>
+
 
 pthread_mutex_t __pthread_key_lock;
 pthread_once_t __pthread_key_once = PTHREAD_ONCE_INIT;
@@ -116,5 +119,9 @@ do_search:
   __pthread_mutex_unlock (&__pthread_key_lock);
   return 0;
 }
-weak_alias (__pthread_key_create, pthread_key_create)
-hidden_def (__pthread_key_create)
+libc_hidden_def (__pthread_key_create)
+versioned_symbol (libc, __pthread_key_create, pthread_key_create, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_key_create, pthread_key_create, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-key-delete.c b/sysdeps/htl/pt-key-delete.c
index 79879e9..666314f 100644
--- a/sysdeps/htl/pt-key-delete.c
+++ b/sysdeps/htl/pt-key-delete.c
@@ -19,6 +19,8 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
+#include <ldsodefs.h>
 
 int
 __pthread_key_delete (pthread_key_t key)
@@ -69,4 +71,9 @@ __pthread_key_delete (pthread_key_t key)
 
   return err;
 }
-weak_alias (__pthread_key_delete, pthread_key_delete)
+libc_hidden_def (__pthread_key_delete)
+versioned_symbol (libc, __pthread_key_delete, pthread_key_delete, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_key_delete, pthread_key_delete, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pt-setspecific.c b/sysdeps/htl/pt-setspecific.c
index dfd55b6..0535225 100644
--- a/sysdeps/htl/pt-setspecific.c
+++ b/sysdeps/htl/pt-setspecific.c
@@ -19,6 +19,8 @@
 #include <pthread.h>
 
 #include <pt-internal.h>
+#include <shlib-compat.h>
+#include <string.h>
 
 int
 __pthread_setspecific (pthread_key_t key, const void *value)
@@ -68,5 +70,9 @@ __pthread_setspecific (pthread_key_t key, const void *value)
   self->thread_specifics[key] = (void *) value;
   return 0;
 }
-weak_alias (__pthread_setspecific, pthread_setspecific);
-hidden_def (__pthread_setspecific)
+libc_hidden_def (__pthread_setspecific)
+versioned_symbol (libc, __pthread_setspecific, pthread_setspecific, GLIBC_2_42);
+
+#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42)
+compat_symbol (libpthread, __pthread_setspecific, pthread_setspecific, GLIBC_2_12);
+#endif
diff --git a/sysdeps/htl/pthread-functions.h b/sysdeps/htl/pthread-functions.h
index 467d031..aec13a6 100644
--- a/sysdeps/htl/pthread-functions.h
+++ b/sysdeps/htl/pthread-functions.h
@@ -22,11 +22,6 @@
 #include <pthread.h>
 
 void __pthread_exit (void *) __attribute__ ((__noreturn__));
-struct __pthread_cancelation_handler **__pthread_get_cleanup_stack (void);
-int __pthread_once (pthread_once_t *, void (*) (void));
-int __pthread_key_create (pthread_key_t *, void (*) (void *));
-void *__pthread_getspecific (pthread_key_t);
-int __pthread_setspecific (pthread_key_t, const void *);
 
 void _cthreads_flockfile (FILE *);
 void _cthreads_funlockfile (FILE *);
@@ -38,11 +33,6 @@ int _cthreads_ftrylockfile (FILE *);
 struct pthread_functions
 {
   void (*ptr___pthread_exit) (void *) __attribute__ ((__noreturn__));
-  struct __pthread_cancelation_handler **(*ptr___pthread_get_cleanup_stack) (void);
-  int (*ptr_pthread_once) (pthread_once_t *, void (*) (void));
-  int (*ptr___pthread_key_create) (pthread_key_t *, void (*) (void *));
-  void *(*ptr___pthread_getspecific) (pthread_key_t);
-  int (*ptr___pthread_setspecific) (pthread_key_t, const void *);
   void (*ptr__IO_flockfile) (FILE *);
   void (*ptr__IO_funlockfile) (FILE *);
   int (*ptr__IO_ftrylockfile) (FILE *);
diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h
index 535deeb..535740f 100644
--- a/sysdeps/htl/pthreadP.h
+++ b/sysdeps/htl/pthreadP.h
@@ -182,9 +182,13 @@ int __cthread_keycreate (__cthread_key_t *);
 int __cthread_getspecific (__cthread_key_t, void **);
 int __cthread_setspecific (__cthread_key_t, void *);
 int __pthread_key_create (pthread_key_t *key, void (*destr) (void *));
+libc_hidden_proto (__pthread_key_create)
 void *__pthread_getspecific (pthread_key_t key);
+libc_hidden_proto (__pthread_getspecific)
 int __pthread_setspecific (pthread_key_t key, const void *value);
+libc_hidden_proto (__pthread_setspecific)
 int __pthread_key_delete (pthread_key_t key);
+libc_hidden_proto (__pthread_key_delete)
 int __pthread_once (pthread_once_t *once_control, void (*init_routine) (void));
 
 int __pthread_getattr_np (pthread_t, pthread_attr_t *);
@@ -212,14 +216,11 @@ int __pthread_condattr_init (pthread_condattr_t *attr);
 libc_hidden_proto (__pthread_self)
 libc_hidden_proto (__pthread_attr_init)
 libc_hidden_proto (__pthread_condattr_init)
+libc_hidden_proto (__pthread_get_cleanup_stack)
 
 #if IS_IN (libpthread)
 hidden_proto (__pthread_create)
 hidden_proto (__pthread_detach)
-hidden_proto (__pthread_key_create)
-hidden_proto (__pthread_getspecific)
-hidden_proto (__pthread_setspecific)
-hidden_proto (__pthread_get_cleanup_stack)
 #endif
 
 #if !defined(__NO_WEAK_PTHREAD_ALIASES) && !IS_IN (libpthread)
diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile
index a2e8c0b..74068ea 100644
--- a/sysdeps/i386/Makefile
+++ b/sysdeps/i386/Makefile
@@ -17,20 +17,25 @@ ifeq ($(subdir),gmon)
 sysdep_routines += i386-mcount
 endif
 
-ifeq ($(subdir),elf)
-CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
-CFLAGS-dl-load.c += -Wno-unused
-CFLAGS-dl-reloc.c += -Wno-unused
-endif
-
 ifeq ($(subdir),csu)
 gen-as-const-headers += link-defines.sym
+gen-as-const-headers += tlsdesc.sym
 else
 stack-align-test-flags += -malign-double
 endif
 
+# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since
+# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters
+# which must be preserved.
+# With SSE disabled, ensure -mfpmath is not set to use sse either.
+rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387
 ifeq ($(subdir),elf)
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
+CFLAGS-dl-load.c += -Wno-unused
+CFLAGS-dl-reloc.c += -Wno-unused
+sysdep-dl-routines += \
+  dl-tls-get-addr \
+# sysdep-dl-routines
 
 tests += tst-audit3
 modules-names += tst-auditmod3a tst-auditmod3b
@@ -38,18 +43,6 @@ modules-names += tst-auditmod3a tst-auditmod3b
 $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
 $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
 tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
-endif
-
-ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
-endif
-
-# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since
-# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters
-# which must be preserved.
-# With SSE disabled, ensure -fpmath is not set to use sse either.
-rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387
-ifeq ($(subdir),elf)
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
 		   $(rtld-CFLAGS))
 
diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c
new file mode 100644
index 0000000..c97e5c5
--- /dev/null
+++ b/sysdeps/i386/dl-tls-get-addr.c
@@ -0,0 +1,68 @@
+/* Ifunc selector for ___tls_get_addr.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+# define ___tls_get_addr __redirect____tls_get_addr
+# include <dl-tls.h>
+# undef ___tls_get_addr
+# undef __tls_get_addr
+
+# define SYMBOL_NAME ___tls_get_addr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden;
+
+/* Return the ___tls_get_addr variant to bind, chosen from the CPU's
+   usable state-save features: xsavec, xsave, fxsave or fnsave.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+
+  /* No XSAVE state size recorded: choose between fxsave and fnsave.  */
+  if (cpu_features->xsave_state_size == 0)
+    return (CPU_FEATURE_USABLE_P (cpu_features, FXSR)
+	    ? OPTIMIZE (fxsave) : OPTIMIZE (fnsave));
+
+  /* Otherwise use xsavec when usable, plain xsave if not.  */
+  return (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
+	  ? OPTIMIZE (xsavec) : OPTIMIZE (xsave));
+}
+
+libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr,
+		       IFUNC_SELECTOR ());
+
+/* The special thing about the x86 TLS ABI is that we have two
+   variants of the __tls_get_addr function with different calling
+   conventions.  The GNU version, which we are mostly concerned here,
+   takes the parameter in a register.  The name is changed by adding
+   an additional underscore at the beginning.  The Sun version uses
+   the normal calling convention.  */
+
+rtld_hidden_proto (___tls_get_addr)
+rtld_hidden_def (___tls_get_addr)
+
+void *
+__tls_get_addr (tls_index *ti)
+{
+  return ___tls_get_addr (ti);
+}
+#endif
diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h
index f453931..ef605c5 100644
--- a/sysdeps/i386/dl-tls.h
+++ b/sysdeps/i386/dl-tls.h
@@ -37,34 +37,14 @@ typedef struct dl_tls_index
 /* This is the prototype for the GNU version.  */
 extern void *___tls_get_addr (tls_index *ti)
      __attribute__ ((__regparm__ (1)));
-extern void *___tls_get_addr_internal (tls_index *ti)
-     __attribute__ ((__regparm__ (1))) attribute_hidden;
-
 # if IS_IN (rtld)
-/* The special thing about the x86 TLS ABI is that we have two
-   variants of the __tls_get_addr function with different calling
-   conventions.  The GNU version, which we are mostly concerned here,
-   takes the parameter in a register.  The name is changed by adding
-   an additional underscore at the beginning.  The Sun version uses
-   the normal calling convention.  */
-void *
-__tls_get_addr (tls_index *ti)
-{
-  return ___tls_get_addr_internal (ti);
-}
-
-
 /* Prepare using the definition of __tls_get_addr in the generic
    version of this file.  */
-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr
-strong_alias (___tls_get_addr, ___tls_get_addr_internal)
-rtld_hidden_proto (___tls_get_addr)
-rtld_hidden_def (___tls_get_addr)
-#else
-
+# define __tls_get_addr \
+    __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal
+# else
 /* Users should get the better interface.  */
-# define __tls_get_addr ___tls_get_addr
-
+#  define __tls_get_addr ___tls_get_addr
 # endif
 #endif
 
diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
index 6aec06d..be9ecd6 100644
--- a/sysdeps/i386/dl-tlsdesc-dynamic.h
+++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
@@ -16,34 +16,6 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#undef REGISTER_SAVE_AREA
-
-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
-# error STATE_SAVE_ALIGNMENT must be multiple of 16
-#endif
-
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-# ifdef USE_FNSAVE
-#  error USE_FNSAVE shouldn't be defined
-# endif
-# ifdef USE_FXSAVE
-/* Use fxsave to save all registers.  */
-#  define REGISTER_SAVE_AREA	512
-# endif
-#else
-# ifdef USE_FNSAVE
-/* Use fnsave to save x87 FPU stack registers.  */
-#  define REGISTER_SAVE_AREA	108
-# else
-#  ifndef USE_FXSAVE
-#   error USE_FXSAVE must be defined
-#  endif
-/* Use fxsave to save all registers.  Add 12 bytes to align the stack
-   to 16 bytes.  */
-#  define REGISTER_SAVE_AREA	(512 + 12)
-# endif
-#endif
-
 	.hidden _dl_tlsdesc_dynamic
 	.global	_dl_tlsdesc_dynamic
 	.type	_dl_tlsdesc_dynamic,@function
@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic:
 	ret
 	.p2align 4,,7
 2:
-	cfi_adjust_cfa_offset (32)
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-	movl	%ebx, -28(%esp)
-	movl	%esp, %ebx
-	cfi_def_cfa_register(%ebx)
-	and	$-STATE_SAVE_ALIGNMENT, %esp
-#endif
-#ifdef REGISTER_SAVE_AREA
-	subl	$REGISTER_SAVE_AREA, %esp
-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
-	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
-# endif
-#else
-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
-#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
-# endif
-	/* Allocate stack space of the required size to save the state.  */
-	LOAD_PIC_REG (cx)
-	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
-#endif
-#ifdef USE_FNSAVE
-	fnsave	(%esp)
-#elif defined USE_FXSAVE
-	fxsave	(%esp)
-#else
-	/* Save the argument for ___tls_get_addr in EAX.  */
-	movl	%eax, %ecx
-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
-	xorl	%edx, %edx
-	/* Clear the XSAVE Header.  */
-# ifdef USE_XSAVE
-	movl	%edx, (512)(%esp)
-	movl	%edx, (512 + 4 * 1)(%esp)
-	movl	%edx, (512 + 4 * 2)(%esp)
-	movl	%edx, (512 + 4 * 3)(%esp)
-# endif
-	movl	%edx, (512 + 4 * 4)(%esp)
-	movl	%edx, (512 + 4 * 5)(%esp)
-	movl	%edx, (512 + 4 * 6)(%esp)
-	movl	%edx, (512 + 4 * 7)(%esp)
-	movl	%edx, (512 + 4 * 8)(%esp)
-	movl	%edx, (512 + 4 * 9)(%esp)
-	movl	%edx, (512 + 4 * 10)(%esp)
-	movl	%edx, (512 + 4 * 11)(%esp)
-	movl	%edx, (512 + 4 * 12)(%esp)
-	movl	%edx, (512 + 4 * 13)(%esp)
-	movl	%edx, (512 + 4 * 14)(%esp)
-	movl	%edx, (512 + 4 * 15)(%esp)
-# ifdef USE_XSAVE
-	xsave	(%esp)
-# else
-	xsavec	(%esp)
-# endif
-	/* Restore the argument for ___tls_get_addr in EAX.  */
-	movl	%ecx, %eax
-#endif
-	call	HIDDEN_JUMPTARGET (___tls_get_addr)
-	/* Get register content back.  */
-#ifdef USE_FNSAVE
-	frstor	(%esp)
-#elif defined USE_FXSAVE
-	fxrstor	(%esp)
-#else
-	/* Save and retore ___tls_get_addr return value stored in EAX.  */
-	movl	%eax, %ecx
-	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
-	xorl	%edx, %edx
-	xrstor	(%esp)
-	movl	%ecx, %eax
-#endif
-#if DL_RUNTIME_RESOLVE_REALIGN_STACK
-	mov	%ebx, %esp
-	cfi_def_cfa_register(%esp)
-	movl	-28(%esp), %ebx
-	cfi_restore(%ebx)
-#else
-	addl	$REGISTER_SAVE_AREA, %esp
-	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
-#endif
+#include "tls-get-addr-wrapper.h"
 	jmp	1b
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
index c080993..c914ca4 100644
--- a/sysdeps/i386/dl-tlsdesc.S
+++ b/sysdeps/i386/dl-tlsdesc.S
@@ -22,23 +22,6 @@
 #include <features-offsets.h>
 #include "tlsdesc.h"
 
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
-
-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-
-   __tls_get_addr may be called with 4-byte stack alignment.  Although
-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
-   that stack will be always aligned at 16 bytes.  */
-# define DL_STACK_ALIGNMENT 4
-#endif
-
-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
-   stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr.  */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
-   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
-
 	.text
 
      /* This function is used to compute the TP offset for symbols in
diff --git a/sysdeps/i386/fpu/e_ilogb.S b/sysdeps/i386/fpu/e_ilogb.S
deleted file mode 100644
index f4b792c..0000000
--- a/sysdeps/i386/fpu/e_ilogb.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $")
-
-ENTRY(__ieee754_ilogb)
-	fldl	4(%esp)
-/* I added the following ugly construct because ilogb(+-Inf) is
-   required to return INT_MAX in ISO C99.
-   -- jakub@redhat.com.  */
-	fxam			/* Is NaN or +-Inf?  */
-	fstsw   %ax
-	movb    $0x45, %dh
-	andb    %ah, %dh
-	cmpb    $0x05, %dh
-	je      1f		/* Is +-Inf, jump.  */
-	cmpb    $0x40, %dh
-	je      2f		/* Is +-0, jump.  */
-
-	fxtract
-	pushl	%eax
-	cfi_adjust_cfa_offset (4)
-	fstp	%st
-
-	fistpl	(%esp)
-	fwait
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-
-	ret
-
-1:	fstp	%st
-	movl	$0x7fffffff, %eax
-	ret
-2:	fstp	%st
-	movl	$0x80000000, %eax	/* FP_ILOGB0  */
-	ret
-END (__ieee754_ilogb)
diff --git a/sysdeps/i386/fpu/e_ilogbf.S b/sysdeps/i386/fpu/e_ilogbf.S
deleted file mode 100644
index 37298b9..0000000
--- a/sysdeps/i386/fpu/e_ilogbf.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $")
-
-ENTRY(__ieee754_ilogbf)
-	flds	4(%esp)
-/* I added the following ugly construct because ilogb(+-Inf) is
-   required to return INT_MAX in ISO C99.
-   -- jakub@redhat.com.  */
-	fxam			/* Is NaN or +-Inf?  */
-	fstsw   %ax
-	movb    $0x45, %dh
-	andb    %ah, %dh
-	cmpb    $0x05, %dh
-	je      1f		/* Is +-Inf, jump.  */
-	cmpb    $0x40, %dh
-	je      2f		/* Is +-0, jump.  */
-
-	fxtract
-	pushl	%eax
-	cfi_adjust_cfa_offset (4)
-	fstp	%st
-
-	fistpl	(%esp)
-	fwait
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-
-	ret
-
-1:	fstp	%st
-	movl	$0x7fffffff, %eax
-	ret
-2:	fstp	%st
-	movl	$0x80000000, %eax	/* FP_ILOGB0  */
-	ret
-END (__ieee754_ilogbf)
diff --git a/sysdeps/i386/fpu/math_err.c b/sysdeps/i386/fpu/math_err.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/math_err.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h
new file mode 100644
index 0000000..0708e5a
--- /dev/null
+++ b/sysdeps/i386/tls-get-addr-wrapper.h
@@ -0,0 +1,127 @@
+/* Wrapper of i386 ___tls_get_addr to save and restore vector registers.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#undef REGISTER_SAVE_AREA
+
+#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multiple of 16
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# ifdef USE_FNSAVE
+#  error USE_FNSAVE shouldn't be defined
+# endif
+# ifdef USE_FXSAVE
+/* Use fxsave to save all registers.  */
+#  define REGISTER_SAVE_AREA	512
+# endif
+#else
+# ifdef USE_FNSAVE
+/* Use fnsave to save x87 FPU stack registers.  */
+#  define REGISTER_SAVE_AREA	108
+# else
+#  ifndef USE_FXSAVE
+#   error USE_FXSAVE must be defined
+#  endif
+/* Use fxsave to save all registers.  Add 12 bytes to align the stack
+   to 16 bytes.  */
+#  define REGISTER_SAVE_AREA	(512 + 12)
+# endif
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	movl	%ebx, 28(%esp)
+	movl	%esp, %ebx
+	cfi_def_cfa_register(%ebx)
+	and	$-STATE_SAVE_ALIGNMENT, %esp
+#endif
+#ifdef REGISTER_SAVE_AREA
+	subl	$REGISTER_SAVE_AREA, %esp
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+#else
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
+# endif
+	/* Allocate stack space of the required size to save the state.  */
+	LOAD_PIC_REG (cx)
+	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \
+		+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
+#endif
+#ifdef USE_FNSAVE
+	fnsave	(%esp)
+#elif defined USE_FXSAVE
+	fxsave	(%esp)
+#else
+	/* Save the argument for ___tls_get_addr in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	/* Clear the XSAVE Header.  */
+# ifdef USE_XSAVE
+	movl	%edx, (512)(%esp)
+	movl	%edx, (512 + 4 * 1)(%esp)
+	movl	%edx, (512 + 4 * 2)(%esp)
+	movl	%edx, (512 + 4 * 3)(%esp)
+# endif
+	movl	%edx, (512 + 4 * 4)(%esp)
+	movl	%edx, (512 + 4 * 5)(%esp)
+	movl	%edx, (512 + 4 * 6)(%esp)
+	movl	%edx, (512 + 4 * 7)(%esp)
+	movl	%edx, (512 + 4 * 8)(%esp)
+	movl	%edx, (512 + 4 * 9)(%esp)
+	movl	%edx, (512 + 4 * 10)(%esp)
+	movl	%edx, (512 + 4 * 11)(%esp)
+	movl	%edx, (512 + 4 * 12)(%esp)
+	movl	%edx, (512 + 4 * 13)(%esp)
+	movl	%edx, (512 + 4 * 14)(%esp)
+	movl	%edx, (512 + 4 * 15)(%esp)
+# ifdef USE_XSAVE
+	xsave	(%esp)
+# else
+	xsavec	(%esp)
+# endif
+	/* Restore the argument for ___tls_get_addr in EAX.  */
+	movl	%ecx, %eax
+#endif
+	call	___tls_get_addr_internal
+	/* Get register content back.  */
+#ifdef USE_FNSAVE
+	frstor	(%esp)
+#elif defined USE_FXSAVE
+	fxrstor	(%esp)
+#else
+	/* Save and restore ___tls_get_addr return value stored in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	xrstor	(%esp)
+	movl	%ecx, %eax
+#endif
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	mov	%ebx, %esp
+	cfi_def_cfa_register(%esp)
+	movl	28(%esp), %ebx
+	cfi_restore(%ebx)
+#else
+	addl	$REGISTER_SAVE_AREA, %esp
+	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+#endif
+
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S
new file mode 100644
index 0000000..7d143d8
--- /dev/null
+++ b/sysdeps/i386/tls_get_addr.S
@@ -0,0 +1,57 @@
+/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include <cpu-features-offsets.h>
+#include <features-offsets.h>
+
+	.text
+#ifdef SHARED
+# define USE_FNSAVE
+# define MINIMUM_ALIGNMENT	4
+# define STATE_SAVE_ALIGNMENT	4
+# define ___tls_get_addr	_____tls_get_addr_fnsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef MINIMUM_ALIGNMENT
+# undef USE_FNSAVE
+
+# define MINIMUM_ALIGNMENT	16
+
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT	16
+# define ___tls_get_addr	_____tls_get_addr_fxsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_FXSAVE
+
+# define USE_XSAVE
+# define STATE_SAVE_ALIGNMENT	64
+# define ___tls_get_addr	_____tls_get_addr_xsave
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_XSAVE
+
+# define USE_XSAVEC
+# define STATE_SAVE_ALIGNMENT	64
+# define ___tls_get_addr	_____tls_get_addr_xsavec
+# include "tls_get_addr.h"
+# undef ___tls_get_addr
+# undef USE_XSAVEC
+#endif /* SHARED */
diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h
new file mode 100644
index 0000000..1825798
--- /dev/null
+++ b/sysdeps/i386/tls_get_addr.h
@@ -0,0 +1,42 @@
+/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+	.hidden ___tls_get_addr
+	.global	___tls_get_addr
+	.type	___tls_get_addr,@function
+
+	/* This function is a wrapper of ___tls_get_addr_internal to
+	   preserve caller-saved vector registers.  */
+
+	cfi_startproc
+	.align 16
+___tls_get_addr:
+	/* Like all TLS resolvers, preserve call-clobbered registers.
+	   We need two scratch regs anyway.  */
+	subl	$32, %esp
+	cfi_adjust_cfa_offset (32)
+	movl	%ecx, 20(%esp)
+	movl	%edx, 24(%esp)
+#include "tls-get-addr-wrapper.h"
+	movl	20(%esp), %ecx
+	movl	24(%esp), %edx
+	addl	$32, %esp
+	cfi_adjust_cfa_offset (-32)
+	ret
+	cfi_endproc
+	.size	___tls_get_addr, .-___tls_get_addr
diff --git a/sysdeps/ieee754/dbl-64/e_ilogb.c b/sysdeps/ieee754/dbl-64/e_ilogb.c
index 1e338a5..1ea2f23 100644
--- a/sysdeps/ieee754/dbl-64/e_ilogb.c
+++ b/sysdeps/ieee754/dbl-64/e_ilogb.c
@@ -1,63 +1 @@
-/* @(#)s_ilogb.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ilogb.c,v 1.9 1995/05/10 20:47:28 jtc Exp $";
-#endif
-
-/* ilogb(double x)
- * return the binary exponent of non-zero x
- * ilogb(0) = FP_ILOGB0
- * ilogb(NaN) = FP_ILOGBNAN (no signal is raised)
- * ilogb(+-Inf) = INT_MAX (no signal is raised)
- */
-
-#include <limits.h>
-#include <math.h>
-#include <math_private.h>
-
-int
-__ieee754_ilogb (double x)
-{
-  int32_t hx, lx, ix;
-
-  GET_HIGH_WORD (hx, x);
-  hx &= 0x7fffffff;
-  if (hx < 0x00100000)
-    {
-      GET_LOW_WORD (lx, x);
-      if ((hx | lx) == 0)
-	return FP_ILOGB0;               /* ilogb(0) = FP_ILOGB0 */
-      else                              /* subnormal x */
-      if (hx == 0)
-	{
-	  for (ix = -1043; lx > 0; lx <<= 1)
-	    ix -= 1;
-	}
-      else
-	{
-	  for (ix = -1022, hx <<= 11; hx > 0; hx <<= 1)
-	    ix -= 1;
-	}
-      return ix;
-    }
-  else if (hx < 0x7ff00000)
-    return (hx >> 20) - 1023;
-  else if (FP_ILOGBNAN != INT_MAX)
-    {
-      /* ISO C99 requires ilogb(+-Inf) == INT_MAX.  */
-      GET_LOW_WORD (lx, x);
-      if (((hx ^ 0x7ff00000) | lx) == 0)
-	return INT_MAX;
-    }
-  return FP_ILOGBNAN;
-}
+/* ilogb is implemented in w_ilogb.c.  */
diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
index 3382e38..d9288c4 100644
--- a/sysdeps/ieee754/dbl-64/math_config.h
+++ b/sysdeps/ieee754/dbl-64/math_config.h
@@ -109,6 +109,7 @@ issignaling_inline (double x)
 #define BIT_WIDTH       64
 #define MANTISSA_WIDTH  52
 #define EXPONENT_WIDTH  11
+#define EXPONENT_BIAS   1023
 #define MANTISSA_MASK   UINT64_C(0x000fffffffffffff)
 #define EXPONENT_MASK   UINT64_C(0x7ff0000000000000)
 #define EXP_MANT_MASK   UINT64_C(0x7fffffffffffffff)
@@ -121,12 +122,24 @@ is_nan (uint64_t x)
   return (x & EXP_MANT_MASK) > EXPONENT_MASK;
 }
 
+static inline bool
+is_inf (uint64_t x)
+{ /* Infinity: all exponent bits set, zero mantissa, either sign.  */
+  return (x & EXP_MANT_MASK) == EXPONENT_MASK;
+}
+
 static inline uint64_t
 get_mantissa (uint64_t x)
 {
   return x & MANTISSA_MASK;
 }
 
+static inline int
+get_exponent (uint64_t x)
+{ /* Narrow to int before unbiasing so the subtraction is signed.  */
+  return (int) (x >> MANTISSA_WIDTH & 0x7ff) - EXPONENT_BIAS;
+}
+
 /* Convert integer number X, unbiased exponent EP, and sign S to double:
 
    result = X * 2^(EP+1 - exponent_bias)
@@ -164,6 +177,8 @@ attribute_hidden double __math_divzero (uint32_t);
 
 /* Invalid input unless it is a quiet NaN.  */
 attribute_hidden double __math_invalid (double);
+attribute_hidden int __math_invalid_i (int);
+attribute_hidden long int __math_invalid_li (long int);
 
 /* Error handling using output checking, only for errno setting.  */
 
diff --git a/sysdeps/ieee754/dbl-64/math_err.c b/sysdeps/ieee754/dbl-64/math_err.c
index 4a07fd5..b8c645a 100644
--- a/sysdeps/ieee754/dbl-64/math_err.c
+++ b/sysdeps/ieee754/dbl-64/math_err.c
@@ -29,8 +29,24 @@ with_errno (double y, int e)
   errno = e;
   return y;
 }
+
+NOINLINE static int
+with_errno_i (int y, int e)
+{
+  errno = e;
+  return y;
+}
+
+NOINLINE static long int
+with_errno_li (long int y, int e)
+{
+  errno = e;
+  return y;
+}
 #else
 #define with_errno(x, e) (x)
+#define with_errno_i(x, e) (x)
+#define with_errno_li(x, e) (x)
 #endif
 
 attribute_hidden double
@@ -83,6 +99,22 @@ __math_invalid (double x)
   return isnan (x) ? y : with_errno (y, EDOM);
 }
 
+attribute_hidden int
+__math_invalid_i (int r)
+{
+  double y = 0.0 / 0.0;
+  math_force_eval (y);
+  return with_errno_i (r, EDOM);
+}
+
+attribute_hidden long int
+__math_invalid_li (long int r)
+{
+  double y = 0.0 / 0.0;
+  math_force_eval (y);
+  return with_errno_li (r, EDOM);
+}
+
 /* Check result and set errno if necessary.  */
 
 attribute_hidden double
diff --git a/sysdeps/ieee754/dbl-64/s_modf.c b/sysdeps/ieee754/dbl-64/s_modf.c
index 0de2084..90cd8e8 100644
--- a/sysdeps/ieee754/dbl-64/s_modf.c
+++ b/sysdeps/ieee754/dbl-64/s_modf.c
@@ -1,63 +1,68 @@
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+/* Extract signed integral and fractional values.
+   Copyright (C) 1993-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-/*
- * modf(double x, double *iptr)
- * return fraction part of x, and return x's integral part in *iptr.
- * Method:
- *	Bit twiddling.
- *
- * Exception:
- *	No exception.
- */
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
 #include <libm-alias-double.h>
-#include <stdint.h>
-
-static const double one = 1.0;
+#include "math_config.h"
+#include <math-use-builtins-trunc.h>
 
 double
-__modf(double x, double *iptr)
+__modf (double x, double *iptr)
 {
-	int64_t i0;
-	int32_t j0;
-	EXTRACT_WORDS64(i0,x);
-	j0 = ((i0>>52)&0x7ff)-0x3ff;	/* exponent of x */
-	if(j0<52) {			/* integer part in x */
-	    if(j0<0) {			/* |x|<1 */
-		/* *iptr = +-0 */
-		INSERT_WORDS64(*iptr,i0&UINT64_C(0x8000000000000000));
-		return x;
-	    } else {
-		uint64_t i = UINT64_C(0x000fffffffffffff)>>j0;
-		if((i0&i)==0) {		/* x is integral */
-		    *iptr = x;
-		    /* return +-0 */
-		    INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000));
-		    return x;
-		} else {
-		    INSERT_WORDS64(*iptr,i0&(~i));
-		    return x - *iptr;
-		}
-	    }
-	} else { /* no fraction part */
-	    *iptr = x*one;
-	    /* We must handle NaNs separately.  */
-	    if (j0 == 0x400 && (i0 & UINT64_C(0xfffffffffffff)))
-	      return x*one;
-	    INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000));	/* return +-0 */
-	    return x;
+  uint64_t t = asuint64 (x);
+#if USE_TRUNC_BUILTIN
+  if (is_inf (t))
+    {
+      *iptr = x;
+      return copysign (0.0, x);
+    }
+  *iptr = trunc (x);
+  return copysign (x - *iptr, x);
+#else
+  int e = get_exponent (t);
+  /* |x| < 2^MANTISSA_WIDTH: x may have a fraction part.  */
+  if (e < MANTISSA_WIDTH)
+    {
+      if (e < 0)
+	{
+	  /* |x|<1 -> *iptr = +-0 */
+	  *iptr = asdouble (t & SIGN_MASK);
+	  return x;
+	}
+
+      uint64_t i = MANTISSA_MASK >> e;
+      if ((t & i) == 0)
+	{
+	  /* x is integral, return +-0  */
+	  *iptr = x;
+	  return asdouble (t & SIGN_MASK);
 	}
+
+      *iptr = asdouble (t & ~i);
+      return x - *iptr;
+    }
+
+  /* Set invalid operation for sNaN.  */
+  *iptr = x * 1.0;
+  if ((e == 0x400) && (t & MANTISSA_MASK))
+    return *iptr;
+  return asdouble (t & SIGN_MASK);
+#endif
 }
 #ifndef __modf
 libm_alias_double (__modf, modf)
diff --git a/sysdeps/ieee754/dbl-64/w_ilogb-impl.h b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h
new file mode 100644
index 0000000..c919735
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h
@@ -0,0 +1,37 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+static inline RET_TYPE
+IMPL_NAME (double x)
+{
+  uint64_t ux = asuint64 (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (__glibc_unlikely (ex == 0)) /* zero or subnormal */
+    {
+      /* Clear sign and exponent */
+      ux <<= 12;
+      if (ux == 0)
+	return RET_INVALID (RET_LOGB0);
+      /* subnormal  */
+      return (RET_TYPE)-1023 - stdc_leading_zeros (ux);
+    }
+  if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH))
+    /* NaN or Inf */
+    return RET_INVALID (ux << 12 ? RET_LOGBNAN : RET_LOGMAX);
+  return ex - 1023;
+}
diff --git a/sysdeps/ieee754/dbl-64/w_ilogb.c b/sysdeps/ieee754/dbl-64/w_ilogb.c
new file mode 100644
index 0000000..e460f14
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_ilogb.c
@@ -0,0 +1,52 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include <libm-alias-double.h>
+#include "math_config.h"
+
+#ifdef DEF_AS_LLOGB
+# define DECL_NAME   __llogb
+# define FUNC_NAME   llogb
+# define RET_TYPE    long int
+# define RET_LOGB0   FP_LLOGB0
+# define RET_LOGBNAN FP_LLOGBNAN
+# define RET_LOGMAX  LONG_MAX
+# define RET_INVALID __math_invalid_li
+#else
+# define DECL_NAME   __ilogb
+# define FUNC_NAME   ilogb
+# define RET_TYPE    int
+# define RET_LOGB0   FP_ILOGB0
+# define RET_LOGBNAN FP_ILOGBNAN
+# define RET_LOGMAX  INT_MAX
+# define RET_INVALID __math_invalid_i
+#endif
+#define __IMPL_NAME(x,y) x ## _ ## y
+#define _IMPL_NAME(x,y)  __IMPL_NAME(x,y)
+#define IMPL_NAME        _IMPL_NAME(FUNC_NAME, impl)
+#include <w_ilogb-impl.h>
+
+RET_TYPE
+DECL_NAME (double x)
+{
+  return IMPL_NAME (x);
+}
+libm_alias_double (DECL_NAME, FUNC_NAME)
diff --git a/sysdeps/ieee754/dbl-64/w_llogb.c b/sysdeps/ieee754/dbl-64/w_llogb.c
new file mode 100644
index 0000000..c984cd15
--- /dev/null
+++ b/sysdeps/ieee754/dbl-64/w_llogb.c
@@ -0,0 +1,2 @@
+#define DEF_AS_LLOGB
+#include "w_ilogb.c"
diff --git a/sysdeps/ieee754/flt-32/e_ilogbf.c b/sysdeps/ieee754/flt-32/e_ilogbf.c
index db24012..a27fb94 100644
--- a/sysdeps/ieee754/flt-32/e_ilogbf.c
+++ b/sysdeps/ieee754/flt-32/e_ilogbf.c
@@ -1,43 +1 @@
-/* s_ilogbf.c -- float version of s_ilogb.c.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_ilogbf.c,v 1.4 1995/05/10 20:47:31 jtc Exp $";
-#endif
-
-#include <limits.h>
-#include <math.h>
-#include <math_private.h>
-
-int __ieee754_ilogbf(float x)
-{
-	int32_t hx,ix;
-
-	GET_FLOAT_WORD(hx,x);
-	hx &= 0x7fffffff;
-	if(hx<0x00800000) {
-	    if(hx==0)
-		return FP_ILOGB0;	/* ilogb(0) = FP_ILOGB0 */
-	    else			/* subnormal x */
-	        for (ix = -126,hx<<=8; hx>0; hx<<=1) ix -=1;
-	    return ix;
-	}
-	else if (hx<0x7f800000) return (hx>>23)-127;
-	else if (FP_ILOGBNAN != INT_MAX) {
-	    /* ISO C99 requires ilogbf(+-Inf) == INT_MAX.  */
-	    if (hx==0x7f800000)
-		return INT_MAX;
-	}
-	return FP_ILOGBNAN;
-}
+/* ilogbf is implemented at w_ilogbf.c  */
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 8d9c8ee..33ea631 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -165,6 +165,7 @@ issignalingf_inline (float x)
 #define BIT_WIDTH       32
 #define MANTISSA_WIDTH  23
 #define EXPONENT_WIDTH  8
+#define EXPONENT_BIAS   127
 #define MANTISSA_MASK   0x007fffff
 #define EXPONENT_MASK   0x7f800000
 #define EXP_MANT_MASK   0x7fffffff
@@ -177,12 +178,24 @@ is_nan (uint32_t x)
   return (x & EXP_MANT_MASK) > EXPONENT_MASK;
 }
 
+static inline bool
+is_inf (uint32_t x)
+{
+  return (x << 1) == (EXPONENT_MASK << 1);
+}
+
 static inline uint32_t
 get_mantissa (uint32_t x)
 {
   return x & MANTISSA_MASK;
 }
 
+static inline int
+get_exponent (uint32_t x)
+{
+  return (int)((x >> MANTISSA_WIDTH & 0xff) - EXPONENT_BIAS);
+}
+
 /* Convert integer number X, unbiased exponent EP, and sign S to double:
 
    result = X * 2^(EP+1 - exponent_bias)
@@ -208,6 +221,8 @@ attribute_hidden float __math_uflowf (uint32_t);
 attribute_hidden float __math_may_uflowf (uint32_t);
 attribute_hidden float __math_divzerof (uint32_t);
 attribute_hidden float __math_invalidf (float);
+attribute_hidden int __math_invalidf_i (int);
+attribute_hidden long int __math_invalidf_li (long int);
 attribute_hidden float __math_edomf (float x);
 
 /* Shared between expf, exp2f, exp10f, and powf.  */
diff --git a/sysdeps/ieee754/flt-32/math_errf.c b/sysdeps/ieee754/flt-32/math_errf.c
index edcc4c0..244e38a 100644
--- a/sysdeps/ieee754/flt-32/math_errf.c
+++ b/sysdeps/ieee754/flt-32/math_errf.c
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <math-barriers.h>
 #include "math_config.h"
 
 #if WANT_ERRNO
@@ -27,8 +28,24 @@ with_errnof (float y, int e)
   errno = e;
   return y;
 }
+
+NOINLINE static int
+with_errnof_i (int y, int e)
+{
+  errno = e;
+  return y;
+}
+
+NOINLINE static long int
+with_errnof_li (long int y, int e)
+{
+  errno = e;
+  return y;
+}
 #else
 # define with_errnof(x, e) (x)
+# define with_errnof_i(x, e) (x)
+# define with_errnof_li(x, e) (x)
 #endif
 
 attribute_hidden float
@@ -80,3 +97,19 @@ __math_invalidf (float x)
   float y = (x - x) / (x - x);
   return isnan (x) ? y : with_errnof (y, EDOM);
 }
+
+attribute_hidden int
+__math_invalidf_i (int x)
+{
+  float y = 0.0f / 0.0f;
+  math_force_eval (y);
+  return with_errnof_i (x, EDOM);
+}
+
+attribute_hidden long int
+__math_invalidf_li (long int x)
+{
+  float y = 0.0f / 0.0f;
+  math_force_eval (y);
+  return with_errnof_li (x, EDOM);
+}
diff --git a/sysdeps/ieee754/flt-32/s_modff.c b/sysdeps/ieee754/flt-32/s_modff.c
index ad2e91d..965136b 100644
--- a/sysdeps/ieee754/flt-32/s_modff.c
+++ b/sysdeps/ieee754/flt-32/s_modff.c
@@ -1,54 +1,69 @@
-/* s_modff.c -- float version of s_modf.c.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+/* Extract signed integral and fractional values.
+   Copyright (C) 1993-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
 #include <libm-alias-float.h>
-
-static const float one = 1.0;
+#include "math_config.h"
+#include <math-use-builtins-trunc.h>
 
 float
-__modff(float x, float *iptr)
+__modff (float x, float *iptr)
 {
-	int32_t i0,j0;
-	uint32_t i;
-	GET_FLOAT_WORD(i0,x);
-	j0 = ((i0>>23)&0xff)-0x7f;	/* exponent of x */
-	if(__builtin_expect(j0<23, 1)) {		/* integer part in x */
-	    if(j0<0) {			/* |x|<1 */
-		SET_FLOAT_WORD(*iptr,i0&0x80000000);	/* *iptr = +-0 */
-		return x;
-	    } else {
-		i = (0x007fffff)>>j0;
-		if((i0&i)==0) {			/* x is integral */
-		    uint32_t ix;
-		    *iptr = x;
-		    GET_FLOAT_WORD(ix,x);
-		    SET_FLOAT_WORD(x,ix&0x80000000);	/* return +-0 */
-		    return x;
-		} else {
-		    SET_FLOAT_WORD(*iptr,i0&(~i));
-		    return x - *iptr;
-		}
-	    }
-	} else {			/* no fraction part */
-	    *iptr = x*one;
-	    /* We must handle NaNs separately.  */
-	    if (j0 == 0x80 && (i0 & 0x7fffff))
-	      return x*one;
-	    SET_FLOAT_WORD(x,i0&0x80000000);	/* return +-0 */
-	    return x;
+  uint32_t t = asuint (x);
+#if USE_TRUNCF_BUILTIN
+  if (is_inf (t))
+    {
+      *iptr = x;
+      return copysignf (0.0, x);
+    }
+  *iptr = truncf (x);
+  return copysignf (x - *iptr, x);
+#else
+  int e = get_exponent (t);
+  /* |x| < 2^MANTISSA_WIDTH: x may have a fraction part.  */
+  if (e < MANTISSA_WIDTH)
+    {
+      if (e < 0)
+	{
+	  /* |x|<1 -> *iptr = +-0 */
+	  *iptr = asfloat (t & SIGN_MASK);
+	  return x;
 	}
+
+      uint32_t i = MANTISSA_MASK >> e;
+      if ((t & i) == 0)
+	{
+	  /* x is integral, return +-0  */
+	  *iptr = x;
+	  return asfloat (t & SIGN_MASK);
+	}
+
+      *iptr = asfloat (t & ~i);
+      return x - *iptr;
+    }
+
+  /* Set invalid operation for sNaN.  */
+  *iptr = x * 1.0f;
+  if ((e == 0x80) && (t & MANTISSA_MASK))
+    return *iptr;
+  return asfloat (t & SIGN_MASK);
+#endif
 }
+#ifndef __modff
 libm_alias_float (__modf, modf)
+#endif
diff --git a/sysdeps/ieee754/flt-32/w_ilogbf-impl.h b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h
new file mode 100644
index 0000000..5aa8bf0
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h
@@ -0,0 +1,38 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+static inline RET_TYPE
+IMPL_NAME (float x)
+{
+  uint32_t ux = asuint (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (__glibc_unlikely (ex == 0))
+    {
+      /* Zero or subnormal.
+         Clear sign and exponent.  */
+      ux <<= 1 + EXPONENT_WIDTH;
+      if (ux == 0)
+	return RET_INVALID (RET_LOGB0);
+      /* subnormal */
+      return (RET_TYPE)-127 - stdc_leading_zeros (ux);
+    }
+  if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH))
+    /* NaN or Inf */
+    return RET_INVALID (ux << (1 + EXPONENT_WIDTH) ? RET_LOGBNAN : RET_LOGMAX);
+  return ex - 127;
+}
diff --git a/sysdeps/ieee754/flt-32/w_ilogbf.c b/sysdeps/ieee754/flt-32/w_ilogbf.c
new file mode 100644
index 0000000..4e2a707
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_ilogbf.c
@@ -0,0 +1,53 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include <libm-alias-float.h>
+#include <math-type-macros-float.h>
+#include "math_config.h"
+
+#ifdef DEF_AS_LLOGBF
+# define DECL_NAME   __llogb
+# define FUNC_NAME   llogb
+# define RET_TYPE    long int
+# define RET_LOGB0   FP_LLOGB0
+# define RET_LOGBNAN FP_LLOGBNAN
+# define RET_LOGMAX  LONG_MAX
+# define RET_INVALID __math_invalidf_li
+#else
+# define DECL_NAME   __ilogb
+# define FUNC_NAME   ilogb
+# define RET_TYPE    int
+# define RET_LOGB0   FP_ILOGB0
+# define RET_LOGBNAN FP_ILOGBNAN
+# define RET_LOGMAX  INT_MAX
+# define RET_INVALID __math_invalidf_i
+#endif
+#define __IMPL_NAME(x,y) x ## _ ## y
+#define _IMPL_NAME(x,y)  __IMPL_NAME(x,y)
+#define IMPL_NAME        _IMPL_NAME(FUNC_NAME, impl)
+#include <w_ilogbf-impl.h>
+
+RET_TYPE
+M_DECL_FUNC (DECL_NAME) (float x)
+{
+  return IMPL_NAME (x);
+}
+libm_alias_float (DECL_NAME, FUNC_NAME)
diff --git a/sysdeps/ieee754/flt-32/w_llogbf.c b/sysdeps/ieee754/flt-32/w_llogbf.c
new file mode 100644
index 0000000..8676434
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/w_llogbf.c
@@ -0,0 +1,2 @@
+#define DEF_AS_LLOGBF
+#include "w_ilogbf.c"
diff --git a/sysdeps/ieee754/ldbl-128/Makefile b/sysdeps/ieee754/ldbl-128/Makefile
index 5476a55..e666bdc 100644
--- a/sysdeps/ieee754/ldbl-128/Makefile
+++ b/sysdeps/ieee754/ldbl-128/Makefile
@@ -83,7 +83,7 @@ CFLAGS-w_j1l.c += -fno-builtin-j1f64x -fno-builtin-j1f128
 CFLAGS-w_jnl.c += -fno-builtin-jnf64x -fno-builtin-jnf128
 CFLAGS-s_ldexpl.c += -fno-builtin-ldexpf64x -fno-builtin-ldexpf128
 CFLAGS-w_lgammal.c += -fno-builtin-lgammaf64x -fno-builtin-lgammaf128
-CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r
+CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r -fno-builtin-lgammaf128_r
 CFLAGS-w_llogbl.c += -fno-builtin-llogbf64x -fno-builtin-llogbf128
 CFLAGS-s_llrintl.c += -fno-builtin-llrintf64x -fno-builtin-llrintf128
 CFLAGS-s_llroundl.c += -fno-builtin-llroundf64x -fno-builtin-llroundf128
diff --git a/sysdeps/ieee754/ldbl-128ibm-compat/Versions b/sysdeps/ieee754/ldbl-128ibm-compat/Versions
index 29a3869..ae4bd5b 100644
--- a/sysdeps/ieee754/ldbl-128ibm-compat/Versions
+++ b/sysdeps/ieee754/ldbl-128ibm-compat/Versions
@@ -157,6 +157,7 @@ libm {
     __compoundnieee128;
     __pownieee128;
     __powrieee128;
+    __rootnieee128;
     __rsqrtieee128;
   }
 }
diff --git a/sysdeps/ieee754/ldbl-opt/Makefile b/sysdeps/ieee754/ldbl-opt/Makefile
index 72369eb..ef7da1f 100644
--- a/sysdeps/ieee754/ldbl-opt/Makefile
+++ b/sysdeps/ieee754/ldbl-opt/Makefile
@@ -181,6 +181,7 @@ libnldbl-calls = \
   remainder \
   remquo \
   rint \
+  rootn \
   round \
   roundeven \
   rsqrt \
@@ -265,7 +266,7 @@ extra-objs += $(addsuffix .oS, $(libnldbl-routines))
 
 CFLAGS-nldbl-acos.c = -fno-builtin-acosl
 CFLAGS-nldbl-acosh.c = -fno-builtin-acoshl
-CFLAGS-nldbl-acospi.c = -fno-builtin-acospi
+CFLAGS-nldbl-acospi.c = -fno-builtin-acospil
 CFLAGS-nldbl-asin.c = -fno-builtin-asinl
 CFLAGS-nldbl-asinh.c = -fno-builtin-asinhl
 CFLAGS-nldbl-asinpi.c = -fno-builtin-asinpil
@@ -296,7 +297,7 @@ CFLAGS-nldbl-conj.c = -fno-builtin-conjl
 CFLAGS-nldbl-copysign.c = -fno-builtin-copysignl
 CFLAGS-nldbl-cos.c = -fno-builtin-cosl
 CFLAGS-nldbl-cosh.c = -fno-builtin-coshl
-CFLAGS-nldbl-cospi.c = -fno-builtin-cospi
+CFLAGS-nldbl-cospi.c = -fno-builtin-cospil
 CFLAGS-nldbl-cpow.c = -fno-builtin-cpowl
 CFLAGS-nldbl-cproj.c = -fno-builtin-cprojl
 CFLAGS-nldbl-creal.c = -fno-builtin-creall
@@ -384,6 +385,7 @@ CFLAGS-nldbl-powr.c = -fno-builtin-powrl
 CFLAGS-nldbl-remainder.c = -fno-builtin-remainderl -fno-builtin-dreml
 CFLAGS-nldbl-remquo.c = -fno-builtin-remquol
 CFLAGS-nldbl-rint.c = -fno-builtin-rintl
+CFLAGS-nldbl-rootn.c = -fno-builtin-rootnl
 CFLAGS-nldbl-round.c = -fno-builtin-roundl
 CFLAGS-nldbl-roundeven.c = -fno-builtin-roundevenl
 CFLAGS-nldbl-rsqrt.c = -fno-builtin-rsqrtl
@@ -396,11 +398,11 @@ CFLAGS-nldbl-significand.c = -fno-builtin-significandl
 CFLAGS-nldbl-sin.c = -fno-builtin-sinl
 CFLAGS-nldbl-sincos.c = -fno-builtin-sincosl
 CFLAGS-nldbl-sinh.c = -fno-builtin-sinhl
-CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpi
+CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpil
 CFLAGS-nldbl-sqrt.c = -fno-builtin-sqrtl
 CFLAGS-nldbl-tan.c = -fno-builtin-tanl
 CFLAGS-nldbl-tanh.c = -fno-builtin-tanhl
-CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpi
+CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpil
 CFLAGS-nldbl-tgamma.c = -fno-builtin-tgammal
 CFLAGS-nldbl-totalorder.c = -fno-builtin-totalorderl
 CFLAGS-nldbl-totalordermag.c = -fno-builtin-totalordermagl
diff --git a/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c
new file mode 100644
index 0000000..fb0d860
--- /dev/null
+++ b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c
@@ -0,0 +1,8 @@
+#include "nldbl-compat.h"
+
+double
+attribute_hidden
+rootnl (double x, long long int y)
+{
+  return rootn (x, y);
+}
diff --git a/sysdeps/loongarch/fpu/e_ilogbf.c b/sysdeps/loongarch/fpu/e_ilogbf.c
index adced63..a27fb94 100644
--- a/sysdeps/loongarch/fpu/e_ilogbf.c
+++ b/sysdeps/loongarch/fpu/e_ilogbf.c
@@ -1,39 +1 @@
-/* __ieee754_ilogbf().  LoongArch version.
-   Copyright (C) 2022-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define NO_MATH_REDIRECT
-#include <math.h>
-#include <fpu_control.h>
-
-int
-__ieee754_ilogbf (float x)
-{
-  int x_cond;
-  asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x));
-
-  if (__glibc_unlikely (x_cond & _FCLASS_ZERO))
-      return FP_ILOGB0;
-  else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF)))
-      return FP_ILOGBNAN;
-  else
-    {
-      asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x));
-      asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x));
-      return x;
-    }
-}
+/* ilogbf is implemented at w_ilogbf.c  */
diff --git a/sysdeps/loongarch/fpu/e_ilogb.c b/sysdeps/loongarch/fpu/w_ilogb-impl.h
index f21fa5c..1905373 100644
--- a/sysdeps/loongarch/fpu/e_ilogb.c
+++ b/sysdeps/loongarch/fpu/w_ilogb-impl.h
@@ -1,4 +1,4 @@
-/* __ieee754_ilogb().  LoongArch version.
+/* Get integer exponent of a floating-point value.  LoongArch version.
    Copyright (C) 2022-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,20 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <math.h>
 #include <fpu_control.h>
 
-int
-__ieee754_ilogb (double x)
+static inline RET_TYPE
+IMPL_NAME (double x)
 {
   int x_cond;
   asm volatile ("fclass.d \t%0, %1" : "=f" (x_cond) : "f" (x));
 
   if (__glibc_unlikely (x_cond & _FCLASS_ZERO))
-      return FP_ILOGB0;
+    return RET_INVALID (RET_LOGB0);
   else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF)))
-      return FP_ILOGBNAN;
+    return RET_INVALID (RET_LOGBNAN);
   else
     {
       asm volatile ("fabs.d \t%0, %1" : "=f" (x) : "f" (x));
diff --git a/sysdeps/loongarch/fpu/w_ilogbf-impl.h b/sysdeps/loongarch/fpu/w_ilogbf-impl.h
new file mode 100644
index 0000000..9cb4172
--- /dev/null
+++ b/sysdeps/loongarch/fpu/w_ilogbf-impl.h
@@ -0,0 +1,37 @@
+/* Get integer exponent of a floating-point value.  LoongArch version.
+   Copyright (C) 2022-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fpu_control.h>
+
+static inline RET_TYPE
+IMPL_NAME (float x)
+{
+  int x_cond;
+  asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x));
+
+  if (__glibc_unlikely (x_cond & _FCLASS_ZERO))
+    return RET_INVALID (RET_LOGB0);
+  else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF)))
+    return RET_INVALID (RET_LOGBNAN);
+  else
+    {
+      asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x));
+      asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x));
+      return x;
+    }
+}
diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure
index 0d1e9ed..6726ab8 100644
--- a/sysdeps/loongarch/preconfigure
+++ b/sysdeps/loongarch/preconfigure
@@ -44,6 +44,7 @@ loongarch*)
 
     base_machine=loongarch
     mtls_descriptor=desc
+    mtls_traditional=trad
     ;;
 esac
 
diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac
index df07dbf..5640226 100644
--- a/sysdeps/loongarch/preconfigure.ac
+++ b/sysdeps/loongarch/preconfigure.ac
@@ -42,6 +42,7 @@ loongarch*)
 
     base_machine=loongarch
     mtls_descriptor=desc
+    mtls_traditional=trad
     ;;
 esac
 
diff --git a/sysdeps/m68k/m680x0/fpu/math_err.c b/sysdeps/m68k/m680x0/fpu/math_err.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/m68k/m680x0/fpu/math_err.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/m68k/m680x0/w_ilogb.c b/sysdeps/m68k/m680x0/w_ilogb.c
new file mode 100644
index 0000000..9c26217
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_ilogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_ilogbf.c b/sysdeps/m68k/m680x0/w_ilogbf.c
new file mode 100644
index 0000000..047ad4b
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_ilogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_llogb.c b/sysdeps/m68k/m680x0/w_llogb.c
new file mode 100644
index 0000000..5e8891a
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_llogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/m68k/m680x0/w_llogbf.c b/sysdeps/m68k/m680x0/w_llogbf.c
new file mode 100644
index 0000000..edb7e9a
--- /dev/null
+++ b/sysdeps/m68k/m680x0/w_llogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
index 994de00..32bba61 100644
--- a/sysdeps/mach/hurd/Makefile
+++ b/sysdeps/mach/hurd/Makefile
@@ -311,9 +311,6 @@ endif
 ifeq ($(subdir),htl)
 tests-unsupported += tst-basic7
 endif
-ifeq ($(subdir),io)
-tests-unsupported += test-lfs
-endif
 ifeq ($(subdir),libio)
 tests-unsupported += tst-asprintf-null
 endif
@@ -323,6 +320,7 @@ tests-unsupported += tst-malloc-thread-fail-malloc-check
 tests-unsupported += tst-malloc-thread-fail-mcheck
 tests-unsupported += tst-malloc-thread-fail-malloc-hugetlb1
 tests-unsupported += tst-malloc-thread-fail-malloc-hugetlb2
+tests-unsupported += tst-malloc-thread-fail-malloc-largetcache
 tests-unsupported += tst-dynarray-fail
 endif
 ifeq ($(subdir),misc)
@@ -339,7 +337,8 @@ ifeq ($(subdir),stdlib)
 tests-unsupported += test-bz22786 tst-strtod-overflow
 # pthread_cleanup_combined_push/pthread_cleanup_combined_pop requires cleanup
 # support (BZ 32058).
-test-xfail-tst-qsortx7 = yes
+test-xfail-tst-qsort7-mem = yes
+test-xfail-tst-qsortx7-mem = yes
 endif
 ifeq ($(subdir),timezone)
 tests-unsupported += tst-tzset
diff --git a/sysdeps/mach/hurd/bits/ioctls.h b/sysdeps/mach/hurd/bits/ioctls.h
index f01316d..faf1373 100644
--- a/sysdeps/mach/hurd/bits/ioctls.h
+++ b/sysdeps/mach/hurd/bits/ioctls.h
@@ -324,15 +324,8 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 };
    From 4.4 <sys/ioctl_compat.h>.  */
 
 #ifdef __USE_MISC
-#ifdef USE_OLD_TTY
-# undef  TIOCGETD
-# define TIOCGETD	_IOR('t', 0, int)	/* get line discipline */
-# undef  TIOCSETD
-# define TIOCSETD	_IOW('t', 1, int)	/* set line discipline */
-#else
-# define OTIOCGETD	_IOR('t', 0, int)	/* get line discipline */
-# define OTIOCSETD	_IOW('t', 1, int)	/* set line discipline */
-#endif
+#define OTIOCGETD	_IOR('t', 0, int)	/* get line discipline */
+#define OTIOCSETD	_IOW('t', 1, int)	/* set line discipline */
 #define	TIOCHPCL	_IO('t', 2)		/* hang up on last close */
 #define	TIOCGETP	_IOR('t', 8,struct sgttyb)/* get parameters -- gtty */
 #define	TIOCSETP	_IOW('t', 9,struct sgttyb)/* set parameters -- stty */
@@ -411,26 +404,6 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 };
 #define	OTTYDISC	0
 #define	NETLDISC	1
 #define	NTTYDISC	2
-
-/* From 4.4 <sys/ttydev.h>.   */
-#ifdef USE_OLD_TTY
-# define B0	0
-# define B50	1
-# define B75	2
-# define B110	3
-# define B134	4
-# define B150	5
-# define B200	6
-# define B300	7
-# define B600	8
-# define B1200	9
-# define B1800	10
-# define B2400	11
-# define B4800	12
-# define B9600	13
-# define EXTA	14
-# define EXTB	15
-#endif /* USE_OLD_TTY */
 #endif
 
 #endif /* bits/ioctls.h */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/mach/hurd/getrandom-internal.h
index c9d2f4e..8bd718b 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S
+++ b/sysdeps/mach/hurd/getrandom-internal.h
@@ -1,5 +1,5 @@
-/* Optimized memchr implementation for POWER10/PPC64.
-   Copyright (C) 2016-2025 Free Software Foundation, Inc.
+/* Internal definitions for Hurd getrandom implementation.
+   Copyright (C) 2024-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,15 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define MEMCHR __memchr_power10
+#ifndef _GETRANDOM_INTERNAL_H
+#define _GETRANDOM_INTERNAL_H
 
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-#undef weak_alias
-#define weak_alias(name,alias)
+extern void __mach_init (void);
+
+static inline void __getrandom_early_init (_Bool initial)
+{
+  /* getrandom needs RPCs (for time etc.), so run __mach_init here.  */
+  __mach_init ();
+}
 
-#include <sysdeps/powerpc/powerpc64/le/power10/memchr.S>
 #endif
diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist
index 3e183f5..aac3cb3 100644
--- a/sysdeps/mach/hurd/i386/libc.abilist
+++ b/sysdeps/mach/hurd/i386/libc.abilist
@@ -28,6 +28,8 @@ GLIBC_2.11 mkostemps F
 GLIBC_2.11 mkostemps64 F
 GLIBC_2.11 mkstemps F
 GLIBC_2.11 mkstemps64 F
+GLIBC_2.12 __pthread_get_cleanup_stack F
+GLIBC_2.12 __pthread_key_create F
 GLIBC_2.12 __pthread_self F
 GLIBC_2.12 pthread_attr_destroy F
 GLIBC_2.12 pthread_attr_getdetachstate F
@@ -70,6 +72,9 @@ GLIBC_2.12 pthread_condattr_setclock F
 GLIBC_2.12 pthread_condattr_setpshared F
 GLIBC_2.12 pthread_equal F
 GLIBC_2.12 pthread_getschedparam F
+GLIBC_2.12 pthread_getspecific F
+GLIBC_2.12 pthread_key_create F
+GLIBC_2.12 pthread_key_delete F
 GLIBC_2.12 pthread_mutex_destroy F
 GLIBC_2.12 pthread_mutex_getprioceiling F
 GLIBC_2.12 pthread_mutex_init F
@@ -106,6 +111,7 @@ GLIBC_2.12 pthread_self F
 GLIBC_2.12 pthread_setcancelstate F
 GLIBC_2.12 pthread_setcanceltype F
 GLIBC_2.12 pthread_setschedparam F
+GLIBC_2.12 pthread_setspecific F
 GLIBC_2.12 pthread_sigmask F
 GLIBC_2.13 __fentry__ F
 GLIBC_2.14 syncfs F
@@ -2586,6 +2592,11 @@ GLIBC_2.41 pthread_mutexattr_settype F
 GLIBC_2.41 pthread_sigmask F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetobaud F
 GLIBC_2.42 pthread_barrier_destroy F
 GLIBC_2.42 pthread_barrier_init F
 GLIBC_2.42 pthread_barrier_wait F
@@ -2593,6 +2604,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F
 GLIBC_2.42 pthread_barrierattr_getpshared F
 GLIBC_2.42 pthread_barrierattr_init F
 GLIBC_2.42 pthread_barrierattr_setpshared F
+GLIBC_2.42 pthread_getspecific F
+GLIBC_2.42 pthread_key_create F
+GLIBC_2.42 pthread_key_delete F
 GLIBC_2.42 pthread_mutex_consistent F
 GLIBC_2.42 pthread_mutex_consistent_np F
 GLIBC_2.42 pthread_mutex_getprioceiling F
@@ -2614,6 +2628,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F
 GLIBC_2.42 pthread_rwlockattr_getpshared F
 GLIBC_2.42 pthread_rwlockattr_init F
 GLIBC_2.42 pthread_rwlockattr_setpshared F
+GLIBC_2.42 pthread_setspecific F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist
index 6948b42..47d215f 100644
--- a/sysdeps/mach/hurd/i386/libm.abilist
+++ b/sysdeps/mach/hurd/i386/libm.abilist
@@ -1301,6 +1301,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/mach/hurd/i386/libpthread.abilist b/sysdeps/mach/hurd/i386/libpthread.abilist
index b067d37..9d2c4cd 100644
--- a/sysdeps/mach/hurd/i386/libpthread.abilist
+++ b/sysdeps/mach/hurd/i386/libpthread.abilist
@@ -1,7 +1,5 @@
 GLIBC_2.12 __mutex_lock_solid F
 GLIBC_2.12 __mutex_unlock_solid F
-GLIBC_2.12 __pthread_get_cleanup_stack F
-GLIBC_2.12 __pthread_key_create F
 GLIBC_2.12 __pthread_kill F
 GLIBC_2.12 __pthread_mutex_transfer_np F
 GLIBC_2.12 __pthread_spin_destroy F
@@ -29,15 +27,11 @@ GLIBC_2.12 pthread_exit F
 GLIBC_2.12 pthread_getattr_np F
 GLIBC_2.12 pthread_getconcurrency F
 GLIBC_2.12 pthread_getcpuclockid F
-GLIBC_2.12 pthread_getspecific F
 GLIBC_2.12 pthread_join F
-GLIBC_2.12 pthread_key_create F
-GLIBC_2.12 pthread_key_delete F
 GLIBC_2.12 pthread_kill F
 GLIBC_2.12 pthread_mutex_transfer_np F
 GLIBC_2.12 pthread_setconcurrency F
 GLIBC_2.12 pthread_setschedprio F
-GLIBC_2.12 pthread_setspecific F
 GLIBC_2.12 pthread_spin_destroy F
 GLIBC_2.12 pthread_spin_init F
 GLIBC_2.12 pthread_spin_lock F
diff --git a/sysdeps/mach/hurd/x86_64/libc.abilist b/sysdeps/mach/hurd/x86_64/libc.abilist
index 688ee26..8f9d6aa 100644
--- a/sysdeps/mach/hurd/x86_64/libc.abilist
+++ b/sysdeps/mach/hurd/x86_64/libc.abilist
@@ -392,6 +392,7 @@ GLIBC_2.38 __profile_frequency F
 GLIBC_2.38 __progname D 0x8
 GLIBC_2.38 __progname_full D 0x8
 GLIBC_2.38 __pthread_get_cleanup_stack F
+GLIBC_2.38 __pthread_key_create F
 GLIBC_2.38 __pthread_self F
 GLIBC_2.38 __ptsname_r_chk F
 GLIBC_2.38 __pwrite64 F
@@ -1554,6 +1555,9 @@ GLIBC_2.38 pthread_condattr_setpshared F
 GLIBC_2.38 pthread_equal F
 GLIBC_2.38 pthread_exit F
 GLIBC_2.38 pthread_getschedparam F
+GLIBC_2.38 pthread_getspecific F
+GLIBC_2.38 pthread_key_create F
+GLIBC_2.38 pthread_key_delete F
 GLIBC_2.38 pthread_mutex_clocklock F
 GLIBC_2.38 pthread_mutex_consistent F
 GLIBC_2.38 pthread_mutex_consistent_np F
@@ -1599,6 +1603,7 @@ GLIBC_2.38 pthread_self F
 GLIBC_2.38 pthread_setcancelstate F
 GLIBC_2.38 pthread_setcanceltype F
 GLIBC_2.38 pthread_setschedparam F
+GLIBC_2.38 pthread_setspecific F
 GLIBC_2.38 pthread_sigmask F
 GLIBC_2.38 ptrace F
 GLIBC_2.38 ptsname F
@@ -2269,6 +2274,11 @@ GLIBC_2.41 pthread_mutexattr_settype F
 GLIBC_2.41 pthread_sigmask F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetobaud F
 GLIBC_2.42 pthread_barrier_destroy F
 GLIBC_2.42 pthread_barrier_init F
 GLIBC_2.42 pthread_barrier_wait F
@@ -2276,6 +2286,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F
 GLIBC_2.42 pthread_barrierattr_getpshared F
 GLIBC_2.42 pthread_barrierattr_init F
 GLIBC_2.42 pthread_barrierattr_setpshared F
+GLIBC_2.42 pthread_getspecific F
+GLIBC_2.42 pthread_key_create F
+GLIBC_2.42 pthread_key_delete F
 GLIBC_2.42 pthread_mutex_consistent F
 GLIBC_2.42 pthread_mutex_consistent_np F
 GLIBC_2.42 pthread_mutex_getprioceiling F
@@ -2297,6 +2310,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F
 GLIBC_2.42 pthread_rwlockattr_getpshared F
 GLIBC_2.42 pthread_rwlockattr_init F
 GLIBC_2.42 pthread_rwlockattr_setpshared F
+GLIBC_2.42 pthread_setspecific F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/mach/hurd/x86_64/libm.abilist b/sysdeps/mach/hurd/x86_64/libm.abilist
index 4810dfb..52c9d56 100644
--- a/sysdeps/mach/hurd/x86_64/libm.abilist
+++ b/sysdeps/mach/hurd/x86_64/libm.abilist
@@ -1158,6 +1158,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/mach/hurd/x86_64/libpthread.abilist b/sysdeps/mach/hurd/x86_64/libpthread.abilist
index 6b8acec..81d355a 100644
--- a/sysdeps/mach/hurd/x86_64/libpthread.abilist
+++ b/sysdeps/mach/hurd/x86_64/libpthread.abilist
@@ -5,8 +5,6 @@ GLIBC_2.38 __errno_location F
 GLIBC_2.38 __h_errno_location F
 GLIBC_2.38 __mutex_lock_solid F
 GLIBC_2.38 __mutex_unlock_solid F
-GLIBC_2.38 __pthread_get_cleanup_stack F
-GLIBC_2.38 __pthread_key_create F
 GLIBC_2.38 __pthread_kill F
 GLIBC_2.38 __pthread_mutex_transfer_np F
 GLIBC_2.38 __pthread_spin_destroy F
@@ -47,17 +45,13 @@ GLIBC_2.38 pthread_exit F
 GLIBC_2.38 pthread_getattr_np F
 GLIBC_2.38 pthread_getconcurrency F
 GLIBC_2.38 pthread_getcpuclockid F
-GLIBC_2.38 pthread_getspecific F
 GLIBC_2.38 pthread_hurd_cond_timedwait_np F
 GLIBC_2.38 pthread_hurd_cond_wait_np F
 GLIBC_2.38 pthread_join F
-GLIBC_2.38 pthread_key_create F
-GLIBC_2.38 pthread_key_delete F
 GLIBC_2.38 pthread_kill F
 GLIBC_2.38 pthread_mutex_transfer_np F
 GLIBC_2.38 pthread_setconcurrency F
 GLIBC_2.38 pthread_setschedprio F
-GLIBC_2.38 pthread_setspecific F
 GLIBC_2.38 pthread_spin_destroy F
 GLIBC_2.38 pthread_spin_init F
 GLIBC_2.38 pthread_spin_lock F
diff --git a/sysdeps/mach/sysdep.h b/sysdeps/mach/sysdep.h
index 8293c66..581bdcd 100644
--- a/sysdeps/mach/sysdep.h
+++ b/sysdeps/mach/sysdep.h
@@ -20,6 +20,11 @@
 /* Get the Mach definitions of ENTRY and kernel_trap.  */
 #include <mach/machine/syscall_sw.h>
 
+/* This macro is defined in Mach system headers, but string functions use it
+   with different definitions depending on whether they are being compiled
+   for wide characters or not.  */
+#undef P2ALIGN
+
 /* The Mach definitions assume underscores should be prepended to
    symbol names.  Redefine them to do so only when appropriate.  */
 #undef EXT
diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c
index d90cc6c..6f75197 100644
--- a/sysdeps/posix/libc_fatal.c
+++ b/sysdeps/posix/libc_fatal.c
@@ -16,23 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <atomic.h>
-#include <errno.h>
-#include <fcntl.h>
+#include <assert.h>
 #include <ldsodefs.h>
-#include <libc-pointer-arith.h>
-#include <paths.h>
+#include <setvmaname.h>
 #include <stdarg.h>
-#include <stdbool.h>
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sysdep.h>
-#include <unistd.h>
-#include <sys/mman.h>
 #include <sys/uio.h>
-#include <not-cancel.h>
-#include <setvmaname.h>
+#include <unistd.h>
 
 #ifdef FATAL_PREPARE_INCLUDE
 #include FATAL_PREPARE_INCLUDE
@@ -47,6 +37,10 @@ writev_for_fatal (int fd, const struct iovec *iov, size_t niov, size_t total)
 }
 #endif
 
+/* At most a substring before each conversion specification and the
+   trailing substring (the plus one).  */
+#define IOVEC_MAX (LIBC_MESSAGE_MAX_ARGS * 2 + 1)
+
 /* Abort with an error message.  */
 void
 __libc_message_impl (const char *fmt, ...)
@@ -61,7 +55,7 @@ __libc_message_impl (const char *fmt, ...)
   if (fd == -1)
     fd = STDERR_FILENO;
 
-  struct iovec iov[LIBC_MESSAGE_MAX_ARGS * 2 - 1];
+  struct iovec iov[IOVEC_MAX];
   int iovcnt = 0;
   ssize_t total = 0;
 
@@ -99,6 +93,16 @@ __libc_message_impl (const char *fmt, ...)
       iov[iovcnt].iov_len = len;
       total += len;
       iovcnt++;
+
+      if (__glibc_unlikely (iovcnt > IOVEC_MAX))
+	{
+	  len = IOVEC_MAX_ERR_MSG_LEN;
+	  iov[0].iov_base = (char *) IOVEC_MAX_ERR_MSG;
+	  iov[0].iov_len = len;
+	  total = len;
+	  iovcnt = 1;
+	  break;
+	}
     }
   va_end (ap);
 
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
index 5e6cb07..5cdb64f 100644
--- a/sysdeps/powerpc/Makefile
+++ b/sysdeps/powerpc/Makefile
@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c
 $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
 
 $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
+
+# The test checks that __tls_get_addr does not clobber caller-saved
+# registers, so disable the powerpc-specific optimization to force a
+# __tls_get_addr call.
+LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize
 endif
 
 ifneq (no,$(multi-arch))
diff --git a/sysdeps/powerpc/fpu/math-use-builtins-trunc.h b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h
new file mode 100644
index 0000000..3e6a55d
--- /dev/null
+++ b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h
@@ -0,0 +1,9 @@
+#ifdef _ARCH_PWR5X
+# define USE_TRUNCF_BUILTIN 1
+# define USE_TRUNC_BUILTIN 1
+#else
+# define USE_TRUNCF_BUILTIN 0
+# define USE_TRUNC_BUILTIN 0
+#endif
+#define USE_TRUNCL_BUILTIN 0
+#define USE_TRUNCF128_BUILTIN 0
diff --git a/sysdeps/powerpc/fpu/s_modf.c b/sysdeps/powerpc/fpu/s_modf.c
deleted file mode 100644
index 831072b..0000000
--- a/sysdeps/powerpc/fpu/s_modf.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, see <https://www.gnu.org/licenses/>.  */
-
-/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make
-   generic implementation faster.  Also disables for old ISAs that do not
-   have ceil/floor instructions.  */
-#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X)
-# include <sysdeps/ieee754/ldbl-opt/s_modf.c>
-#else
-# include <math.h>
-# include <math_ldbl_opt.h>
-# include <libm-alias-double.h>
-
-double
-__modf (double x, double *iptr)
-{
-  if (__builtin_isinf (x))
-    {
-      *iptr = x;
-      return copysign (0.0, x);
-    }
-  else if (__builtin_isnan (x))
-    {
-      *iptr = NAN;
-      return NAN;
-    }
-
-  if (x >= 0.0)
-    {
-      *iptr = floor (x);
-      return copysign (x - *iptr, x);
-    }
-  else
-    {
-      *iptr = ceil (x);
-      return copysign (x - *iptr, x);
-    }
-}
-# ifndef __modf
-libm_alias_double (__modf, modf)
-#  if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
-compat_symbol (libc, __modf, modfl, GLIBC_2_0);
-#  endif
-# endif
-#endif
diff --git a/sysdeps/powerpc/fpu/s_modff.c b/sysdeps/powerpc/fpu/s_modff.c
deleted file mode 100644
index 79eeb7b..0000000
--- a/sysdeps/powerpc/fpu/s_modff.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright (C) 2013-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If
-   not, see <https://www.gnu.org/licenses/>.  */
-
-/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make
-   generic implementation faster.  Also disables for old ISAs that do not
-   have ceil/floor instructions.  */
-#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X)
-# include <sysdeps/ieee754/flt-32/s_modff.c>
-#else
-# include <math.h>
-# include <libm-alias-float.h>
-
-float
-__modff (float x, float *iptr)
-{
-  if (__builtin_isinff (x))
-    {
-      *iptr = x;
-      return copysignf (0.0, x);
-    }
-  else if (__builtin_isnanf (x))
-    {
-      *iptr = NAN;
-      return NAN;
-    }
-
-  if (x >= 0.0)
-    {
-      *iptr = floorf (x);
-      return copysignf (x - *iptr, x);
-    }
-  else
-    {
-      *iptr = ceilf (x);
-      return copysignf (x - *iptr, x);
-    }
-}
-# ifndef __modff
-libm_alias_float (__modf, modf)
-# endif
-#endif
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
index b8315c5..48f3a19 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modf __modf_power5plus
-#include <sysdeps/powerpc/fpu/s_modf.c>
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
index 69591da..15bfa0b 100644
--- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
+++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modff __modff_power5plus
-#include <sysdeps/powerpc/fpu/s_modff.c>
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
index b8315c5..48f3a19 100644
--- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
+++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modf __modf_power5plus
-#include <sysdeps/powerpc/fpu/s_modf.c>
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
index 69591da..15bfa0b 100644
--- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
+++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c
@@ -17,4 +17,4 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define __modff __modff_power5plus
-#include <sysdeps/powerpc/fpu/s_modff.c>
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/powerpc/powerpc64/le/configure b/sysdeps/powerpc/powerpc64/le/configure
index 7092f61..ef17f24 100644
--- a/sysdeps/powerpc/powerpc64/le/configure
+++ b/sysdeps/powerpc/powerpc64/le/configure
@@ -137,75 +137,5 @@ then :
   critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."
 fi
 
-for ac_prog in $OBJCOPY
-do
-  # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-printf %s "checking for $ac_word... " >&6; }
-if test ${ac_cv_prog_OBJCOPY+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e) if test -n "$OBJCOPY"; then
-  ac_cv_prog_OBJCOPY="$OBJCOPY" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  case $as_dir in #(((
-    '') as_dir=./ ;;
-    */) ;;
-    *) as_dir=$as_dir/ ;;
-  esac
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
-    ac_cv_prog_OBJCOPY="$ac_prog"
-    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-fi ;;
-esac
-fi
-OBJCOPY=$ac_cv_prog_OBJCOPY
-if test -n "$OBJCOPY"; then
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJCOPY" >&5
-printf "%s\n" "$OBJCOPY" >&6; }
-else
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
-printf "%s\n" "no" >&6; }
-fi
-
-
-  test -n "$OBJCOPY" && break
-done
-
-if test -z "$OBJCOPY"; then
-  ac_verc_fail=yes
-else
-  # Found it, now check the version.
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking version of $OBJCOPY" >&5
-printf %s "checking version of $OBJCOPY... " >&6; }
-  ac_prog_version=`$OBJCOPY --version 2>&1 | sed -n 's/^.*GNU objcopy.* \([0-9]*\.[0-9.]*\).*$/\1/p'`
-  case $ac_prog_version in
-    '') ac_prog_version="v. ?.??, bad"; ac_verc_fail=yes;;
-    2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*)
-       ac_prog_version="$ac_prog_version, ok"; ac_verc_fail=no;;
-    *) ac_prog_version="$ac_prog_version, bad"; ac_verc_fail=yes;;
-
-  esac
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_prog_version" >&5
-printf "%s\n" "$ac_prog_version" >&6; }
-fi
-if test $ac_verc_fail = yes; then
-  AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le"
-fi
-
-
 test -n "$critic_missing" && as_fn_error $? "*** $critic_missing" "$LINENO" 5
 
diff --git a/sysdeps/powerpc/powerpc64/le/configure.ac b/sysdeps/powerpc/powerpc64/le/configure.ac
index 48d7089..79b3d43 100644
--- a/sysdeps/powerpc/powerpc64/le/configure.ac
+++ b/sysdeps/powerpc/powerpc64/le/configure.ac
@@ -66,11 +66,4 @@ CFLAGS="$save_CFLAGS"])
 AS_IF([test "$libc_cv_compiler_powerpc64le_ldbl128_mabi" = "no"],
       [critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."])
 
-dnl objcopy (binutils) 2.26 or newer required to support the --update-section
-dnl feature for fixing up .gnu.attribute section with IEEE ldbl.
-AC_CHECK_PROG_VER(OBJCOPY, $OBJCOPY, --version,
-		  [GNU objcopy.* \([0-9]*\.[0-9.]*\)],
-		  [2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*],
-		  AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le")
-
 test -n "$critic_missing" && AC_MSG_ERROR([*** $critic_missing])
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c
new file mode 100644
index 0000000..89e7498
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c
@@ -0,0 +1,41 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include "math_config.h"
+
+int
+__ieee754_ilogb (double x)
+{
+  uint64_t ux = asuint64 (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (ex == 0) /* Zero or subnormal.  */
+    {
+      /* Shift out sign and exponent, leaving the mantissa at the top.  */
+      ux <<= 12;
+      if (ux == 0)
+	return FP_ILOGB0;
+      /* Subnormal.  The cast keeps the subtraction in signed arithmetic.  */
+      return -1023 - (int) stdc_leading_zeros (ux);
+    }
+  if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf.  */
+    return ux << 12 ? FP_ILOGBNAN : INT_MAX;
+  return ex - 1023;
+}
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c
new file mode 100644
index 0000000..1c2a8a5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c
@@ -0,0 +1,41 @@
+/* Get integer exponent of a floating-point value.
+   Copyright (C) 1999-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdbit.h>
+#include "sysdeps/ieee754/flt-32/math_config.h"
+
+int
+__ieee754_ilogbf (float x)
+{
+  uint32_t ux = asuint (x);
+  int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH;
+  if (ex == 0) /* Zero or subnormal.  */
+    {
+      /* Shift out sign and exponent, leaving the mantissa at the top.  */
+      ux <<= 1 + EXPONENT_WIDTH;
+      if (ux == 0)
+	return FP_ILOGB0;
+      /* Subnormal.  The cast keeps the subtraction in signed arithmetic.  */
+      return -127 - (int) stdc_leading_zeros (ux);
+    }
+  if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf.  */
+    return ux << (1 + EXPONENT_WIDTH) ? FP_ILOGBNAN : INT_MAX;
+  return ex - 127;
+}
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c
new file mode 100644
index 0000000..9c26217
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c
new file mode 100644
index 0000000..047ad4b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_ilogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c
new file mode 100644
index 0000000..5e8891a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-double.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c
new file mode 100644
index 0000000..edb7e9a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c
@@ -0,0 +1,2 @@
+#include <math-type-macros-float.h>
+#include <w_llogb_template.c>
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S
deleted file mode 100644
index 96ad5a2..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S
+++ /dev/null
@@ -1,315 +0,0 @@
-/* Optimized memchr implementation for POWER10 LE.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-# ifndef MEMCHR
-#  define MEMCHR __memchr
-# endif
-# define M_VREG_ZERO v20
-# define M_OFF_START_LOOP 256
-# define MEMCHR_SUBTRACT_VECTORS \
-	vsububm   v4,v4,v18;	    \
-	vsububm   v5,v5,v18;	    \
-	vsububm   v6,v6,v18;	    \
-	vsububm   v7,v7,v18;
-# define M_TAIL(vreg,increment)	   \
-	vctzlsbb  r4,vreg;	   \
-	cmpld     r5,r4;	   \
-	ble       L(null);	   \
-	addi	  r4,r4,increment; \
-	add	  r3,r6,r4;	   \
-	blr
-
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define M_VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))	 \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define M_LXVP(xtp,dq,ra)		   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
-#define CHECK16B(vreg,offset,addr,label) \
-	lxv	  vreg+32,offset(addr);	\
-	vcmpequb. vreg,vreg,v18;	\
-	bne	  cr6,L(label);		\
-	cmpldi	  r5,16;		\
-	ble	  L(null);		\
-	addi	  r5,r5,-16;
-
-/* Load 4 quadwords, merge into one VR for speed and check for NULLs.  r6 has #
-   of bytes already checked.  */
-#define CHECK64B(offset,addr,label)	    \
-	M_LXVP(v4+32,offset,addr);	    \
-	M_LXVP(v6+32,offset+32,addr);	    \
-	MEMCHR_SUBTRACT_VECTORS;	    \
-	vminub	  v14,v4,v5;		    \
-	vminub	  v15,v6,v7;		    \
-	vminub	  v16,v14,v15;		    \
-	vcmpequb. v0,v16,M_VREG_ZERO;	    \
-	beq	  cr6,$+12;		    \
-	li	  r7,offset;		    \
-	b     	  L(label);          	    \
-	cmpldi	  r5,64;		    \
-	ble	  L(null);		    \
-	addi	  r5,r5,-64
-
-/* Implements the function
-   void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]).  */
-
-	.machine power9
-
-ENTRY_TOCLESS (MEMCHR)
-	CALL_MCOUNT 3
-
-	cmpldi	r5,0
-	beq	L(null)
-	mr	r0,r5
-	xori	r6,r4,0xff
-
-	mtvsrd	v18+32,r4	/* matching char in v18  */
-	mtvsrd	v19+32,r6	/* non matching char in v19  */
-
-	vspltb	v18,v18,7	/* replicate  */
-	vspltb	v19,v19,7	/* replicate  */
-	vspltisb  M_VREG_ZERO,0
-
-	/* Next 16B-aligned address. Prepare address for L(aligned).  */
-	addi	  r6,r3,16
-	clrrdi	  r6,r6,4
-
-	/* Align data and fill bytes not loaded with non matching char.	 */
-	lvx	  v0,0,r3
-	lvsr	  v1,0,r3
-	vperm	  v0,v19,v0,v1
-
-	vcmpequb. v6,v0,v18
-	bne	  cr6,L(found)
-	sub	  r4,r6,r3
-	cmpld	  r5,r4
-	ble	  L(null)
-	sub	  r5,r5,r4
-
-	/* Test up to OFF_START_LOOP-16 bytes in 16B chunks.  The main loop is
-	   optimized for longer strings, so checking the first bytes in 16B
-	   chunks benefits a lot small strings.  */
-	.p2align 5
-L(aligned):
-	cmpldi	r5,0
-	beq     L(null)
-
-	CHECK16B(v0,0,r6,tail1)
-	CHECK16B(v1,16,r6,tail2)
-	CHECK16B(v2,32,r6,tail3)
-	CHECK16B(v3,48,r6,tail4)
-	CHECK16B(v4,64,r6,tail5)
-	CHECK16B(v5,80,r6,tail6)
-	CHECK16B(v6,96,r6,tail7)
-	CHECK16B(v7,112,r6,tail8)
-	CHECK16B(v8,128,r6,tail9)
-	CHECK16B(v9,144,r6,tail10)
-	CHECK16B(v10,160,r6,tail11)
-	CHECK16B(v0,176,r6,tail12)
-	CHECK16B(v1,192,r6,tail13)
-	CHECK16B(v2,208,r6,tail14)
-	CHECK16B(v3,224,r6,tail15)
-
-	cmpdi	cr5,r4,0	/* Check if c == 0.  This will be useful to
-				   choose how we will perform the main loop.  */
-
-	/* Prepare address for the loop.  */
-	addi	  r4,r3,M_OFF_START_LOOP
-	clrrdi	  r4,r4,6
-	sub	  r6,r4,r3
-	sub	  r5,r0,r6
-	addi	  r6,r4,128
-
-	/* If c == 0, use the loop without the vsububm.  */
-	beq	cr5,L(loop)
-
-	/* This is very similar to the block after L(loop), the difference is
-	   that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract
-	   each byte loaded by the char we are looking for, this way we can keep
-	   using vminub to merge the results and checking for nulls.  */
-	.p2align 5
-L(memchr_loop):
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	r6,r6,256
-
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	r6,r6,256
-
-	b	L(memchr_loop)
-	/* Switch to a more aggressive approach checking 64B each time.  Use 2
-	   pointers 128B apart and unroll the loop once to make the pointer
-	   updates and usages separated enough to avoid stalls waiting for
-	   address calculation.  */
-	.p2align 5
-L(loop):
-#undef MEMCHR_SUBTRACT_VECTORS
-#define MEMCHR_SUBTRACT_VECTORS /* nothing */
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi	  r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi	  r6,r6,256
-
-	CHECK64B(0,r4,pre_tail_64b)
-	CHECK64B(64,r4,pre_tail_64b)
-	addi      r4,r4,256
-
-	CHECK64B(0,r6,tail_64b)
-	CHECK64B(64,r6,tail_64b)
-	addi      r6,r6,256
-
-	b	  L(loop)
-
-	.p2align  5
-L(pre_tail_64b):
-	mr	r6,r4
-L(tail_64b):
-	/* OK, we found a null byte.  Let's look for it in the current 64-byte
-	   block and mark it in its corresponding VR.  lxvp vx,0(ry) puts the
-	   low 16B bytes into vx+1, and the high into vx, so the order here is
-	   v5, v4, v7, v6.  */
-	vcmpequb  v1,v5,M_VREG_ZERO
-	vcmpequb  v2,v4,M_VREG_ZERO
-	vcmpequb  v3,v7,M_VREG_ZERO
-	vcmpequb  v4,v6,M_VREG_ZERO
-
-	/* Take into account the other 64B blocks we had already checked.  */
-	add	r6,r6,r7
-	/* Extract first bit of each byte.  */
-	M_VEXTRACTBM(r8,v1)
-	M_VEXTRACTBM(r9,v2)
-	M_VEXTRACTBM(r10,v3)
-	M_VEXTRACTBM(r11,v4)
-
-	/* Shift each value into their corresponding position.  */
-	sldi	  r9,r9,16
-	sldi	  r10,r10,32
-	sldi	  r11,r11,48
-
-	/* Merge the results.  */
-	or	  r8,r8,r9
-	or	  r9,r10,r11
-	or	  r11,r9,r8
-
-	cnttzd	  r0,r11	  /* Count trailing zeros before the match.  */
-	cmpld     r5,r0
-	ble	  L(null)
-	add	  r3,r6,r0	  /* Compute final address.  */
-	blr
-
-	.p2align  5
-L(tail1):
-	M_TAIL(v0,0)
-
-	.p2align  5
-L(tail2):
-	M_TAIL(v1,16)
-
-	.p2align  5
-L(tail3):
-	M_TAIL(v2,32)
-
-	.p2align  5
-L(tail4):
-	M_TAIL(v3,48)
-
-	.p2align  5
-L(tail5):
-	M_TAIL(v4,64)
-
-	.p2align  5
-L(tail6):
-	M_TAIL(v5,80)
-
-	.p2align  5
-L(tail7):
-	M_TAIL(v6,96)
-
-	.p2align  5
-L(tail8):
-	M_TAIL(v7,112)
-
-	.p2align  5
-L(tail9):
-	M_TAIL(v8,128)
-
-	.p2align  5
-L(tail10):
-	M_TAIL(v9,144)
-
-	.p2align  5
-L(tail11):
-	M_TAIL(v10,160)
-
-	.p2align  5
-L(tail12):
-	M_TAIL(v0,176)
-
-	.p2align  5
-L(tail13):
-	M_TAIL(v1,192)
-
-	.p2align  5
-L(tail14):
-	M_TAIL(v2,208)
-
-	.p2align  5
-L(tail15):
-	M_TAIL(v3,224)
-
-	.p2align  5
-L(found):
-	vctzlsbb  r7,v6
-	cmpld     r5,r7
-	ble       L(null)
-	add       r3,r3,r7
-	blr
-
-	.p2align  5
-L(null):
-	li	r3,0
-	blr
-
-END (MEMCHR)
-
-weak_alias (__memchr, memchr)
-libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
index f32dc38..734bf5f 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
@@ -18,26 +18,10 @@
 
 #include <sysdep.h>
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))  \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)			   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
 /* Compare 32 bytes.  */
 #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
-	LXVP(32+vr1,offset,r3);		\
-	LXVP(32+vr2,offset,r4);		\
+	lxvp      32+vr1,offset(r3);	\
+	lxvp      32+vr2,offset(r4);	\
 	vcmpneb.  v5,vr1+1,vr2+1;	\
 	bne	  cr6,L(tail_2);	\
 	vcmpneb.  v4,vr1,vr2;		\
@@ -56,7 +40,7 @@
 #ifndef MEMCMP
 # define MEMCMP memcmp
 #endif
-	.machine  power9
+	.machine  power10
 ENTRY_TOCLESS (MEMCMP, 4)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
index ed7a9f5..f2a503e 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S
@@ -26,7 +26,7 @@
 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
 	   Returns 'dst'.  */
 
-	.machine power9
+	.machine power10
 ENTRY_TOCLESS (MEMCPY, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memmove.S b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
index 47c2ac3..4aaa1ef 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memmove.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
@@ -28,7 +28,7 @@
 #ifndef MEMMOVE
 # define MEMMOVE memmove
 #endif
-	.machine power9
+	.machine power10
 ENTRY_TOCLESS (MEMMOVE, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S
index 29d5114..f9442e7 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memset.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S
@@ -25,7 +25,7 @@
 # define MEMSET memset
 #endif
 
-	.machine  power9
+	.machine  power10
 ENTRY_TOCLESS (MEMSET, 5)
 	CALL_MCOUNT 3
 
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
deleted file mode 100644
index fffa1ee..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Optimized strcmp implementation for PowerPC64/POWER10.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-#include <sysdep.h>
-
-#ifndef STRCMP
-# define STRCMP strcmp
-#endif
-
-/* Implements the function
-   int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]).  */
-
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-#define LXVP(xtp,dq,ra)		     \
-	.long(((6)<<(32-6))	     \
-	| ((((xtp)-32)>>1)<<(32-10)) \
-	| ((1)<<(32-11))	     \
-	| ((ra)<<(32-16))	     \
-	| dq)
-
-#define COMPARE_16(vreg1,vreg2,offset)  \
-	lxv       vreg1+32,offset(r3);  \
-	lxv       vreg2+32,offset(r4);	\
-	vcmpnezb. v7,vreg1,vreg2;	\
-	bne       cr6,L(different);     \
-
-#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
-	LXVP(vreg1+32,offset,r3);                    \
-	LXVP(vreg2+32,offset,r4);                    \
-	vcmpnezb. v7,vreg1+1,vreg2+1;                \
-	bne	  cr6,L(label1);                     \
-	vcmpnezb. v7,vreg1,vreg2;                    \
-	bne	  cr6,L(label2);                     \
-
-#define TAIL(vreg1,vreg2)     \
-	vctzlsbb r6,v7;	      \
-	vextubrx r5,r6,vreg1; \
-	vextubrx r4,r6,vreg2; \
-	subf	 r3,r4,r5;    \
-	blr;                  \
-
-#define CHECK_N_BYTES(reg1,reg2,len_reg) \
-	sldi	  r0,len_reg,56;         \
-	lxvl	  32+v4,reg1,r0;         \
-	lxvl	  32+v5,reg2,r0;         \
-	add	  reg1,reg1,len_reg;     \
-	add	  reg2,reg2,len_reg;     \
-	vcmpnezb  v7,v4,v5;              \
-	vctzlsbb  r6,v7;                 \
-	cmpld	  cr7,r6,len_reg;        \
-	blt	  cr7,L(different);      \
-
-	/* TODO: change this to .machine power10 when the minimum required
-	binutils allows it.  */
-
-	.machine  power9
-ENTRY_TOCLESS (STRCMP, 4)
-	andi.	r7,r3,4095
-	andi.	r8,r4,4095
-	cmpldi	cr0,r7,4096-16
-	cmpldi	cr1,r8,4096-16
-	bgt	cr0,L(crosses)
-	bgt	cr1,L(crosses)
-	COMPARE_16(v4,v5,0)
-
-L(crosses):
-	andi.	r7,r3,15
-	subfic	r7,r7,16	/* r7(nalign1) = 16 - (str1 & 15).  */
-	andi.	r9,r4,15
-	subfic	r5,r9,16	/* r5(nalign2) = 16 - (str2 & 15).  */
-	cmpld	cr7,r7,r5
-	beq	cr7,L(same_aligned)
-	blt	cr7,L(nalign1_min)
-
-	/* nalign2 is minimum and s2 pointer is aligned.  */
-	CHECK_N_BYTES(r3,r4,r5)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.	r10,r3,63	/* Determine offset into 64B hunk.  */
-	andi.	r8,r3,15        /* The offset into the 16B hunk.  */
-	neg	r7,r3
-	andi.	r9,r7,15	/* Number of bytes after a 16B cross.  */
-	rlwinm.	r7,r7,26,0x3F	/* ((r3-4096))>>6&63.  */
-	beq	L(compare_64_pagecross)
-	mtctr	r7
-	b	L(compare_64B_unaligned)
-
-	/* nalign1 is minimum and s1 pointer is aligned.  */
-L(nalign1_min):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.	r10,r4,63	/* Determine offset into 64B hunk.  */
-	andi.	r8,r4,15	/* The offset into the 16B hunk.  */
-	neg	r7,r4
-	andi.	r9,r7,15	/* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F	/* ((r4-4096))>>6&63.  */
-	beq	L(compare_64_pagecross)
-	mtctr	r7
-
-	.p2align 5
-L(compare_64B_unaligned):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	bdnz	L(compare_64B_unaligned)
-
-	/* Cross the page boundary of s2, carefully. Only for first
-	iteration we have to get the count of 64B blocks to be checked.
-	From second iteration and beyond, loop counter is always 63.  */
-L(compare_64_pagecross):
-	li	r11, 63
-	mtctr	r11
-	cmpldi	r10,16
-	ble	L(cross_4)
-	cmpldi	r10,32
-	ble	L(cross_3)
-	cmpldi	r10,48
-	ble	L(cross_2)
-L(cross_1):
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi	r3,r3,48
-	addi	r4,r4,48
-	b	L(compare_64B_unaligned)
-L(cross_2):
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r3,r3,32
-	addi	r4,r4,32
-	b	L(compare_64B_unaligned)
-L(cross_3):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r3,r3,32
-	addi	r4,r4,32
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-	b	L(compare_64B_unaligned)
-L(cross_4):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi	r3,r3,48
-	addi	r4,r4,48
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	b	L(compare_64B_unaligned)
-
-L(same_aligned):
-	CHECK_N_BYTES(r3,r4,r7)
-        /* Align s1 to 32B and adjust s2 address.
-	   Use lxvp only if both s1 and s2 are 32B aligned.  */
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-
-	clrldi	r6,r3,59
-	subfic	r5,r6,32
-	add	r3,r3,r5
-	add	r4,r4,r5
-	andi.	r5,r4,0x1F
-	beq	cr0,L(32B_aligned_loop)
-
-	.p2align 5
-L(16B_aligned_loop):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	b	L(16B_aligned_loop)
-
-	/* Calculate and return the difference.  */
-L(different):
-	TAIL(v4,v5)
-
-	.p2align 5
-L(32B_aligned_loop):
-	COMPARE_32(v14,v16,0,tail1,tail2)
-	COMPARE_32(v18,v20,32,tail3,tail4)
-	COMPARE_32(v22,v24,64,tail5,tail6)
-	COMPARE_32(v26,v28,96,tail7,tail8)
-	addi	r3,r3,128
-	addi	r4,r4,128
-	b	L(32B_aligned_loop)
-
-L(tail1): TAIL(v15,v17)
-L(tail2): TAIL(v14,v16)
-L(tail3): TAIL(v19,v21)
-L(tail4): TAIL(v18,v20)
-L(tail5): TAIL(v23,v25)
-L(tail6): TAIL(v22,v24)
-L(tail7): TAIL(v27,v29)
-L(tail8): TAIL(v26,v28)
-
-END (STRCMP)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
index 4985a92..ec644d5 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
@@ -63,22 +63,6 @@
 	blr
 #endif /* USE_AS_RAWMEMCHR */
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35.  This is used to keep compatibility with older versions.  */
-#define VEXTRACTBM(rt,vrb)	 \
-	.long(((4)<<(32-6))	 \
-	      | ((rt)<<(32-11))	 \
-	      | ((8)<<(32-16))	 \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra)		   \
-	.long(((6)<<(32-6))		   \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11))		   \
-	      | ((ra)<<(32-16))		   \
-	      | dq)
-
 #define CHECK16(vreg,offset,addr,label) \
 	lxv	  vreg+32,offset(addr);	\
 	vcmpequb. vreg,vreg,v18;	\
@@ -88,8 +72,8 @@
    of bytes already checked.  */
 #define CHECK64(offset,addr,label)	    \
 	li	  r6,offset;		    \
-	LXVP(v4+32,offset,addr);	    \
-	LXVP(v6+32,offset+32,addr);	    \
+	lxvp      v4+32,offset(addr);	    \
+	lxvp      v6+32,offset+32(addr);    \
 	RAWMEMCHR_SUBTRACT_VECTORS;	    \
 	vminub	  v14,v4,v5;		    \
 	vminub	  v15,v6,v7;		    \
@@ -108,7 +92,7 @@
    The implementation can load bytes past a matching byte, but only
    up to the next 64B boundary, so it never crosses a page.  */
 
-.machine power9
+.machine power10
 
 ENTRY_TOCLESS (FUNCNAME, 4)
 	CALL_MCOUNT MCOUNT_NARGS
@@ -234,10 +218,10 @@ L(tail_64b):
 	add	r5,r5,r6
 
 	/* Extract first bit of each byte.  */
-	VEXTRACTBM(r7,v1)
-	VEXTRACTBM(r8,v2)
-	VEXTRACTBM(r9,v3)
-	VEXTRACTBM(r10,v4)
+	vextractbm r7,v1
+	vextractbm r8,v2
+	vextractbm r9,v3
+	vextractbm r10,v4
 
 	/* Shift each value into their corresponding position.  */
 	sldi	  r8,r8,16
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
deleted file mode 100644
index 10700dd..0000000
--- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
+++ /dev/null
@@ -1,271 +0,0 @@
-/* Optimized strncmp implementation for PowerPC64/POWER10.
-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Implements the function
-
-   int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
-
-   The implementation uses unaligned doubleword access to avoid specialized
-   code paths depending of data alignment for first 32 bytes and uses
-   vectorised loops after that.  */
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-#define LXVP(xtp,dq,ra)              \
-	.long(((6)<<(32-6))          \
-	| ((((xtp)-32)>>1)<<(32-10)) \
-	| ((1)<<(32-11))             \
-	| ((ra)<<(32-16))            \
-	| dq)
-
-#define COMPARE_16(vreg1,vreg2,offset) \
-	lxv	  vreg1+32,offset(r3); \
-	lxv	  vreg2+32,offset(r4); \
-	vcmpnezb. v7,vreg1,vreg2;      \
-	bne	  cr6,L(different);    \
-	cmpldi	  cr7,r5,16;           \
-	ble	  cr7,L(ret0);         \
-	addi	  r5,r5,-16;
-
-#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
-	LXVP(vreg1+32,offset,r3);                    \
-	LXVP(vreg2+32,offset,r4);                    \
-	vcmpnezb. v7,vreg1+1,vreg2+1;                \
-	bne	  cr6,L(label1);                     \
-	vcmpnezb. v7,vreg1,vreg2;                    \
-	bne	  cr6,L(label2);                     \
-	cmpldi	  cr7,r5,32;                         \
-	ble	  cr7,L(ret0);                       \
-	addi	  r5,r5,-32;
-
-#define TAIL_FIRST_16B(vreg1,vreg2) \
-	vctzlsbb r6,v7;             \
-	cmpld	 cr7,r5,r6;         \
-	ble	 cr7,L(ret0);       \
-	vextubrx r5,r6,vreg1;       \
-	vextubrx r4,r6,vreg2;       \
-	subf	 r3,r4,r5;          \
-	blr;
-
-#define TAIL_SECOND_16B(vreg1,vreg2) \
-	vctzlsbb r6,v7;              \
-	addi	 r0,r6,16;           \
-	cmpld	 cr7,r5,r0;          \
-	ble	 cr7,L(ret0);        \
-	vextubrx r5,r6,vreg1;        \
-	vextubrx r4,r6,vreg2;        \
-	subf	 r3,r4,r5;           \
-	blr;
-
-#define CHECK_N_BYTES(reg1,reg2,len_reg) \
-	sldi	  r6,len_reg,56;	 \
-	lxvl	  32+v4,reg1,r6;	 \
-	lxvl	  32+v5,reg2,r6;	 \
-	add	  reg1,reg1,len_reg;	 \
-	add	  reg2,reg2,len_reg;	 \
-	vcmpnezb  v7,v4,v5;		 \
-	vctzlsbb  r6,v7;		 \
-	cmpld	  cr7,r6,len_reg;	 \
-	blt	  cr7,L(different);	 \
-	cmpld	  cr7,r5,len_reg;	 \
-	ble	  cr7,L(ret0);		 \
-	sub	  r5,r5,len_reg;	 \
-
-	/* TODO: change this to .machine power10 when the minimum required
-	 binutils allows it.  */
-	.machine  power9
-ENTRY_TOCLESS (STRNCMP, 4)
-	/* Check if size is 0.  */
-	cmpdi	 cr0,r5,0
-	beq	 cr0,L(ret0)
-	andi.   r7,r3,4095
-	andi.   r8,r4,4095
-	cmpldi  cr0,r7,4096-16
-	cmpldi  cr1,r8,4096-16
-	bgt     cr0,L(crosses)
-	bgt     cr1,L(crosses)
-	COMPARE_16(v4,v5,0)
-	addi	r3,r3,16
-	addi	r4,r4,16
-
-L(crosses):
-	andi.	 r7,r3,15
-	subfic	 r7,r7,16	/* r7(nalign1) = 16 - (str1 & 15).  */
-	andi.	 r9,r4,15
-	subfic	 r8,r9,16	/* r8(nalign2) = 16 - (str2 & 15).  */
-	cmpld	 cr7,r7,r8
-	beq	 cr7,L(same_aligned)
-	blt	 cr7,L(nalign1_min)
-
-	/* nalign2 is minimum and s2 pointer is aligned.  */
-	CHECK_N_BYTES(r3,r4,r8)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.   r10,r3,63       /* Determine offset into 64B hunk.  */
-	andi.   r8,r3,15        /* The offset into the 16B hunk.  */
-	neg     r7,r3
-	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
-	beq     L(compare_64_pagecross)
-	mtctr   r7
-	b       L(compare_64B_unaligned)
-
-	/* nalign1 is minimum and s1 pointer is aligned.  */
-L(nalign1_min):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Are we on the 64B hunk which crosses a page?  */
-	andi.   r10,r4,63       /* Determine offset into 64B hunk.  */
-	andi.   r8,r4,15        /* The offset into the 16B hunk.  */
-	neg     r7,r4
-	andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-	rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
-	beq     L(compare_64_pagecross)
-	mtctr   r7
-
-	.p2align 5
-L(compare_64B_unaligned):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi    r3,r3,64
-	addi    r4,r4,64
-	bdnz    L(compare_64B_unaligned)
-
-	/* Cross the page boundary of s2, carefully. Only for first
-	iteration we have to get the count of 64B blocks to be checked.
-	From second iteration and beyond, loop counter is always 63.  */
-L(compare_64_pagecross):
-	li      r11, 63
-	mtctr   r11
-	cmpldi  r10,16
-	ble     L(cross_4)
-	cmpldi  r10,32
-	ble     L(cross_3)
-	cmpldi  r10,48
-	ble     L(cross_2)
-L(cross_1):
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi    r3,r3,48
-	addi    r4,r4,48
-	b       L(compare_64B_unaligned)
-L(cross_2):
-	COMPARE_16(v4,v5,0)
-	addi    r3,r3,16
-	addi    r4,r4,16
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi    r3,r3,32
-	addi    r4,r4,32
-	b       L(compare_64B_unaligned)
-L(cross_3):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi    r3,r3,32
-	addi    r4,r4,32
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	COMPARE_16(v4,v5,0)
-	addi    r3,r3,16
-	addi    r4,r4,16
-	b       L(compare_64B_unaligned)
-L(cross_4):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	addi    r3,r3,48
-	addi    r4,r4,48
-	CHECK_N_BYTES(r3,r4,r9)
-	CHECK_N_BYTES(r3,r4,r8)
-	b       L(compare_64B_unaligned)
-
-L(same_aligned):
-	CHECK_N_BYTES(r3,r4,r7)
-	/* Align s1 to 32B and adjust s2 address.
-	   Use lxvp only if both s1 and s2 are 32B aligned.  */
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	addi	r5,r5,32
-
-	clrldi  r6,r3,59
-	subfic	r7,r6,32
-	add	r3,r3,r7
-	add	r4,r4,r7
-	subf	r5,r7,r5
-	andi.	r7,r4,0x1F
-	beq	cr0,L(32B_aligned_loop)
-
-	.p2align 5
-L(16B_aligned_loop):
-	COMPARE_16(v4,v5,0)
-	COMPARE_16(v4,v5,16)
-	COMPARE_16(v4,v5,32)
-	COMPARE_16(v4,v5,48)
-	addi	r3,r3,64
-	addi	r4,r4,64
-	b	L(16B_aligned_loop)
-
-	/* Calculate and return the difference.  */
-L(different):
-	TAIL_FIRST_16B(v4,v5)
-
-	.p2align 5
-L(32B_aligned_loop):
-	COMPARE_32(v14,v16,0,tail1,tail2)
-	COMPARE_32(v18,v20,32,tail3,tail4)
-	COMPARE_32(v22,v24,64,tail5,tail6)
-	COMPARE_32(v26,v28,96,tail7,tail8)
-	addi	r3,r3,128
-	addi	r4,r4,128
-	b	L(32B_aligned_loop)
-
-L(tail1): TAIL_FIRST_16B(v15,v17)
-L(tail2): TAIL_SECOND_16B(v14,v16)
-L(tail3): TAIL_FIRST_16B(v19,v21)
-L(tail4): TAIL_SECOND_16B(v18,v20)
-L(tail5): TAIL_FIRST_16B(v23,v25)
-L(tail6): TAIL_SECOND_16B(v22,v24)
-L(tail7): TAIL_FIRST_16B(v27,v29)
-L(tail8): TAIL_SECOND_16B(v26,v28)
-
-	.p2align 5
-L(ret0):
-	li	r3,0
-	blr
-
-END(STRNCMP)
-libc_hidden_builtin_def(strncmp)
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
index 83b21c6..f0cde81 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
@@ -28,21 +28,6 @@
    The implementation uses unaligned doubleword access for first 32 bytes
    as in POWER8 patch and uses vectorised loops after that.  */
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -61,10 +46,7 @@
 2: \
 	vperm   reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when the minimum required binutils
-   allows it.  */
-
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRCMP, 4)
 	li	r0, 0
 
@@ -116,7 +98,7 @@ L(align):
 	/* Both s1 and s2 are unaligned.  */
 	GET16BYTES(v4, r7, v10)
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -136,28 +118,28 @@ L(match):
 L(s1_align):
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	bne	cr6, L(different)
 
 	lvx	v4, 0, r7
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi	r7, r7, 16
 	addi	r4, r4, 16
 	beq	cr6, L(aligned)
 
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vctzlsbb r6, v7
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
index 60c74ab..5a25f94 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
@@ -29,21 +29,6 @@
 # define STRNCMP strncmp
 #endif
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27.  Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-				| ((t)<<(32-11))  \
-				| ((a)<<(32-16))  \
-				| ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -64,9 +49,7 @@
 2: \
 	vperm	reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when minimum binutils
-   is upgraded to 2.27.  */
-	.machine  power7
+	.machine  power9
 ENTRY_TOCLESS (STRNCMP, 4)
 	/* Check if size is 0.  */
 	cmpdi	cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
 	clrldi	r6, r3, 60
 	subfic	r11, r6, 16
 	GET16BYTES(v4, r3, v10)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq	cr6, L(match)
 	b	L(different)
 
@@ -186,7 +169,7 @@ L(match):
 L(s1_align):
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
 
 	lvx	v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
 L(aligned):
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
 
 	lvx	v4, 0, r3
 	lvx	v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne	cr6, L(different)
 	cmpldi	cr7, r5, 16
 	ble	cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
 	b	L(aligned)
 	/* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
+	vctzlsbb r6, v7
 	cmplw	cr7, r5, r6
 	ble	cr7, L(ret0)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf	r3, r4, r5
 	extsw	r3, r3
 	blr
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 624439d..e321ce5 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -30,12 +30,11 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
 		   strncase-power8
 
 ifneq (,$(filter %le,$(config-machine)))
-sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \
-		   memmove-power10 memset-power10 rawmemchr-power9 \
-		   rawmemchr-power10 strcmp-power9 strcmp-power10 \
-		   strncmp-power9 strncmp-power10 strcpy-power9 strcat-power10 \
-		   stpcpy-power9 strlen-power9 strncpy-power9 stpncpy-power9 \
-		   strlen-power10
+sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
+		   rawmemchr-power9 rawmemchr-power10 \
+		   strcmp-power9 strncmp-power9 \
+		   strcpy-power9 strcat-power10 stpcpy-power9 \
+		   strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
 endif
 endif
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index f3acd38..016d05f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c.  */
   IFUNC_IMPL (i, name, strncmp,
 #ifdef __LITTLE_ENDIAN__
-	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1
-			      && hwcap & PPC_FEATURE_HAS_VSX,
-			      __strncmp_power10)
 	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 			      __strncmp_power9)
@@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c.  */
   IFUNC_IMPL (i, name, memchr,
-#ifdef __LITTLE_ENDIAN__
-	      IFUNC_IMPL_ADD (array, i, memchr,
-		              hwcap2 & PPC_FEATURE2_ARCH_3_1
-			      && hwcap & PPC_FEATURE_HAS_VSX,
-			      __memchr_power10)
-#endif
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      hwcap2 & PPC_FEATURE2_ARCH_2_07
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
@@ -376,10 +367,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcmp,
 #ifdef __LITTLE_ENDIAN__
 	      IFUNC_IMPL_ADD (array, i, strcmp,
-			      (hwcap2 & PPC_FEATURE2_ARCH_3_1)
-			      && (hwcap & PPC_FEATURE_HAS_VSX),
-			      __strcmp_power10)
-	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      hwcap2 & PPC_FEATURE2_ARCH_3_00
 			      && hwcap & PPC_FEATURE_HAS_ALTIVEC,
 			      __strcmp_power9)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c
index b63c796..3abd64a 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c
@@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden;
 extern __typeof (__memchr) __memchr_power7 attribute_hidden;
 extern __typeof (__memchr) __memchr_power8 attribute_hidden;
 
-# ifdef __LITTLE_ENDIAN__
-extern __typeof (__memchr) __memchr_power10 attribute_hidden;
-# endif
 /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
    ifunc symbol properly.  */
 libc_ifunc (__memchr,
-# ifdef __LITTLE_ENDIAN__
-	    (hwcap2 & PPC_FEATURE2_ARCH_3_1
-	     && hwcap & PPC_FEATURE_HAS_VSX)
-	    ? __memchr_power10 :
-# endif
-	      (hwcap2 & PPC_FEATURE2_ARCH_2_07
-	      && hwcap & PPC_FEATURE_HAS_ALTIVEC)
-	      ? __memchr_power8 :
-	        (hwcap & PPC_FEATURE_ARCH_2_06)
-	        ? __memchr_power7
-	        : __memchr_ppc);
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07
+	     && hwcap & PPC_FEATURE_HAS_ALTIVEC)
+	    ? __memchr_power8 :
+	    (hwcap & PPC_FEATURE_ARCH_2_06)
+            ? __memchr_power7
+            : __memchr_ppc);
 
 weak_alias (__memchr, memchr)
 libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
index 3c636e3..7c77c08 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
@@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden;
 extern __typeof (strcmp) __strcmp_power8 attribute_hidden;
 # ifdef __LITTLE_ENDIAN__
 extern __typeof (strcmp) __strcmp_power9 attribute_hidden;
-extern __typeof (strcmp) __strcmp_power10 attribute_hidden;
 # endif
 
 # undef strcmp
 
 libc_ifunc_redirected (__redirect_strcmp, strcmp,
 # ifdef __LITTLE_ENDIAN__
-		        (hwcap2 & PPC_FEATURE2_ARCH_3_1
-			 && hwcap & PPC_FEATURE_HAS_VSX)
-			? __strcmp_power10 :
 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
 			? __strcmp_power9 :
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index 0a664a6..4cfe27f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
 extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
 # ifdef __LITTLE_ENDIAN__
 extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
-extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
 # endif
 # undef strncmp
 
@@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
    ifunc symbol properly.  */
 libc_ifunc_redirected (__redirect_strncmp, strncmp,
 # ifdef __LITTLE_ENDIAN__
-			(hwcap2 & PPC_FEATURE2_ARCH_3_1
-			 && hwcap & PPC_FEATURE_HAS_VSX)
-			? __strncmp_power10 :
 			(hwcap2 & PPC_FEATURE2_ARCH_3_00
 			 && hwcap & PPC_FEATURE_HAS_ALTIVEC)
 			? __strncmp_power9 :
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index de146dd..7572f62 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -62,7 +62,6 @@ tests += \
   tst-abstime \
   tst-atfork1 \
   tst-attr1 \
-  tst-backtrace1 \
   tst-bad-schedattr \
   tst-barrier1 \
   tst-barrier2 \
diff --git a/sysdeps/pthread/tst-backtrace1.c b/sysdeps/pthread/tst-backtrace1.c
deleted file mode 100644
index 01b8a0c..0000000
--- a/sysdeps/pthread/tst-backtrace1.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Copyright (C) 2004-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <execinfo.h>
-#include <pthread.h>
-#include <stdio.h>
-
-#define BT_SIZE 64
-void *bt_array[BT_SIZE];
-int bt_cnt;
-
-int
-do_bt (void)
-{
-  bt_cnt = backtrace (bt_array, BT_SIZE);
-  return 56;
-}
-
-int
-call_do_bt (void)
-{
-  return do_bt () + 1;
-}
-
-void *
-tf (void *arg)
-{
-  if (call_do_bt () != 57)
-    return (void *) 1L;
-  return NULL;
-}
-
-int
-do_test (void)
-{
-  pthread_t th;
-  if (pthread_create (&th, NULL, tf, NULL))
-    {
-      puts ("create failed");
-      return 1;
-    }
-
-  void *res;
-  if (pthread_join (th, &res))
-    {
-      puts ("join failed");
-      return 1;
-    }
-
-  if (res != NULL)
-    {
-      puts ("thread failed");
-      return 1;
-    }
-
-  char **text = backtrace_symbols (bt_array, bt_cnt);
-  if (text == NULL)
-    {
-      puts ("backtrace_symbols failed");
-      return 1;
-    }
-
-  for (int i = 0; i < bt_cnt; ++i)
-    puts (text[i]);
-
-  return 0;
-}
-
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
diff --git a/sysdeps/pthread/tst-cond23.c b/sysdeps/pthread/tst-cond23.c
index 0a68472..a338397 100644
--- a/sysdeps/pthread/tst-cond23.c
+++ b/sysdeps/pthread/tst-cond23.c
@@ -151,7 +151,7 @@ do_test (void)
 #if !defined _POSIX_CLOCK_SELECTION || _POSIX_CLOCK_SELECTION == -1
 
   puts ("_POSIX_CLOCK_SELECTION not supported, test skipped");
-  return 0;
+  return EXIT_UNSUPPORTED;
 
 #else
 
diff --git a/sysdeps/pthread/tst-fopen-threaded.c b/sysdeps/pthread/tst-fopen-threaded.c
index ade58ad..c17f1ea 100644
--- a/sysdeps/pthread/tst-fopen-threaded.c
+++ b/sysdeps/pthread/tst-fopen-threaded.c
@@ -34,11 +34,13 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <stdlib.h>
 
 #include <support/check.h>
 #include <support/temp_file.h>
 #include <support/xstdio.h>
 #include <support/xthread.h>
+#include <support/support.h>
 
 #define NUM_THREADS 100
 #define ITERS 10
@@ -111,7 +113,8 @@ threadOpenCloseRoutine (void *argv)
   /* Wait for all threads to be ready to call fopen and fclose.  */
   xpthread_barrier_wait (&barrier);
 
-  FILE *fd = xfopen ("/tmp/openclosetest", "w+");
+  char *file = (char *) argv;
+  FILE *fd = xfopen (file, "w+");
   xfclose (fd);
   return NULL;
 }
@@ -235,6 +238,10 @@ do_test (void)
       xfclose (fd_file);
     }
 
+  char *tempdir = support_create_temp_directory ("openclosetest-");
+  char *file = xasprintf ("%s/file", tempdir);
+  add_temp_file (file);
+
   /* Test 3: Concurrent open/close.  */
   for (int reps = 1; reps <= ITERS; reps++)
     {
@@ -243,7 +250,7 @@ do_test (void)
         {
           threads[i] =
             xpthread_create (support_small_stack_thread_attribute (),
-                             threadOpenCloseRoutine, NULL);
+                             threadOpenCloseRoutine, file);
         }
       for (int i = 0; i < NUM_THREADS; i++)
         {
@@ -252,6 +259,9 @@ do_test (void)
       xpthread_barrier_destroy (&barrier);
     }
 
+  free (file);
+  free (tempdir);
+
   return 0;
 }
 
diff --git a/sysdeps/s390/s390-32/s390-mcount.S b/sysdeps/s390/s390-32/s390-mcount.S
index 59614ee..7f8457f 100644
--- a/sysdeps/s390/s390-32/s390-mcount.S
+++ b/sysdeps/s390/s390-32/s390-mcount.S
@@ -54,11 +54,7 @@ C_LABEL(_mcount)
 	/* Save the caller-clobbered registers.  */
 	ahi   %r15,-128
 	cfi_adjust_cfa_offset (128)
-	/* binutils 2.28+: .cfi_val_offset r15, -96 */
-	.cfi_escape \
-		/* DW_CFA_val_offset */ 0x14, \
-		/* r15 */               0x0f, \
-		/* scaled offset */     0x18
+	cfi_val_offset (r15, -96)
 	stm   %r14,%r5,96(%r15)
 	cfi_offset (r14, -128)
 	l     %r2,132(%r15)       # callers address  = first parameter
diff --git a/sysdeps/s390/s390-64/s390x-mcount.h b/sysdeps/s390/s390-64/s390x-mcount.h
index b82f1a8..c5bd70d 100644
--- a/sysdeps/s390/s390-64/s390x-mcount.h
+++ b/sysdeps/s390/s390-64/s390x-mcount.h
@@ -68,11 +68,7 @@ C_LABEL(MCOUNT_SYMBOL)
 	/* Save the caller-clobbered registers.  */
 	aghi  %r15,-224
 	cfi_adjust_cfa_offset (224)
-	/* binutils 2.28+: .cfi_val_offset r15, -160 */
-	.cfi_escape \
-		/* DW_CFA_val_offset */ 0x14, \
-		/* r15 */               0x0f, \
-		/* scaled offset */     0x14
+	cfi_val_offset (r15, -160)
 	stmg  %r14,%r5,160(%r15)
 	cfi_offset (r14, -224)
 	cfi_offset (r0, -224+16)
diff --git a/sysdeps/sparc/sparc32/start.S b/sysdeps/sparc/sparc32/start.S
index 694b020..8393760 100644
--- a/sysdeps/sparc/sparc32/start.S
+++ b/sysdeps/sparc/sparc32/start.S
@@ -35,6 +35,7 @@
 
 #include <sysdep.h>
 
+#define FRAME_SIZE 104
 
 	.section ".text"
 	.align 4
@@ -48,12 +49,12 @@ _start:
   /* Terminate the stack frame, and reserve space for functions to
      drop their arguments.  */
 	mov	%g0, %fp
-	sub	%sp, 6*4, %sp
+	sub	%sp, FRAME_SIZE, %sp
 
   /* Extract the arguments and environment as encoded on the stack.  The
      argument info starts after one register window (16 words) past the SP.  */
-	ld	[%sp+22*4], %o1
-	add	%sp, 23*4, %o2
+	ld	[%sp+168], %o1
+	add	%sp, 172, %o2
 
   /* Load the addresses of the user entry points.  */
 #ifndef PIC
@@ -73,6 +74,10 @@ _start:
      be NULL.  */
 	mov	%g1, %o5
 
+  /* Provide the highest stack address to update the __libc_stack_end (used
+     to enable executable stacks if required).  */
+	st	%sp, [%sp+23*4]
+
   /* Let libc do the rest of the initialization, and call main.  */
 	call	__libc_start_main
 	 nop
diff --git a/sysdeps/sparc/sparc64/start.S b/sysdeps/sparc/sparc64/start.S
index c9c25c2..08e1e77 100644
--- a/sysdeps/sparc/sparc64/start.S
+++ b/sysdeps/sparc/sparc64/start.S
@@ -74,6 +74,10 @@ _start:
      be NULL.  */
 	mov     %g1, %o5
 
+  /* Provide the highest stack address to update the __libc_stack_end (used
+     to enable executable stacks if required).  */
+	stx	%sp, [%sp+STACK_BIAS+22*8]
+
   /* Let libc do the rest of the initialization, and call main.  */
 	call    __libc_start_main
 	 nop
diff --git a/sysdeps/unix/bsd/tcsetattr.c b/sysdeps/unix/bsd/tcsetattr.c
index 38b5f71..8693d94 100644
--- a/sysdeps/unix/bsd/tcsetattr.c
+++ b/sysdeps/unix/bsd/tcsetattr.c
@@ -32,7 +32,7 @@
 
 /* Set the state of FD to *TERMIOS_P.  */
 int
-tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
+__tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
 {
   struct termios myt;
 
@@ -56,4 +56,6 @@ tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
       return __ioctl (fd, TIOCSETAF, termios_p);
     }
 }
-libc_hidden_def (tcsetattr)
+
+libc_hidden_def (__tcsetattr)
+weak_alias (__tcsetattr, tcsetattr)
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index ebcf820..2c5bf42 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -151,15 +151,6 @@ sysdep_headers += \
   bits/struct_stat.h \
   bits/struct_stat_time64_helper.h \
   bits/syscall.h \
-  bits/termios-baud.h \
-  bits/termios-c_cc.h \
-  bits/termios-c_cflag.h \
-  bits/termios-c_iflag.h \
-  bits/termios-c_lflag.h \
-  bits/termios-c_oflag.h \
-  bits/termios-misc.h \
-  bits/termios-struct.h \
-  bits/termios-tcflow.h \
   bits/timerfd.h \
   bits/types/struct_msqid64_ds.h \
   bits/types/struct_msqid64_ds_helper.h \
@@ -201,6 +192,7 @@ tests += \
   tst-clone \
   tst-clone2 \
   tst-clone3 \
+  tst-copy_file_range-large \
   tst-epoll \
   tst-epoll-ioctls \
   tst-fanotify \
@@ -421,6 +413,24 @@ tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
 
 endif # $(subdir) == misc
 
+ifeq ($(subdir),termios)
+sysdep_headers += \
+  bits/termios-c_cc.h \
+  bits/termios-c_cflag.h \
+  bits/termios-c_iflag.h \
+  bits/termios-c_lflag.h \
+  bits/termios-c_oflag.h \
+  bits/termios-cbaud.h \
+  bits/termios-misc.h \
+  bits/termios-struct.h \
+  bits/termios-tcflow.h \
+  # sysdep_headers
+
+tests += \
+  tst-termios-linux \
+  # tests
+endif
+
 ifeq ($(subdir),time)
 sysdep_headers += \
   bits/timex.h \
@@ -603,6 +613,7 @@ endif
 ifeq ($(subdir),io)
 sysdep_routines += \
   close_nocancel \
+  close_nocancel_nostatus \
   fallocate \
   fallocate64 \
   fcntl_nocancel \
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 55d5655..585dec7 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -332,6 +332,13 @@ libc {
     sched_getattr;
     sched_setattr;
   }
+  GLIBC_2.42 {
+    cfgetospeed;
+    cfgetispeed;
+    cfsetospeed;
+    cfsetispeed;
+    cfsetspeed;
+  }
   GLIBC_PRIVATE {
     # functions used in other libraries
     __syscall_rt_sigqueueinfo;
@@ -339,6 +346,7 @@ libc {
     __read_nocancel;
     __pread64_nocancel;
     __close_nocancel;
+    __close_nocancel_nostatus;
     __sigtimedwait;
     # functions used by nscd
     __netlink_assert_response;
diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
index 89aced0..ba4a461 100644
--- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
@@ -175,6 +175,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index 6d63c8a..1acc82d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -23,6 +23,7 @@
 #include <sys/prctl.h>
 #include <sys/utsname.h>
 #include <dl-tunables-parse.h>
+#include <dl-symbol-redir-ifunc.h>
 
 #define DCZID_DZP_MASK (1 << 4)
 #define DCZID_BS_MASK (0xf)
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
index aa6bf48..a22e651 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist
@@ -2752,6 +2752,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/aarch64/libm.abilist b/sysdeps/unix/sysv/linux/aarch64/libm.abilist
index ecdabe6..bb8114b 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libm.abilist
@@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index a56ce7f..f7f72b6 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -148,3 +148,23 @@ GLIBC_2.41 _ZGVsMxv_sinpi F
 GLIBC_2.41 _ZGVsMxv_sinpif F
 GLIBC_2.41 _ZGVsMxv_tanpi F
 GLIBC_2.41 _ZGVsMxv_tanpif F
+GLIBC_2.42 _ZGVnN2v_acospi F
+GLIBC_2.42 _ZGVnN2v_acospif F
+GLIBC_2.42 _ZGVnN2v_asinpi F
+GLIBC_2.42 _ZGVnN2v_asinpif F
+GLIBC_2.42 _ZGVnN2v_atanpi F
+GLIBC_2.42 _ZGVnN2v_atanpif F
+GLIBC_2.42 _ZGVnN2vv_atan2pi F
+GLIBC_2.42 _ZGVnN2vv_atan2pif F
+GLIBC_2.42 _ZGVnN4v_acospif F
+GLIBC_2.42 _ZGVnN4v_asinpif F
+GLIBC_2.42 _ZGVnN4v_atanpif F
+GLIBC_2.42 _ZGVnN4vv_atan2pif F
+GLIBC_2.42 _ZGVsMxv_acospi F
+GLIBC_2.42 _ZGVsMxv_acospif F
+GLIBC_2.42 _ZGVsMxv_asinpi F
+GLIBC_2.42 _ZGVsMxv_asinpif F
+GLIBC_2.42 _ZGVsMxv_atanpi F
+GLIBC_2.42 _ZGVsMxv_atanpif F
+GLIBC_2.42 _ZGVsMxvv_atan2pi F
+GLIBC_2.42 _ZGVsMxvv_atan2pif F
diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
index a2eab9e..4485723 100644
--- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c
+++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c
@@ -36,9 +36,7 @@ static struct _aarch64_ctx *extension (void *p)
 static void *
 alloc_makecontext_gcs (size_t stack_size)
 {
-  void *base;
-  size_t size;
-  void *gcsp = __alloc_gcs (stack_size, &base, &size);
+  void *gcsp = __alloc_gcs (stack_size, NULL);
   if (gcsp == NULL)
     /* ENOSYS, bad size or OOM.  */
     abort ();
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index 022a263..d9716f0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -48,25 +48,16 @@ ENTRY (__setcontext)
 	cbz	x0, 1f
 	b	C_SYMBOL_NAME (__syscall_error)
 1:
-	/* Disable ZA of SME.  */
-#if HAVE_AARCH64_PAC_RET
-	PACIASP
-	cfi_window_save
-#endif
-	stp	x29, x30, [sp, -16]!
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (x29, 0)
-	cfi_rel_offset (x30, 8)
-	mov	x29, sp
+	/* Clear ZA state of SME.  */
+	/* The calling convention of __libc_arm_za_disable allows to do
+	   this thus allowing to avoid saving to and reading from stack.
+	   As a result we also don't need to sign the return address and
+	   check it after returning because it is not stored to stack.  */
+	mov	x13, x30
+	cfi_register (x30, x13)
 	bl	__libc_arm_za_disable
-	ldp	x29, x30, [sp], 16
-	cfi_adjust_cfa_offset (-16)
-	cfi_restore (x29)
-	cfi_restore (x30)
-#if HAVE_AARCH64_PAC_RET
-	AUTIASP
-	cfi_window_save
-#endif
+	mov	x30, x13
+	cfi_register (x13, x30)
 	/* Restore the general purpose registers.  */
 	mov	x0, x9
 	cfi_def_cfa (x0, 0)
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index cc41253..58ddb95 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -119,7 +119,7 @@ L(gcs_done):
 2:
 	/* The oucp context is restored here via an indirect branch,
 	   x1 must be restored too which has the real return address.  */
-	BTI_J
+	bti	j
 	mov	x30, x1
 	RET
 PSEUDO_END (__swapcontext)
diff --git a/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h b/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h
new file mode 100644
index 0000000..9d5d345
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h
@@ -0,0 +1,78 @@
+/* Signal frame backtracing support for SFrame on AARCH64.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation; either version 2.1 of
+   the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This code is inspired by libgcc's MD_FALLBACK_FRAME_STATE_FOR
+   implementation.  See libgcc/config/aarch64/linux-unwind.h.  */
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <kernel_rt_sigframe.h>
+
+#ifdef __AARCH64EL__
+#define MOVZ_X8_8B      0xd2801168
+#define SVC_0           0xd4000001
+#else
+#define MOVZ_X8_8B      0x681180d2
+#define SVC_0           0x010000d4
+#endif
+
+#define MD_DECODE_SIGNAL_FRAME aarch64_decode_signal_frame
+
+/* If FRAME->pc points at __kernel_rt_sigreturn, reload pc, sp and fp from
+   the signal frame's saved mcontext; otherwise return _URC_END_OF_STACK.  */
+static _Unwind_Reason_Code
+aarch64_decode_signal_frame (frame *frame)
+{
+  unsigned int *pc = (unsigned int *) frame->pc;
+
+  if ((frame->pc & 3) != 0)
+    return _URC_END_OF_STACK;
+
+  /* A signal frame will have a return address pointing to
+     __kernel_rt_sigreturn.  This code is hardwired as:
+
+     0xd2801168         movz x8, #0x8b
+     0xd4000001         svc  0x0
+   */
+  if (pc[0] != MOVZ_X8_8B || pc[1] != SVC_0)
+    return _URC_END_OF_STACK;
+
+  struct kernel_rt_sigframe *rt_ = (struct kernel_rt_sigframe *) frame->sp;
+  mcontext_t *mt = &rt_->uc.uc_mcontext;
+
+  /* x29 is the frame pointer; x30 is the link register.  */
+#define FP_REGNUM 29
+
+  frame->pc = (_Unwind_Ptr) mt->pc;
+  frame->sp = (_Unwind_Ptr) mt->sp;
+  frame->fp = (_Unwind_Ptr) mt->regs[FP_REGNUM];
+  return _URC_NO_REASON;
+}
+
+#define MD_DETECT_OUTERMOST_FRAME aarch64_detect_outermost_frame
+
+/* Report _URC_END_OF_STACK when FRAME is the outermost frame and
+   _URC_NO_REASON otherwise.  */
+static _Unwind_Reason_Code
+aarch64_detect_outermost_frame (frame *frame)
+{
+  /* The initial frame has both LR and FP zeroed; only FP is tracked
+     here, so a null FP is what marks the outermost frame.  */
+  return frame->fp == 0 ? _URC_END_OF_STACK : _URC_NO_REASON;
+}
diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
index 455da93..840d6fe 100644
--- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h
@@ -209,6 +209,7 @@
 #define __NR_open 45
 #define __NR_open_by_handle_at 498
 #define __NR_open_tree 538
+#define __NR_open_tree_attr 577
 #define __NR_openat 450
 #define __NR_openat2 547
 #define __NR_osf_adjtime 140
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
index 1f9f7f2..d830884 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h
@@ -36,4 +36,6 @@
 
 #ifdef __USE_MISC
 # define ADDRB 04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h
index 324d5d8..69421f6 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h
@@ -17,30 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
-# define CBAUD	0000037
-# define CBAUDEX 0000000
-# define CMSPAR	  010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
+# define CBAUD	    000000037
+# define CBAUDEX    000000000
+# define CIBAUD     007600000
+# define IBSHIFT    16
 #endif
 
-#define  B57600   00020
-#define  B115200  00021
-#define  B230400  00022
-#define  B460800  00023
-#define  B500000  00024
-#define  B576000  00025
-#define  B921600  00026
-#define  B1000000 00027
-#define  B1152000 00030
-#define  B1500000 00031
-#define  B2000000 00032
-#define  B2500000 00033
-#define  B3000000 00034
-#define  B3500000 00035
-#define  B4000000 00036
-
-#define __MAX_BAUD B4000000
+#define  __B57600   00020
+#define  __B115200  00021
+#define  __B230400  00022
+#define  __B460800  00023
+#define  __B500000  00024
+#define  __B576000  00025
+#define  __B921600  00026
+#define  __B1000000 00027
+#define  __B1152000 00030
+#define  __B1500000 00031
+#define  __B2000000 00032
+#define  __B2500000 00033
+#define  __B3000000 00034
+#define  __B3500000 00035
+#define  __B4000000 00036
+#define  __BOTHER   00037
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
index de4d5fc..f50e9ef 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h
@@ -30,8 +30,15 @@ struct termios
     tcflag_t c_lflag;		/* local mode flags */
     cc_t c_cc[NCCS];		/* control characters */
     cc_t c_line;		/* line discipline (== c_cc[33]) */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
+    /* Input and output baud rates.  */
+    __extension__ union {
+      speed_t __ispeed;
+      speed_t c_ispeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1
+    __extension__ union {
+      speed_t __ospeed;
+      speed_t c_ospeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1
   };
diff --git a/sysdeps/unix/sysv/linux/alpha/kernel-features.h b/sysdeps/unix/sysv/linux/alpha/kernel-features.h
index 6eae48f..83fdf91 100644
--- a/sysdeps/unix/sysv/linux/alpha/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/alpha/kernel-features.h
@@ -54,4 +54,15 @@
 #undef __ASSUME_CLONE3
 #define __ASSUME_CLONE3 0
 
+/* Alpha did not provide BOTHER, CIBAUD or the termios2 ioctls until
+   kernel 4.20.  Even though struct __kernel_termios and struct
+   termios2 are the same on Alpha, calling the legacy TCSETS* ioctls
+   with BOTHER set triggers a bug in these old kernels, so only use
+   the legacy TCSETS* ioctl numbers if neither BOTHER nor split speed is
+   needed; that way the code will fail gracefully.  */
+#if __LINUX_KERNEL_VERSION < 0x041400
+# undef  __ASSUME_TERMIOS2
+# define __ASSUME_TERMIOS2 0
+#endif
+
 #endif /* _KERNEL_FEATURES_H */
diff --git a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h b/sysdeps/unix/sysv/linux/alpha/kernel_termios.h
deleted file mode 100644
index 6a777dd..0000000
--- a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
-
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-    cc_t c_line;		/* line discipline */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
-  };
-
-#define _HAVE_C_ISPEED 1
-#define _HAVE_C_OSPEED 1
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist
index d5df965..4b5736a 100644
--- a/sysdeps/unix/sysv/linux/alpha/libc.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist
@@ -3099,6 +3099,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/alpha/libm.abilist b/sysdeps/unix/sysv/linux/alpha/libm.abilist
index db08345..4b383b1 100644
--- a/sysdeps/unix/sysv/linux/alpha/libm.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libm.abilist
@@ -1428,6 +1428,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/unix/sysv/linux/alpha/termios_arch.h
index 7b45fcd..20025f2 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
+++ b/sysdeps/unix/sysv/linux/alpha/termios_arch.h
@@ -1,5 +1,6 @@
-/* Optimized strcmp implementation for POWER10/PPC64.
-   Copyright (C) 2021-2025 Free Software Foundation, Inc.
+/* Architectural parameters for Linux termios - Alpha/PowerPC version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +17,10 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define STRCMP __strcmp_power10
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
+#ifndef TERMIOS_INTERNALS_H
+# error "<termios_arch.h> should only be included from <termios_internals.h>"
+#endif
 
-#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S>
-#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1
+#define _HAVE_STRUCT_OLD_TERMIOS 0
diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
index 01075e8..2534f0f 100644
--- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h
@@ -177,6 +177,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist
index c46c08d..b8a4478 100644
--- a/sysdeps/unix/sysv/linux/arc/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libc.abilist
@@ -2513,6 +2513,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arc/libm.abilist b/sysdeps/unix/sysv/linux/arc/libm.abilist
index 30b13b9..c865ec8 100644
--- a/sysdeps/unix/sysv/linux/arc/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arc/libm.abilist
@@ -847,6 +847,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
index 9704472..8e585a4 100644
--- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h
@@ -223,6 +223,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 371
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 322
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
index 4df150c..959e446 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist
@@ -2805,6 +2805,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arm/be/libm.abilist b/sysdeps/unix/sysv/linux/arm/be/libm.abilist
index 825ba11..63bad09 100644
--- a/sysdeps/unix/sysv/linux/arm/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arm/be/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
index be29478..a930d1a 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist
@@ -2802,6 +2802,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/arm/le/libm.abilist b/sysdeps/unix/sysv/linux/arm/le/libm.abilist
index 825ba11..63bad09 100644
--- a/sysdeps/unix/sysv/linux/arm/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/arm/le/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/bits/ioctls.h b/sysdeps/unix/sysv/linux/bits/ioctls.h
index 7e226e4..1ddcd4f 100644
--- a/sysdeps/unix/sysv/linux/bits/ioctls.h
+++ b/sysdeps/unix/sysv/linux/bits/ioctls.h
@@ -22,87 +22,4 @@
 /* Use the definitions from the kernel header files.  */
 #include <asm/ioctls.h>
 
-/* Routing table calls.  */
-#define SIOCADDRT	0x890B		/* add routing table entry	*/
-#define SIOCDELRT	0x890C		/* delete routing table entry	*/
-#define SIOCRTMSG	0x890D		/* call to routing system	*/
-
-/* Socket configuration controls. */
-#define SIOCGIFNAME	0x8910		/* get iface name		*/
-#define SIOCSIFLINK	0x8911		/* set iface channel		*/
-#define SIOCGIFCONF	0x8912		/* get iface list		*/
-#define SIOCGIFFLAGS	0x8913		/* get flags			*/
-#define SIOCSIFFLAGS	0x8914		/* set flags			*/
-#define SIOCGIFADDR	0x8915		/* get PA address		*/
-#define SIOCSIFADDR	0x8916		/* set PA address		*/
-#define SIOCGIFDSTADDR	0x8917		/* get remote PA address	*/
-#define SIOCSIFDSTADDR	0x8918		/* set remote PA address	*/
-#define SIOCGIFBRDADDR	0x8919		/* get broadcast PA address	*/
-#define SIOCSIFBRDADDR	0x891a		/* set broadcast PA address	*/
-#define SIOCGIFNETMASK	0x891b		/* get network PA mask		*/
-#define SIOCSIFNETMASK	0x891c		/* set network PA mask		*/
-#define SIOCGIFMETRIC	0x891d		/* get metric			*/
-#define SIOCSIFMETRIC	0x891e		/* set metric			*/
-#define SIOCGIFMEM	0x891f		/* get memory address (BSD)	*/
-#define SIOCSIFMEM	0x8920		/* set memory address (BSD)	*/
-#define SIOCGIFMTU	0x8921		/* get MTU size			*/
-#define SIOCSIFMTU	0x8922		/* set MTU size			*/
-#define SIOCSIFNAME	0x8923		/* set interface name		*/
-#define	SIOCSIFHWADDR	0x8924		/* set hardware address 	*/
-#define SIOCGIFENCAP	0x8925		/* get/set encapsulations       */
-#define SIOCSIFENCAP	0x8926
-#define SIOCGIFHWADDR	0x8927		/* Get hardware address		*/
-#define SIOCGIFSLAVE	0x8929		/* Driver slaving support	*/
-#define SIOCSIFSLAVE	0x8930
-#define SIOCADDMULTI	0x8931		/* Multicast address lists	*/
-#define SIOCDELMULTI	0x8932
-#define SIOCGIFINDEX	0x8933		/* name -> if_index mapping	*/
-#define SIOGIFINDEX	SIOCGIFINDEX	/* misprint compatibility :-)	*/
-#define SIOCSIFPFLAGS	0x8934		/* set/get extended flags set	*/
-#define SIOCGIFPFLAGS	0x8935
-#define SIOCDIFADDR	0x8936		/* delete PA address		*/
-#define	SIOCSIFHWBROADCAST	0x8937	/* set hardware broadcast addr	*/
-#define SIOCGIFCOUNT	0x8938		/* get number of devices */
-
-#define SIOCGIFBR	0x8940		/* Bridging support		*/
-#define SIOCSIFBR	0x8941		/* Set bridging options 	*/
-
-#define SIOCGIFTXQLEN	0x8942		/* Get the tx queue length	*/
-#define SIOCSIFTXQLEN	0x8943		/* Set the tx queue length 	*/
-
-
-/* ARP cache control calls. */
-		    /*  0x8950 - 0x8952  * obsolete calls, don't re-use */
-#define SIOCDARP	0x8953		/* delete ARP table entry	*/
-#define SIOCGARP	0x8954		/* get ARP table entry		*/
-#define SIOCSARP	0x8955		/* set ARP table entry		*/
-
-/* RARP cache control calls. */
-#define SIOCDRARP	0x8960		/* delete RARP table entry	*/
-#define SIOCGRARP	0x8961		/* get RARP table entry		*/
-#define SIOCSRARP	0x8962		/* set RARP table entry		*/
-
-/* Driver configuration calls */
-
-#define SIOCGIFMAP	0x8970		/* Get device parameters	*/
-#define SIOCSIFMAP	0x8971		/* Set device parameters	*/
-
-/* DLCI configuration calls */
-
-#define SIOCADDDLCI	0x8980		/* Create new DLCI device	*/
-#define SIOCDELDLCI	0x8981		/* Delete DLCI device		*/
-
-/* Device private ioctl calls.  */
-
-/* These 16 ioctls are available to devices via the do_ioctl() device
-   vector.  Each device should include this file and redefine these
-   names as their own. Because these are device dependent it is a good
-   idea _NOT_ to issue them to random objects and hope.  */
-
-#define SIOCDEVPRIVATE 		0x89F0	/* to 89FF */
-
-/*
- *	These 16 ioctl calls are protocol private
- */
-
-#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#include <linux/sockios.h>
diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h
index 3159097..0be4b47 100644
--- a/sysdeps/unix/sysv/linux/bits/mman-shared.h
+++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h
@@ -43,10 +43,9 @@
 # endif
 
 /* Access restrictions for pkey_alloc.  */
-# ifndef PKEY_DISABLE_ACCESS
-#  define PKEY_DISABLE_ACCESS 0x1
-#  define PKEY_DISABLE_WRITE 0x2
-# endif
+# define PKEY_UNRESTRICTED 0x0
+# define PKEY_DISABLE_ACCESS 0x1
+# define PKEY_DISABLE_WRITE 0x2
 
 __BEGIN_DECLS
 
diff --git a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
index bbbb621..befd25a 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h
@@ -34,5 +34,7 @@
 #define CLOCAL	0004000
 
 #ifdef __USE_MISC
-# define ADDRB 04000000000
+# define ADDRB    04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/bits/termios-baud.h b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h
index e63a3eb..b9aadff 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h
@@ -17,32 +17,31 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
 # define CBAUD	 000000010017 /* Baud speed mask (not in POSIX).  */
 # define CBAUDEX 000000010000 /* Extra baud speed mask, included in CBAUD.
 				 (not in POSIX).  */
-# define CIBAUD	 002003600000 /* Input baud rate (not used).  */
-# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
-# define CRTSCTS 020000000000 /* Flow control.  */
+# define CIBAUD	 002003600000 /* Input baud rate.  */
+# define IBSHIFT 16
 #endif
 
 /* Extra output baud rates (not in POSIX).  */
-#define  B57600    0010001
-#define  B115200   0010002
-#define  B230400   0010003
-#define  B460800   0010004
-#define  B500000   0010005
-#define  B576000   0010006
-#define  B921600   0010007
-#define  B1000000  0010010
-#define  B1152000  0010011
-#define  B1500000  0010012
-#define  B2000000  0010013
-#define  B2500000  0010014
-#define  B3000000  0010015
-#define  B3500000  0010016
-#define  B4000000  0010017
-#define __MAX_BAUD B4000000
+#define  __BOTHER    0010000
+#define  __B57600    0010001
+#define  __B115200   0010002
+#define  __B230400   0010003
+#define  __B460800   0010004
+#define  __B500000   0010005
+#define  __B576000   0010006
+#define  __B921600   0010007
+#define  __B1000000  0010010
+#define  __B1152000  0010011
+#define  __B1500000  0010012
+#define  __B2000000  0010013
+#define  __B2500000  0010014
+#define  __B3000000  0010015
+#define  __B3500000  0010016
+#define  __B4000000  0010017
diff --git a/sysdeps/unix/sysv/linux/bits/termios-struct.h b/sysdeps/unix/sysv/linux/bits/termios-struct.h
index 4c501a5..0aba1a4 100644
--- a/sysdeps/unix/sysv/linux/bits/termios-struct.h
+++ b/sysdeps/unix/sysv/linux/bits/termios-struct.h
@@ -29,8 +29,15 @@ struct termios
     tcflag_t c_lflag;		/* local mode flags */
     cc_t c_line;			/* line discipline */
     cc_t c_cc[NCCS];		/* control characters */
-    speed_t c_ispeed;		/* input speed */
-    speed_t c_ospeed;		/* output speed */
+    /* Input and output baud rates.  */
+    __extension__ union {
+      speed_t __ispeed;
+      speed_t c_ispeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1
+    __extension__ union {
+      speed_t __ospeed;
+      speed_t c_ospeed;
+    };
 #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1
   };
diff --git a/sysdeps/unix/sysv/linux/bits/termios.h b/sysdeps/unix/sysv/linux/bits/termios.h
index 3bd1e22..20746a0 100644
--- a/sysdeps/unix/sysv/linux/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/bits/termios.h
@@ -24,35 +24,41 @@ typedef unsigned char	cc_t;
 typedef unsigned int	speed_t;
 typedef unsigned int	tcflag_t;
 
-#include <bits/termios-struct.h>
+#ifdef _TERMIOS_H
+# include <bits/termios-struct.h>
+#endif
+
 #include <bits/termios-c_cc.h>
 #include <bits/termios-c_iflag.h>
 #include <bits/termios-c_oflag.h>
 
 /* c_cflag bit meaning */
-#define  B0	0000000		/* hang up */
-#define  B50	0000001
-#define  B75	0000002
-#define  B110	0000003
-#define  B134	0000004
-#define  B150	0000005
-#define  B200	0000006
-#define  B300	0000007
-#define  B600	0000010
-#define  B1200	0000011
-#define  B1800	0000012
-#define  B2400	0000013
-#define  B4800	0000014
-#define  B9600	0000015
-#define  B19200	0000016
-#define  B38400	0000017
+#include <bits/termios-c_cflag.h>
+
 #ifdef __USE_MISC
-# define EXTA B19200
-# define EXTB B38400
+#define __B0	 0000000	/* hang up */
+#define __B50	 0000001
+#define __B75	 0000002
+#define __B110	 0000003
+#define __B134	 0000004
+#define __B150	 0000005
+#define __B200	 0000006
+#define __B300	 0000007
+#define __B600	 0000010
+#define __B1200	 0000011
+#define __B1800	 0000012
+#define __B2400	 0000013
+#define __B4800	 0000014
+#define __B9600  0000015
+#define __B19200 0000016
+#define __B38400 0000017
+#include <bits/termios-cbaud.h>
+
+# define __EXTA	 __B19200
+# define __EXTB	 __B38400
+# define BOTHER  __BOTHER
 #endif
-#include <bits/termios-baud.h>
 
-#include <bits/termios-c_cflag.h>
 #include <bits/termios-c_lflag.h>
 
 #ifdef __USE_MISC
@@ -74,3 +80,5 @@ typedef unsigned int	tcflag_t;
 #include <bits/termios-tcflow.h>
 
 #include <bits/termios-misc.h>
+
+#include <bits/termios-baud.h>
diff --git a/sysdeps/unix/sysv/linux/cfsetspeed.c b/sysdeps/unix/sysv/linux/cfsetspeed.c
new file mode 100644
index 0000000..8ce46f8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/cfsetspeed.c
@@ -0,0 +1,59 @@
+/* cfsetspeed(), Linux version.
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <termios_internals.h>
+
+/* Set both the input and output baud rates stored in *TERMIOS_P to SPEED.  */
+int
+__cfsetspeed (struct termios *termios_p, speed_t speed)
+{
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
+
+  termios_p->c_ospeed = speed;
+  termios_p->c_ispeed = speed;
+  termios_p->c_cflag &= ~(CBAUD | CIBAUD);  /* Clear both rate fields.  */
+  termios_p->c_cflag |= cbaud | (cbaud << IBSHIFT);  /* Output | input.  */
+
+  return 0;
+}
+libc_hidden_def (__cfsetspeed)
+versioned_symbol (libc, __cfsetspeed, cfsetspeed, GLIBC_2_42);
+
+#if _TERMIOS_OLD_COMPAT
+
+int
+attribute_compat_text_section
+__old_cfsetspeed (old_termios_t *termios_p, speed_t speed)
+{
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise these fields don't exist in old_termios_t.  */
+  termios_p->c_ospeed = real_speed;
+  termios_p->c_ispeed = real_speed;
+#endif
+  termios_p->c_cflag &= ~(CBAUD | CIBAUD);
+  termios_p->c_cflag |= speed | (speed << IBSHIFT);
+
+  return 0;
+}
+compat_symbol (libc, __old_cfsetspeed, cfsetspeed, GLIBC_2_0);
+
+#endif /* _TERMIOS_OLD_COMPAT */
diff --git a/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c
new file mode 100644
index 0000000..b1df5ed
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c
@@ -0,0 +1,28 @@
+/* Linux close syscall implementation -- non-cancellable, no errno update.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <sysdep-cancel.h>
+#include <not-cancel.h>
+
+void
+__close_nocancel_nostatus (int fd)
+{
+  INTERNAL_SYSCALL_CALL (close, fd);  /* Result is discarded.  */
+}
+libc_hidden_def (__close_nocancel_nostatus)
diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
index a719a55..73fdba1 100644
--- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h
@@ -184,6 +184,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist
index f123757..6325fc1 100644
--- a/sysdeps/unix/sysv/linux/csky/libc.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libc.abilist
@@ -2789,6 +2789,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/csky/libm.abilist b/sysdeps/unix/sysv/linux/csky/libm.abilist
index 6560f3e..4ed463c 100644
--- a/sysdeps/unix/sysv/linux/csky/libm.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libm.abilist
@@ -913,6 +913,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
index dc592c5..d8ffab9 100644
--- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h
@@ -214,6 +214,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 326
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 275
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist
index 2dc85b9..86b3fbd 100644
--- a/sysdeps/unix/sysv/linux/hppa/libc.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist
@@ -2826,6 +2826,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/hppa/libm.abilist b/sysdeps/unix/sysv/linux/hppa/libm.abilist
index 2938d9d..d681d6e 100644
--- a/sysdeps/unix/sysv/linux/hppa/libm.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
index c10897f..196dfec 100644
--- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h
@@ -245,6 +245,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 342
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist
index 1e38217..6555592 100644
--- a/sysdeps/unix/sysv/linux/i386/libc.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libc.abilist
@@ -3009,6 +3009,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist
index e9f296c..de77b0f 100644
--- a/sysdeps/unix/sysv/linux/i386/libm.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libm.abilist
@@ -1308,6 +1308,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/if_index.c b/sysdeps/unix/sysv/linux/if_index.c
index 0b01fd1..5d13759 100644
--- a/sysdeps/unix/sysv/linux/if_index.c
+++ b/sysdeps/unix/sysv/linux/if_index.c
@@ -32,35 +32,23 @@
 unsigned int
 __if_nametoindex (const char *ifname)
 {
-#ifndef SIOCGIFINDEX
-  __set_errno (ENOSYS);
-  return 0;
-#else
-  struct ifreq ifr;
   if (strlen (ifname) >= IFNAMSIZ)
     {
       __set_errno (ENODEV);
       return 0;
     }
 
-  strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
-
   int fd = __opensock ();
-
   if (fd < 0)
     return 0;
 
-  if (__ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
-    {
-      int saved_errno = errno;
-      __close_nocancel_nostatus (fd);
-      if (saved_errno == EINVAL)
-	__set_errno (ENOSYS);
-      return 0;
-    }
+  struct ifreq ifr;
+  strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
+
+  int status = __ioctl (fd, SIOCGIFINDEX, &ifr);
   __close_nocancel_nostatus (fd);
-  return ifr.ifr_ifindex;
-#endif
+
+  return status < 0 ? 0 : ifr.ifr_ifindex;
 }
 libc_hidden_def (__if_nametoindex)
 weak_alias (__if_nametoindex, if_nametoindex)
@@ -83,8 +71,8 @@ weak_alias (__if_freenameindex, if_freenameindex)
 libc_hidden_weak (if_freenameindex)
 
 
-static struct if_nameindex *
-if_nameindex_netlink (void)
+struct if_nameindex *
+__if_nameindex (void)
 {
   struct netlink_handle nh = { 0, 0, 0, NULL, NULL };
   struct if_nameindex *idx = NULL;
@@ -196,19 +184,6 @@ if_nameindex_netlink (void)
 
   return idx;
 }
-
-
-struct if_nameindex *
-__if_nameindex (void)
-{
-#ifndef SIOCGIFINDEX
-  __set_errno (ENOSYS);
-  return NULL;
-#else
-  struct if_nameindex *result = if_nameindex_netlink ();
-  return result;
-#endif
-}
 weak_alias (__if_nameindex, if_nameindex)
 libc_hidden_weak (if_nameindex)
 
diff --git a/sysdeps/unix/sysv/linux/isatty.c b/sysdeps/unix/sysv/linux/isatty.c
new file mode 100644
index 0000000..3faaec5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/isatty.c
@@ -0,0 +1,29 @@
+/* Test whether a file descriptor refers to a terminal.  Linux version.
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <termios_internals.h>
+
+/* Return 1 if FD is a terminal, 0 if not.  This simply does a
+   TCGETS2 ioctl into a dummy buffer without parsing the result.  */
+int
+__isatty (int fd)
+{
+  struct termios2 k_termios;
+  return INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0;
+}
+weak_alias (__isatty, isatty)
diff --git a/sysdeps/unix/sysv/linux/isatty_nostatus.c b/sysdeps/unix/sysv/linux/isatty_nostatus.c
new file mode 100644
index 0000000..406decb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/isatty_nostatus.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <termios_internals.h>
+
+/* Return 1 if FD is a terminal, 0 if not, without changing errno.  */
+int
+__isatty_nostatus (int fd)
+{
+  struct termios2 k_termios;
+  return INTERNAL_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0;
+}
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index 86b2d3c..a49a915 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -54,6 +54,10 @@
    configurations).  */
 #define __ASSUME_SET_ROBUST_LIST	1
 
+/* The termios2 interface was introduced across all architectures except
+   Alpha in kernel 2.6.22.  */
+#define __ASSUME_TERMIOS2	1
+
 /* Support for various CLOEXEC and NONBLOCK flags was added in
    2.6.27.  */
 #define __ASSUME_IN_NONBLOCK	1
diff --git a/sysdeps/unix/sysv/linux/libc_sigaction.c b/sysdeps/unix/sysv/linux/libc_sigaction.c
index bbfc177..67dbc04 100644
--- a/sysdeps/unix/sysv/linux/libc_sigaction.c
+++ b/sysdeps/unix/sysv/linux/libc_sigaction.c
@@ -49,7 +49,7 @@ __libc_sigaction (int sig, const struct sigaction *act, struct sigaction *oact)
     {
       kact.k_sa_handler = act->sa_handler;
       memcpy (&kact.sa_mask, &act->sa_mask, sizeof (sigset_t));
-      kact.sa_flags = act->sa_flags;
+      kact.sa_flags = (unsigned int) act->sa_flags;
       SET_SA_RESTORER (&kact, act);
     }
 
diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
index f123d84..f57a152 100644
--- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h
@@ -171,6 +171,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
index 927fc21..a6cab96 100644
--- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
@@ -2273,6 +2273,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
index 8e35285..4b3ea80 100644
--- a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist
@@ -1148,6 +1148,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
index 715809a..a95cb41 100644
--- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h
@@ -234,6 +234,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 341
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
index 74da49d..7b7b72a 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
@@ -2785,6 +2785,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
index 825ba11..63bad09 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
index e5d6781..df398e4 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
@@ -2952,6 +2952,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
index 45026df..9dba60b 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist
@@ -974,6 +974,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
index 24e218f..fe08f5c 100644
--- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h
@@ -244,6 +244,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 372
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
index 4dbd4b6..ca8df6f 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
@@ -2838,6 +2838,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
index a428778..5596e08 100644
--- a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
index c5965bb..9508154 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
@@ -2835,6 +2835,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
index a428778..5596e08 100644
--- a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/mips/Versions b/sysdeps/unix/sysv/linux/mips/Versions
index 9ea0fa6..48f0037 100644
--- a/sysdeps/unix/sysv/linux/mips/Versions
+++ b/sysdeps/unix/sysv/linux/mips/Versions
@@ -26,6 +26,10 @@ libc {
     pthread_attr_setstack;
     pthread_attr_setstacksize;
   }
+  GLIBC_2.42 {
+    tcgetattr;
+    tcsetattr;
+  }
   GLIBC_PRIVATE {
     # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime).
     __vdso_clock_gettime;
diff --git a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h b/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h
deleted file mode 100644
index ef69821..0000000
--- a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* struct termios definition.  Linux/mips version.
-   Copyright (C) 2019-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _TERMIOS_H
-# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead."
-#endif
-
-#define NCCS 32
-struct termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[NCCS];		/* control characters */
-#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0
-#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0
-  };
diff --git a/sysdeps/unix/sysv/linux/mips/kernel_termios.h b/sysdeps/unix/sysv/linux/mips/kernel_termios.h
deleted file mode 100644
index fd8d35a..0000000
--- a/sysdeps/unix/sysv/linux/mips/kernel_termios.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.24 kernel.  */
-
-#define __KERNEL_NCCS 23
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
index a7615cb..7d76d65 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h
@@ -229,6 +229,7 @@
 #define __NR_open 4005
 #define __NR_open_by_handle_at 4340
 #define __NR_open_tree 4428
+#define __NR_open_tree_attr 4467
 #define __NR_openat 4288
 #define __NR_openat2 4437
 #define __NR_pause 4029
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
index 10715e0..4d51cc4 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
@@ -2913,7 +2913,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
index 1e13743..cdcc488 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
index 3d229b9..7f90fad 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
@@ -2911,7 +2911,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
index 8182a71..888164b 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist
@@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
index 4d863c2..bca3ea6 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h
@@ -212,6 +212,7 @@
 #define __NR_open 6002
 #define __NR_open_by_handle_at 6304
 #define __NR_open_tree 6428
+#define __NR_open_tree_attr 6467
 #define __NR_openat 6251
 #define __NR_openat2 6437
 #define __NR_pause 6033
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
index e4cb452..fc366d1 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
@@ -2919,7 +2919,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
index 9b6683e..5bcd929 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h
@@ -201,6 +201,7 @@
 #define __NR_open 5002
 #define __NR_open_by_handle_at 5299
 #define __NR_open_tree 5428
+#define __NR_open_tree_attr 5467
 #define __NR_openat 5247
 #define __NR_openat2 5437
 #define __NR_pause 5033
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
index 8a32d25..debd5c3 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
@@ -2821,7 +2821,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/mips/termios_arch.h b/sysdeps/unix/sysv/linux/mips/termios_arch.h
new file mode 100644
index 0000000..392d9aa
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/termios_arch.h
@@ -0,0 +1,34 @@
+/* Architectural parameters for Linux termios - MIPS version
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _TERMIOS2_NCCS 23
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
+
+#define _HAVE_STRUCT_OLD_TERMIOS 1
+
+#define OLD_NCCS 32
+struct old_termios
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[OLD_NCCS];		/* control characters */
+};
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index ece3297..5ac6dd4 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -53,6 +53,9 @@ __typeof (__write) __write_nocancel;
 /* Uncancelable close.  */
 __typeof (__close) __close_nocancel;
 
+/* Uncancelable close that does not set errno in case of failure.  */
+void __close_nocancel_nostatus (int);
+
 /* Uncancelable fcntl.  */
 int __fcntl64_nocancel (int, int, ...);
 
@@ -65,17 +68,10 @@ hidden_proto (__read_nocancel)
 hidden_proto (__pread64_nocancel)
 hidden_proto (__write_nocancel)
 hidden_proto (__close_nocancel)
+hidden_proto (__close_nocancel_nostatus)
 hidden_proto (__fcntl64_nocancel)
 #endif
 
-/* Non cancellable close syscall that does not also set errno in case of
-   failure.  */
-static inline void
-__close_nocancel_nostatus (int fd)
-{
-  __close_nocancel (fd);
-}
-
 /* Non cancellable writev syscall that does not also set errno in case of
    failure.  */
 static inline void
diff --git a/sysdeps/unix/sysv/linux/old_termios.h b/sysdeps/unix/sysv/linux/old_termios.h
new file mode 100644
index 0000000..56d19ba
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/old_termios.h
@@ -0,0 +1,23 @@
+/* old_termios.h for Linux other than MIPS and SPARC
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* By default, there is no old termios structure.  */
+#define _HAVE_STRUCT_OLD_TERMIOS 0
+#define OLD_NCCS NCCS
+typedef struct termios old_termios_t;
diff --git a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
index a071c76..c2a1d51 100644
--- a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h
@@ -183,6 +183,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_or1k_atomic 244
diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist
index 64dac95..b62d59f 100644
--- a/sysdeps/unix/sysv/linux/or1k/libc.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist
@@ -2263,6 +2263,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/or1k/libm.abilist b/sysdeps/unix/sysv/linux/or1k/libm.abilist
index 029c3cb..bef7a98 100644
--- a/sysdeps/unix/sysv/linux/or1k/libm.abilist
+++ b/sysdeps/unix/sysv/linux/or1k/libm.abilist
@@ -847,6 +847,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
index 9ea8cfb..a90d581 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h
@@ -35,5 +35,7 @@
 #define CLOCAL	00100000
 
 #ifdef __USE_MISC
-# define ADDRB 04000000000
+# define ADDRB    04000000000
+# define CMSPAR  010000000000 /* Mark or space (stick) parity.  */
+# define CRTSCTS 020000000000 /* Flow control.  */
 #endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h
index 374d9f8..7bcbba4 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h
@@ -17,29 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
-# define CBAUD	0000377
-# define CBAUDEX 0000020
-# define CMSPAR   010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
+# define CBAUD	    000000377
+# define CBAUDEX    000000020
+# define CIBAUD     077600000
+# define IBSHIFT    16
 #endif
 
-#define  B57600   00020
-#define  B115200  00021
-#define  B230400  00022
-#define  B460800  00023
-#define  B500000  00024
-#define  B576000  00025
-#define  B921600  00026
-#define  B1000000 00027
-#define  B1152000 00030
-#define  B1500000 00031
-#define  B2000000 00032
-#define  B2500000 00033
-#define  B3000000 00034
-#define  B3500000 00035
-#define  B4000000 00036
-#define __MAX_BAUD B4000000
+#define  __B57600   00020
+#define  __B115200  00021
+#define  __B230400  00022
+#define  __B460800  00023
+#define  __B500000  00024
+#define  __B576000  00025
+#define  __B921600  00026
+#define  __B1000000 00027
+#define  __B1152000 00030
+#define  __B1500000 00031
+#define  __B2000000 00032
+#define  __B2500000 00033
+#define  __B3000000 00034
+#define  __B3500000 00035
+#define  __B4000000 00036
+#define  __BOTHER   00037
diff --git a/sysdeps/unix/sysv/linux/powerpc/configure b/sysdeps/unix/sysv/linux/powerpc/configure
index 61ae675..ef2055d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/configure
+++ b/sysdeps/unix/sysv/linux/powerpc/configure
@@ -40,48 +40,7 @@ fi
 printf "%s\n" "$libc_cv_mlong_double_128ibm" >&6; }
 
 if test "$libc_cv_mlong_double_128ibm" = no; then
-  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC $CFLAGS supports -mabi=ibmlongdouble" >&5
-printf %s "checking whether $CC $CFLAGS supports -mabi=ibmlongdouble... " >&6; }
-if test ${libc_cv_mabi_ibmlongdouble+y}
-then :
-  printf %s "(cached) " >&6
-else case e in #(
-  e)   save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble"
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <float.h>
-int
-main (void)
-{
-
-#if LDBL_MANT_DIG != 106
-# error "compiler doesn't implement IBM extended format of long double"
-#endif
-long double foobar (long double x) { return x; }
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"
-then :
-  libc_cv_mabi_ibmlongdouble=yes
-else case e in #(
-  e) libc_cv_mabi_ibmlongdouble=no ;;
-esac
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
-  CFLAGS="$save_CFLAGS" ;;
-esac
-fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mabi_ibmlongdouble" >&5
-printf "%s\n" "$libc_cv_mabi_ibmlongdouble" >&6; }
-
-  if test "$libc_cv_mabi_ibmlongdouble" = yes; then
-    CFLAGS="$CFLAGS -mabi=ibmlongdouble"
-  else
-    as_fn_error $? "this configuration requires -mlong-double-128 IBM extended format support" "$LINENO" 5
-  fi
+  CFLAGS="$CFLAGS -mabi=ibmlongdouble"
 fi
 
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports --no-tls-get-addr-optimize" >&5
diff --git a/sysdeps/unix/sysv/linux/powerpc/configure.ac b/sysdeps/unix/sysv/linux/powerpc/configure.ac
index 8d2ec60..42347a6 100644
--- a/sysdeps/unix/sysv/linux/powerpc/configure.ac
+++ b/sysdeps/unix/sysv/linux/powerpc/configure.ac
@@ -16,24 +16,7 @@ long double foobar (long double x) { return x; }]])],
 CFLAGS="$save_CFLAGS"])
 
 if test "$libc_cv_mlong_double_128ibm" = no; then
-  AC_CACHE_CHECK(whether $CC $CFLAGS supports -mabi=ibmlongdouble,
-		 libc_cv_mabi_ibmlongdouble, [dnl
-  save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble"
-  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <float.h>]], [[
-#if LDBL_MANT_DIG != 106
-# error "compiler doesn't implement IBM extended format of long double"
-#endif
-long double foobar (long double x) { return x; }]])],
-		 libc_cv_mabi_ibmlongdouble=yes,
-		 libc_cv_mabi_ibmlongdouble=no)
-  CFLAGS="$save_CFLAGS"])
-
-  if test "$libc_cv_mabi_ibmlongdouble" = yes; then
-    CFLAGS="$CFLAGS -mabi=ibmlongdouble"
-  else
-    AC_MSG_ERROR([this configuration requires -mlong-double-128 IBM extended format support])
-  fi
+  CFLAGS="$CFLAGS -mabi=ibmlongdouble"
 fi
 
 LIBC_LINKER_FEATURE([--no-tls-get-addr-optimize], [-Wl,--no-tls-get-addr-optimize],
diff --git a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h b/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h
deleted file mode 100644
index f6ea570..0000000
--- a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-
-/* We need the definition of tcflag_t, cc_t, and speed_t.  */
-#include <termios.h>
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-    cc_t c_line;		/* line discipline */
-    speed_t c_ispeed;           /* input speed */
-    speed_t c_ospeed;           /* output speed */
-  };
-
-#define _HAVE_C_ISPEED 1
-#define _HAVE_C_OSPEED 1
-
-/* We have the kernel termios structure, so we can presume this code knows
-   what it's doing...  */
-
-#undef  TCGETS
-#undef  TCSETS
-#undef  TCSETSW
-#undef  TCSETSF
-#define TCGETS	_IOR ('t', 19, struct __kernel_termios)
-#define TCSETS	_IOW ('t', 20, struct __kernel_termios)
-#define TCSETSW	_IOW ('t', 21, struct __kernel_termios)
-#define TCSETSF	_IOW ('t', 22, struct __kernel_termios)
-
-#endif /* kernel_termios.h */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
index b3481e4..c371df8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h
@@ -235,6 +235,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 346
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 286
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
index cc5e93c..883e66f 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
@@ -3142,6 +3142,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
index a43cb2c..7f584d3 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist
@@ -1085,6 +1085,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
index 9814997..84cd9e0 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
@@ -3187,6 +3187,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
index 3a08e9f..d1cd4b1 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist
@@ -1084,6 +1084,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
index 45108e8..df8844d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h
@@ -220,6 +220,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 346
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 286
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
index 7f46295..8832568 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
@@ -2896,6 +2896,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
index 93796cd..bfc5310 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist
@@ -1078,6 +1078,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
index f24f81b..b6ff801 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
@@ -2972,6 +2972,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
index 7fe20c0..dedfefc 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist
@@ -1432,6 +1432,7 @@ GLIBC_2.41 tanpil F
 GLIBC_2.42 __compoundnieee128 F
 GLIBC_2.42 __pownieee128 F
 GLIBC_2.42 __powrieee128 F
+GLIBC_2.42 __rootnieee128 F
 GLIBC_2.42 __rsqrtieee128 F
 GLIBC_2.42 compoundn F
 GLIBC_2.42 compoundnf F
@@ -1457,6 +1458,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/powerpc/termios_arch.h b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h
new file mode 100644
index 0000000..919b437
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h
@@ -0,0 +1,33 @@
+/* Architectural parameters for Linux termios - PowerPC version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1
+#define _HAVE_STRUCT_OLD_TERMIOS 0
+
+/* PowerPC quirk: on PowerPC only, ioctl() emulates the TCGETS/TCSETS*
+   ioctls with tcgetattr/tcsetattr using the glibc struct termios.
+   As struct termios2 is the same as the kernel struct termios on PowerPC,
+   simply treat the kernel ioctls as the termios2 interface, even
+   though the kernel doesn't call them that.  */
+
+#define TCGETS2	 _IOR ('t', 19, struct termios2)
+#define TCSETS2	 _IOW ('t', 20, struct termios2)
+#define TCSETSW2 _IOW ('t', 21, struct termios2)
+#define TCSETSF2 _IOW ('t', 22, struct termios2)
diff --git a/sysdeps/unix/sysv/linux/riscv/hwprobe.c b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
index e0cbd22..bc7f6f3 100644
--- a/sysdeps/unix/sysv/linux/riscv/hwprobe.c
+++ b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
@@ -23,13 +23,13 @@
 #include <sysdep-vdso.h>
 
 int __riscv_hwprobe (struct riscv_hwprobe *pairs, size_t pair_count,
-		     size_t cpu_count, unsigned long int *cpus,
+		     size_t cpusetsize, __RISCV_HWPROBE_CPUS_TYPE cpus,
 		     unsigned int flags)
 {
   int r;
 
   r = INTERNAL_VSYSCALL (riscv_hwprobe, 5, pairs, pair_count,
-                         cpu_count, cpus, flags);
+                         cpusetsize, cpus.__ul, flags);
 
   /* Negate negative errno values to match pthreads API. */
   return -r;
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
index 5333879..1bae763 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h
@@ -168,6 +168,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
index 9330c7a..1771a23 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
@@ -2516,6 +2516,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
index 454235d..9342294 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist
@@ -1148,6 +1148,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
index eed1dff..1a1ebf8 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h
@@ -175,6 +175,7 @@
 #define __NR_nfsservctl 42
 #define __NR_open_by_handle_at 265
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 56
 #define __NR_openat2 437
 #define __NR_perf_event_open 241
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
index ea4555d..4b48352 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
@@ -2716,6 +2716,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
index b01d2b4..76e74c9 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist
@@ -1245,6 +1245,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
index bebad6c..40415aa 100644
--- a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
+++ b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h
@@ -21,6 +21,7 @@
 #define _SYS_HWPROBE_H 1
 
 #include <features.h>
+#include <sched.h>
 #include <stddef.h>
 #include <errno.h>
 #ifdef __has_include
@@ -63,22 +64,39 @@ struct riscv_hwprobe {
 
 __BEGIN_DECLS
 
-extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs, size_t __pair_count,
-			    size_t __cpu_count, unsigned long int *__cpus,
+#if defined __cplusplus || !__GNUC_PREREQ (2, 7)
+# define __RISCV_HWPROBE_CPUS_TYPE cpu_set_t *
+#else
+/* The fourth argument to __riscv_hwprobe should be a null pointer or a
+   pointer to a cpu_set_t (either the fixed-size type or allocated with
+   CPU_ALLOC).  However, early versions of this header file used the
+   argument type unsigned long int *.  The transparent union allows
+   the argument to be either cpu_set_t * or unsigned long int * for
+   compatibility.  The older header file requiring unsigned long int *
+   can be identified by the lack of the __RISCV_HWPROBE_CPUS_TYPE macro.
+   In C++ and with compilers that do not support transparent unions, the
+   argument type must be cpu_set_t *.  */
+typedef union {
+	cpu_set_t *__cs;
+	unsigned long int *__ul;
+} __RISCV_HWPROBE_CPUS_TYPE __attribute__ ((__transparent_union__));
+# define __RISCV_HWPROBE_CPUS_TYPE __RISCV_HWPROBE_CPUS_TYPE
+#endif
+
+extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs,
+			    size_t __pair_count, size_t __cpusetsize,
+			    __RISCV_HWPROBE_CPUS_TYPE __cpus,
 			    unsigned int __flags)
-     __nonnull ((1)) __wur
-     __fortified_attr_access (__read_write__, 1, 2)
-     __fortified_attr_access (__read_only__, 4, 3);
+     __THROW __nonnull ((1)) __attr_access ((__read_write__, 1, 2));
 
-/* A pointer to the __riscv_hwprobe vDSO function is passed as the second
+/* A pointer to the __riscv_hwprobe function is passed as the second
    argument to ifunc selector routines. Include a function pointer type for
    convenience in calling the function in those settings. */
-typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs, size_t __pair_count,
-				  size_t __cpu_count, unsigned long int *__cpus,
+typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs,
+				  size_t __pair_count, size_t __cpusetsize,
+				  __RISCV_HWPROBE_CPUS_TYPE __cpus,
 				  unsigned int __flags)
-     __nonnull ((1)) __wur
-     __fortified_attr_access (__read_write__, 1, 2)
-     __fortified_attr_access (__read_only__, 4, 3);
+     __nonnull ((1)) __attr_access ((__read_write__, 1, 2));
 
 /* Helper function usable from ifunc selectors that probes a single key. */
 static __inline int
diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h
index ee015df..05e0e05 100644
--- a/sysdeps/unix/sysv/linux/riscv/sysdep.h
+++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h
@@ -145,11 +145,12 @@
 #  define HAVE_CLOCK_GETRES64_VSYSCALL	"__vdso_clock_getres"
 #  define HAVE_CLOCK_GETTIME64_VSYSCALL	"__vdso_clock_gettime"
 #  define HAVE_GETTIMEOFDAY_VSYSCALL	"__vdso_gettimeofday"
+#  define HAVE_GETRANDOM_VSYSCALL	"__vdso_getrandom"
 # else
 #  define VDSO_NAME	"LINUX_5.4"
 #  define VDSO_HASH	61765876
 
-/* RV32 does not support the gettime VDSO syscalls.  */
+/* RV32 does not support the gettime and getrandom VDSO syscalls.  */
 # endif
 # define HAVE_CLONE3_WRAPPER		1
 
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
index 0bf8f95..f77f39f 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h
@@ -232,6 +232,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 336
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
index 3e625fa..f0decc7 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
@@ -3140,6 +3140,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
index b37c0b5..be2d177 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist
@@ -1372,6 +1372,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
index 061f8db..65d6644 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h
@@ -204,6 +204,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 336
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 288
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
index 46b4a04..da8a2bf 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
@@ -2933,6 +2933,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
index 42bfa28..7d7ba26 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist
@@ -1372,6 +1372,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
index 52cc320..5948ab0 100644
--- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h
@@ -228,6 +228,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 360
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 295
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
index 36a94c9..fb30341 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist
@@ -2832,6 +2832,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/sh/be/libm.abilist b/sysdeps/unix/sysv/linux/sh/be/libm.abilist
index 8ba29d2..5b0b080 100644
--- a/sysdeps/unix/sysv/linux/sh/be/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sh/be/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
index f79aba6..d716673 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist
@@ -2829,6 +2829,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/sh/le/libm.abilist b/sysdeps/unix/sysv/linux/sh/le/libm.abilist
index 8ba29d2..5b0b080 100644
--- a/sysdeps/unix/sysv/linux/sh/le/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sh/le/libm.abilist
@@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F
 GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf32 F
diff --git a/sysdeps/unix/sysv/linux/sparc/Versions b/sysdeps/unix/sysv/linux/sparc/Versions
index f127bdf..7dd61a5 100644
--- a/sysdeps/unix/sysv/linux/sparc/Versions
+++ b/sysdeps/unix/sysv/linux/sparc/Versions
@@ -29,6 +29,10 @@ libc {
 
     __getshmlba;
   }
+  GLIBC_2.42 {
+    tcgetattr;
+    tcsetattr;
+  }
   GLIBC_PRIVATE {
     # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime).
     __vdso_clock_gettime;
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h
index 677db7b..34eba18 100644
--- a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h
+++ b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h
@@ -17,30 +17,29 @@
    <https://www.gnu.org/licenses/>.  */
 
 #ifndef _TERMIOS_H
-# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead."
+# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead."
 #endif
 
 #ifdef __USE_MISC
 # define CBAUD   0x0000100f
 # define CBAUDEX 0x00001000
-# define CIBAUD	 0x100f0000	/* input baud rate (not used) */
-# define CMSPAR  0x40000000	/* mark or space (stick) parity */
-# define CRTSCTS 0x80000000	/* flow control */
+# define CIBAUD	 0x100f0000	/* input baud rate */
+# define IBSHIFT 16
 #endif
 
-#define  B57600  0x00001001
-#define  B115200 0x00001002
-#define  B230400 0x00001003
-#define  B460800 0x00001004
-#define  B76800  0x00001005
-#define  B153600 0x00001006
-#define  B307200 0x00001007
-#define  B614400 0x00001008
-#define  B921600 0x00001009
-#define  B500000 0x0000100a
-#define  B576000 0x0000100b
-#define B1000000 0x0000100c
-#define B1152000 0x0000100d
-#define B1500000 0x0000100e
-#define B2000000 0x0000100f
-#define __MAX_BAUD B2000000
+#define  __B57600  0x00001001
+#define  __B115200 0x00001002
+#define  __B230400 0x00001003
+#define  __B460800 0x00001004
+#define  __B76800  0x00001005
+#define  __B153600 0x00001006
+#define  __B307200 0x00001007
+#define  __B614400 0x00001008
+#define  __B921600 0x00001009
+#define  __B500000 0x0000100a
+#define  __B576000 0x0000100b
+#define __B1000000 0x0000100c
+#define __B1152000 0x0000100d
+#define __B1500000 0x0000100e
+#define __B2000000 0x0000100f
+#define __BOTHER   0x00001000
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h
deleted file mode 100644
index 269ca9d..0000000
--- a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* struct termios definition.  Linux/sparc version.
-   Copyright (C) 2019-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#ifndef _TERMIOS_H
-# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead."
-#endif
-
-#define NCCS 17
-struct termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[NCCS];		/* control characters */
-#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0
-#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0
-  };
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
index ee870bc..85828a8 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h
@@ -230,6 +230,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 333
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 284
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
index 4a6acc0..6deedf2 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
@@ -3161,7 +3161,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
index 4d10689..8107101 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist
@@ -1379,6 +1379,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
index 3acbebe..d83ecd1 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h
@@ -211,6 +211,7 @@
 #define __NR_open 5
 #define __NR_open_by_handle_at 333
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 284
 #define __NR_openat2 437
 #define __NR_pause 29
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
index 931109d..1ce22bf 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
@@ -2797,7 +2797,19 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
+GLIBC_2.42 tcgetattr F
+GLIBC_2.42 tcsetattr F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
 GLIBC_2.42 ulabs F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
index 6c64126..418ed9d 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist
@@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/sparc/termios_arch.h b/sysdeps/unix/sysv/linux/sparc/termios_arch.h
new file mode 100644
index 0000000..f3b3f65
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sparc/termios_arch.h
@@ -0,0 +1,34 @@
+/* Architectural parameters for Linux termios - SPARC version
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
+
+#define _HAVE_STRUCT_OLD_TERMIOS 1
+
+#define OLD_NCCS 17
+struct old_termios
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[OLD_NCCS];		/* control characters */
+};
diff --git a/sysdeps/unix/sysv/linux/speed.c b/sysdeps/unix/sysv/linux/speed.c
index 017f741..4efb0de 100644
--- a/sysdeps/unix/sysv/linux/speed.c
+++ b/sysdeps/unix/sysv/linux/speed.c
@@ -16,82 +16,351 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <stddef.h>
-#include <errno.h>
-#include <termios.h>
-#include <sysdep.h>
+#include <termios_internals.h>
 
-/* This is a gross hack around a kernel bug.  If the cfsetispeed functions
-   is called with the SPEED argument set to zero this means use the same
-   speed as for output.  But we don't have independent input and output
-   speeds and therefore cannot record this.
+/* Conversions between legacy c_cflag fields and actual baud rates */
 
-   We use an unused bit in the `c_iflag' field to keep track of this
-   use of `cfsetispeed'.  The value here must correspond to the one used
-   in `tcsetattr.c'.  */
-#define IBAUD0	020000000000
+/* These expressions may seem complicated; the _cbix() macro
+   compresses the CBAUD field into an index in the range 0-31. On most
+   Linux platforms, the CBAUD field is 5 bits, but the topmost bit,
+   indicated by CBAUDEX, is discontinuous with the rest.
+
+   The resulting masks look like:
+
+		Alpha		PowerPC		others
+
+   CBAUD	0x001f		0x00ff		0x100f
+   CBAUDEX	0x0000		0x0010		0x1000
+
+   LOWCBAUD	0x001f		0x000f		0x000f
+   CBAUDMASK	0x001f		0x001f		0x100f
+
+   CBAUDMASK is used to test for invalid values passed to the
+   compatibility functions or in termios::c_cflag on PowerPC.
+
+   The divide-multiply sequence in the _cbix() macro gets converted
+   to shifts and masks as necessary by the compiler. */
+
+#define LOWCBAUD (CBAUD & (CBAUDEX-1))
+#define _cbix(x) (((x) & LOWCBAUD) | \
+		  (CBAUDEX ? ((x) & CBAUDEX)/CBAUDEX * (LOWCBAUD+1) : 0))
+#define CBAUDMASK (LOWCBAUD | CBAUDEX)
+
+/* Compile time sanity checks for broken CBAUD or CIBAUD definitions */
+#if CIBAUD != (CBAUD << IBSHIFT)
+# error "CIBAUD should == CBAUD << IBSHIFT"
+#elif CBAUDEX & (CBAUDEX-1)
+# error "CBAUDEX should either be 0 or a single bit"
+#elif !(CBAUD & 1)
+# error "The CBAUD field should start at bit 0"
+#elif CBAUDEX & ~CBAUD
+# error "CBAUD should include the CBAUDEX bit"
+#endif
+
+speed_t
+___cbaud_to_speed (tcflag_t c_cflag, speed_t other)
+{
+  static const speed_t cbaudix_to_speed [] =
+    {
+      [0 ... _cbix(CBAUDMASK)] = -1,
+      [_cbix(__B0)] = 0,
+      [_cbix(__B50)] = 50,
+      [_cbix(__B75)] = 75,
+      [_cbix(__B110)] = 110,
+      [_cbix(__B134)] = 134,
+      [_cbix(__B150)] = 150,
+      [_cbix(__B200)] = 200,
+      [_cbix(__B300)] = 300,
+      [_cbix(__B600)] = 600,
+      [_cbix(__B1200)] = 1200,
+      [_cbix(__B1800)] = 1800,
+      [_cbix(__B2400)] = 2400,
+      [_cbix(__B4800)] = 4800,
+      [_cbix(__B9600)] = 9600,
+      [_cbix(__B19200)] = 19200,
+      [_cbix(__B38400)] = 38400,
+      [_cbix(__B57600)] = 57600,
+      [_cbix(__B115200)] = 115200,
+      [_cbix(__B230400)] = 230400,
+      [_cbix(__B460800)] = 460800,
+      [_cbix(__B500000)] = 500000,
+      [_cbix(__B576000)] = 576000,
+      [_cbix(__B921600)] = 921600,
+      [_cbix(__B1000000)] = 1000000,
+      [_cbix(__B1152000)] = 1152000,
+      [_cbix(__B1500000)] = 1500000,
+      [_cbix(__B2000000)] = 2000000,
+#ifdef __B7200
+      [_cbix(__B7200)] = 7200,
+#endif
+#ifdef __B14400
+      [_cbix(__B14400)] = 14400,
+#endif
+#ifdef __B28800
+      [_cbix(__B28800)] = 28800,
+#endif
+#ifdef __B76800
+      [_cbix(__B76800)] = 76800,
+#endif
+#ifdef __B153600
+      [_cbix(__B153600)] = 153600,
+#endif
+#ifdef __B307200
+      [_cbix(__B307200)] = 307200,
+#endif
+#ifdef __B614400
+      [_cbix(__B614400)] = 614400,
+#endif
+#ifdef __B2500000
+      [_cbix(__B2500000)] = 2500000,
+#endif
+#ifdef __B3000000
+      [_cbix(__B3000000)] = 3000000,
+#endif
+#ifdef __B3500000
+      [_cbix(__B3500000)] = 3500000,
+#endif
+#ifdef __B4000000
+      [_cbix(__B4000000)] = 4000000,
+#endif
+    };
+  speed_t speed;
+
+  if (c_cflag & (tcflag_t)(~CBAUDMASK))
+    return other;
+
+  speed = cbaudix_to_speed[_cbix(c_cflag)];
+  return speed == (speed_t)-1 ? other : speed;
+}
+
+tcflag_t
+___speed_to_cbaud (speed_t speed)
+{
+  switch (speed) {
+  case 0:
+    return __B0;
+  case 50:
+    return __B50;
+  case 75:
+    return __B75;
+  case 110:
+    return __B110;
+  case 134:
+    return __B134;
+  case 150:
+    return __B150;
+  case 200:
+    return __B200;
+  case 300:
+    return __B300;
+  case 600:
+    return __B600;
+  case 1200:
+    return __B1200;
+  case 1800:
+    return __B1800;
+  case 2400:
+    return __B2400;
+  case 4800:
+    return __B4800;
+  case 9600:
+    return __B9600;
+  case 19200:
+    return __B19200;
+  case 38400:
+    return __B38400;
+  case 57600:
+    return __B57600;
+  case 115200:
+    return __B115200;
+  case 230400:
+    return __B230400;
+  case 460800:
+    return __B460800;
+  case 500000:
+    return __B500000;
+  case 576000:
+    return __B576000;
+  case 921600:
+    return __B921600;
+  case 1000000:
+    return __B1000000;
+  case 1152000:
+    return __B1152000;
+  case 1500000:
+    return __B1500000;
+  case 2000000:
+    return __B2000000;
+#ifdef __B76800
+  case 76800:
+    return __B76800;
+#endif
+#ifdef __B153600
+  case 153600:
+    return __B153600;
+#endif
+#ifdef __B307200
+  case 307200:
+    return __B307200;
+#endif
+#ifdef __B614400
+  case 614400:
+    return __B614400;
+#endif
+#ifdef __B2500000
+  case 2500000:
+    return __B2500000;
+#endif
+#ifdef __B3000000
+  case 3000000:
+    return __B3000000;
+#endif
+#ifdef __B3500000
+  case 3500000:
+    return __B3500000;
+#endif
+#ifdef __B4000000
+  case 4000000:
+    return __B4000000;
+#endif
+  default:
+    return __BOTHER;
+  }
+}
+
+
+/* Canonicalize the representation of speed fields in a kernel
+   termios2 structure.  Specifically, if there is a valid legacy cbaud
+   representation (not __BOTHER), use it and propagate the
+   corresponding speed value to ispeed/ospeed, otherwise the other way
+   around if possible.  Finally, if the input speed is zero, copy the
+   output speed to the input speed.
+
+   The kernel doesn't do this canonicalization, which can affect
+   legacy utilities, so do it here.
+
+   This is used by tcgetattr() and tcsetattr(). */
+void
+___termios2_canonicalize_speeds (struct termios2 *k_termios_p)
+{
+  k_termios_p->c_ospeed =
+      ___cbaud_to_speed (cbaud (k_termios_p->c_cflag),  k_termios_p->c_ospeed);
+  k_termios_p->c_ispeed =
+      ___cbaud_to_speed (cibaud (k_termios_p->c_cflag), k_termios_p->c_ispeed);
+
+  if (!k_termios_p->c_ispeed)
+    k_termios_p->c_ispeed = k_termios_p->c_ospeed;
+
+  k_termios_p->c_cflag &= ~(CBAUD | CIBAUD);
+  k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ospeed);
+  k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ispeed) << IBSHIFT;
+}
 
 
 /* Return the output baud rate stored in *TERMIOS_P.  */
 speed_t
-cfgetospeed (const struct termios *termios_p)
+__cfgetospeed (const struct termios *termios_p)
 {
-  return termios_p->c_cflag & (CBAUD | CBAUDEX);
+  return termios_p->c_ospeed;
 }
+libc_hidden_def (__cfgetospeed)
+versioned_symbol (libc, __cfgetospeed, cfgetospeed, GLIBC_2_42);
 
-/* Return the input baud rate stored in *TERMIOS_P.
-   Although for Linux there is no difference between input and output
-   speed, the numerical 0 is a special case for the input baud rate. It
-   should set the input baud rate to the output baud rate. */
+/* Return the input baud rate stored in *TERMIOS_P.  */
 speed_t
-cfgetispeed (const struct termios *termios_p)
+__cfgetispeed (const struct termios *termios_p)
 {
-  return ((termios_p->c_iflag & IBAUD0)
-	  ? 0 : termios_p->c_cflag & (CBAUD | CBAUDEX));
+  return termios_p->c_ispeed;
 }
+libc_hidden_def (__cfgetispeed)
+versioned_symbol (libc, __cfgetispeed, cfgetispeed, GLIBC_2_42);
 
 /* Set the output baud rate stored in *TERMIOS_P to SPEED.  */
 int
-cfsetospeed (struct termios *termios_p, speed_t speed)
+__cfsetospeed (struct termios *termios_p, speed_t speed)
 {
-  if ((speed & ~CBAUD) != 0
-      && (speed < B57600 || speed > __MAX_BAUD))
-    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
 
-#if _HAVE_STRUCT_TERMIOS_C_OSPEED
   termios_p->c_ospeed = speed;
+  termios_p->c_cflag &= ~CBAUD;
+  termios_p->c_cflag |= cbaud;
+
+  return 0;
+}
+libc_hidden_def (__cfsetospeed)
+versioned_symbol (libc, __cfsetospeed, cfsetospeed, GLIBC_2_42);
+
+/* Set the input baud rate stored in *TERMIOS_P to SPEED. */
+int
+__cfsetispeed (struct termios *termios_p, speed_t speed)
+{
+  tcflag_t cbaud = ___speed_to_cbaud (speed);
+
+  termios_p->c_ispeed = speed;
+  termios_p->c_cflag &= ~CIBAUD;
+  termios_p->c_cflag |= cbaud << IBSHIFT;
+
+  return 0;
+}
+libc_hidden_def (__cfsetispeed)
+versioned_symbol (libc, __cfsetispeed, cfsetispeed, GLIBC_2_42);
+
+#if _TERMIOS_OLD_COMPAT
+
+/* Legacy versions which take and return cbaud-encoded speed_t values */
+
+speed_t
+attribute_compat_text_section
+__old_cfgetospeed (const old_termios_t *termios_p)
+{
+  return cbaud (termios_p->c_cflag);
+}
+compat_symbol (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0);
+
+speed_t
+attribute_compat_text_section
+__old_cfgetispeed (const old_termios_t *termios_p)
+{
+  return cibaud (termios_p->c_cflag);
+}
+compat_symbol (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0);
+
+int
+attribute_compat_text_section
+__old_cfsetospeed (old_termios_t *termios_p, speed_t speed)
+{
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise this field doesn't exist in old_termios_t */
+  termios_p->c_ospeed = real_speed;
 #endif
-  termios_p->c_cflag &= ~(CBAUD | CBAUDEX);
+  termios_p->c_cflag &= ~CBAUD;
   termios_p->c_cflag |= speed;
 
   return 0;
 }
-libc_hidden_def (cfsetospeed)
+compat_symbol (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0);
 
-
-/* Set the input baud rate stored in *TERMIOS_P to SPEED.
-   Although for Linux there is no difference between input and output
-   speed, the numerical 0 is a special case for the input baud rate.  It
-   should set the input baud rate to the output baud rate.  */
 int
-cfsetispeed (struct termios *termios_p, speed_t speed)
+attribute_compat_text_section
+__old_cfsetispeed (old_termios_t *termios_p, speed_t speed)
 {
-  if ((speed & ~CBAUD) != 0
-      && (speed < B57600 || speed > __MAX_BAUD))
+  speed_t real_speed = ___cbaud_to_speed (speed, -1);
+  if (real_speed == (speed_t)-1)
     return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
 
-#if _HAVE_STRUCT_TERMIOS_C_ISPEED
-  termios_p->c_ispeed = speed;
+#if !_HAVE_STRUCT_OLD_TERMIOS
+  /* Otherwise this field doesn't exist in old_termios_t */
+  termios_p->c_ispeed = real_speed;
 #endif
-  if (speed == 0)
-    termios_p->c_iflag |= IBAUD0;
-  else
-    {
-      termios_p->c_iflag &= ~IBAUD0;
-      termios_p->c_cflag &= ~(CBAUD | CBAUDEX);
-      termios_p->c_cflag |= speed;
-    }
+  termios_p->c_cflag &= ~CIBAUD;
+  termios_p->c_cflag |= speed << IBSHIFT;
 
   return 0;
 }
-libc_hidden_def (cfsetispeed)
+compat_symbol (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0);
+
+#endif /* _TERMIOS_OLD_COMPAT */
diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list
index 6f3351a..bde20e4 100644
--- a/sysdeps/unix/sysv/linux/syscall-names.list
+++ b/sysdeps/unix/sysv/linux/syscall-names.list
@@ -21,8 +21,8 @@
 # This file can list all potential system calls.  The names are only
 # used if the installed kernel headers also provide them.
 
-# The list of system calls is current as of Linux 6.14.
-kernel 6.14
+# The list of system calls is current as of Linux 6.15.
+kernel 6.15
 
 FAST_atomic_update
 FAST_cmpxchg
@@ -316,6 +316,7 @@ olduname
 open
 open_by_handle_at
 open_tree
+open_tree_attr
 openat
 openat2
 or1k_atomic
diff --git a/sysdeps/unix/sysv/linux/tcgetattr.c b/sysdeps/unix/sysv/linux/tcgetattr.c
index d672e0c..ca17569 100644
--- a/sysdeps/unix/sysv/linux/tcgetattr.c
+++ b/sysdeps/unix/sysv/linux/tcgetattr.c
@@ -15,66 +15,56 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
-#include <string.h>
-#include <termios.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sysdep.h>
-
-/* The difference here is that the termios structure used in the
-   kernel is not the same as we use in the libc.  Therefore we must
-   translate it here.  */
-#include <kernel_termios.h>
+#include <termios_internals.h>
 
 /* Put the state of FD into *TERMIOS_P.  */
 int
 __tcgetattr (int fd, struct termios *termios_p)
 {
-  struct __kernel_termios k_termios;
-  int retval;
-
-  retval = INLINE_SYSCALL (ioctl, 3, fd, TCGETS, &k_termios);
+  struct termios2 k_termios;
+  long int retval = INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios);
 
-  if (__glibc_likely (retval == 0))
+  if (__glibc_likely (retval != -1))
     {
-      termios_p->c_iflag = k_termios.c_iflag;
-      termios_p->c_oflag = k_termios.c_oflag;
-      termios_p->c_cflag = k_termios.c_cflag;
-      termios_p->c_lflag = k_termios.c_lflag;
-      termios_p->c_line = k_termios.c_line;
-#if _HAVE_STRUCT_TERMIOS_C_ISPEED
-# if _HAVE_C_ISPEED
-      termios_p->c_ispeed = k_termios.c_ispeed;
-# else
-      termios_p->c_ispeed = k_termios.c_cflag & (CBAUD | CBAUDEX);
-# endif
-#endif
-#if _HAVE_STRUCT_TERMIOS_C_OSPEED
-# if _HAVE_C_OSPEED
+      ___termios2_canonicalize_speeds (&k_termios);
+
+      memset (termios_p, 0, sizeof (*termios_p));
+      termios_p->c_iflag  = k_termios.c_iflag;
+      termios_p->c_oflag  = k_termios.c_oflag;
+      termios_p->c_cflag  = k_termios.c_cflag;
+      termios_p->c_lflag  = k_termios.c_lflag;
+      termios_p->c_line   = k_termios.c_line;
       termios_p->c_ospeed = k_termios.c_ospeed;
-# else
-      termios_p->c_ospeed = k_termios.c_cflag & (CBAUD | CBAUDEX);
-# endif
-#endif
-      if (sizeof (cc_t) == 1 || _POSIX_VDISABLE == 0
-	  || (unsigned char) _POSIX_VDISABLE == (unsigned char) -1)
-	memset (__mempcpy (&termios_p->c_cc[0], &k_termios.c_cc[0],
-			   __KERNEL_NCCS * sizeof (cc_t)),
-		_POSIX_VDISABLE, (NCCS - __KERNEL_NCCS) * sizeof (cc_t));
-      else
-	{
-	  memcpy (&termios_p->c_cc[0], &k_termios.c_cc[0],
-		  __KERNEL_NCCS * sizeof (cc_t));
+      termios_p->c_ispeed = k_termios.c_ispeed;
 
-	  for (size_t cnt = __KERNEL_NCCS; cnt < NCCS; ++cnt)
-	    termios_p->c_cc[cnt] = _POSIX_VDISABLE;
-	}
+      copy_c_cc (termios_p->c_cc, NCCS, k_termios.c_cc, _TERMIOS2_NCCS);
     }
 
   return retval;
 }
-
 libc_hidden_def (__tcgetattr)
+
+#if _TERMIOS_OLD_COMPAT && _HAVE_STRUCT_OLD_TERMIOS
+
+versioned_symbol (libc, __tcgetattr, tcgetattr, GLIBC_2_42);
+
+/* Legacy version for shorter struct termios */
+int
+attribute_compat_text_section
+__old_tcgetattr (int fd, old_termios_t *termios_p)
+{
+  struct termios new_termios;
+  int retval = __tcgetattr (fd, &new_termios);
+  if (__glibc_likely (retval != -1))
+    {
+      memcpy (termios_p, &new_termios, sizeof (*termios_p));
+    }
+  return retval;
+}
+compat_symbol (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0);
+
+#else
+
 weak_alias (__tcgetattr, tcgetattr)
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/tcsetattr.c b/sysdeps/unix/sysv/linux/tcsetattr.c
index 5a13ad8..4f07a03 100644
--- a/sysdeps/unix/sysv/linux/tcsetattr.c
+++ b/sysdeps/unix/sysv/linux/tcsetattr.c
@@ -15,67 +15,94 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
-#include <string.h>
-#include <termios.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sysdep.h>
-
-/* The difference here is that the termios structure used in the
-   kernel is not the same as we use in the libc.  Therefore we must
-   translate it here.  */
-#include <kernel_termios.h>
-
-
-/* This is a gross hack around a kernel bug.  If the cfsetispeed functions
-   is called with the SPEED argument set to zero this means use the same
-   speed as for output.  But we don't have independent input and output
-   speeds and therefore cannot record this.
-
-   We use an unused bit in the `c_iflag' field to keep track of this
-   use of `cfsetispeed'.  The value here must correspond to the one used
-   in `speed.c'.  */
-#define IBAUD0	020000000000
+#include <termios_internals.h>
 
+#define static_assert_equal(x,y) _Static_assert ((x) == (y), #x " != " #y)
 
 /* Set the state of FD to *TERMIOS_P.  */
 int
 __tcsetattr (int fd, int optional_actions, const struct termios *termios_p)
 {
-  struct __kernel_termios k_termios;
-  unsigned long int cmd;
+  struct termios2 k_termios;
+  unsigned long cmd;
 
-  switch (optional_actions)
-    {
-    case TCSANOW:
-      cmd = TCSETS;
-      break;
-    case TCSADRAIN:
-      cmd = TCSETSW;
-      break;
-    case TCSAFLUSH:
-      cmd = TCSETSF;
-      break;
-    default:
-      return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
-    }
+  memset (&k_termios, 0, sizeof k_termios);
 
-  k_termios.c_iflag = termios_p->c_iflag & ~IBAUD0;
+  k_termios.c_iflag = termios_p->c_iflag;
   k_termios.c_oflag = termios_p->c_oflag;
   k_termios.c_cflag = termios_p->c_cflag;
   k_termios.c_lflag = termios_p->c_lflag;
-  k_termios.c_line = termios_p->c_line;
-#if _HAVE_C_ISPEED && _HAVE_STRUCT_TERMIOS_C_ISPEED
-  k_termios.c_ispeed = termios_p->c_ispeed;
-#endif
-#if _HAVE_C_OSPEED && _HAVE_STRUCT_TERMIOS_C_OSPEED
+  k_termios.c_line  = termios_p->c_line;
+
   k_termios.c_ospeed = termios_p->c_ospeed;
-#endif
-  memcpy (&k_termios.c_cc[0], &termios_p->c_cc[0],
-	  __KERNEL_NCCS * sizeof (cc_t));
+  k_termios.c_ispeed = termios_p->c_ispeed;
+
+  ___termios2_canonicalize_speeds (&k_termios);
+
+  copy_c_cc (k_termios.c_cc, _TERMIOS2_NCCS, termios_p->c_cc, NCCS);
+
+  /*
+   * Choose the proper ioctl number to invoke.
+   *
+   * Alpha got TCSETS2 late (Linux 4.20), but has the same structure
+   * format, and it only needs TCSETS2 if either it needs to use
+   * __BOTHER or split speed.  All other architectures have TCSETS2 as
+   * far back as the current glibc supports.  Calling TCSETS with
+   * __BOTHER causes unpredictable results on old Alpha kernels and
+   * could even crash them.
+   */
+  static_assert_equal(TCSADRAIN, TCSANOW + 1);
+  static_assert_equal(TCSAFLUSH, TCSANOW + 2);
+  static_assert_equal(TCSETSW2,  TCSETS2 + 1);
+  static_assert_equal(TCSETSF2,  TCSETS2 + 2);
+  static_assert_equal(TCSETSW,   TCSETS  + 1);
+  static_assert_equal(TCSETSF,   TCSETS  + 2);
+
+  cmd = (long)optional_actions - TCSANOW;
+  if (cmd > 2)
+    return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL);
+
+  if (__ASSUME_TERMIOS2 ||
+      k_termios.c_ospeed != k_termios.c_ispeed ||
+      cbaud (k_termios.c_cflag) == __BOTHER)
+    {
+      cmd += TCSETS2;
+    }
+  else
+    {
+      cmd += TCSETS;
+      k_termios.c_cflag &= ~CIBAUD;
+    }
+
+  return INLINE_SYSCALL_CALL (ioctl, fd, cmd, &k_termios);
+}
+libc_hidden_def (__tcsetattr)
+
+#if _HAVE_STRUCT_OLD_TERMIOS && _TERMIOS_OLD_COMPAT
+
+versioned_symbol (libc, __tcsetattr, tcsetattr, GLIBC_2_42);
 
-  return INLINE_SYSCALL (ioctl, 3, fd, cmd, &k_termios);
+/* Legacy version for shorter struct termios without speed fields */
+int
+attribute_compat_text_section
+__old_tcsetattr (int fd, int optional_actions, const old_termios_t *termios_p)
+{
+  struct termios new_termios;
+
+  memset (&new_termios, 0, sizeof (new_termios));
+  new_termios.c_iflag  = termios_p->c_iflag;
+  new_termios.c_oflag  = termios_p->c_oflag;
+  new_termios.c_cflag  = termios_p->c_cflag;
+  new_termios.c_lflag  = termios_p->c_lflag;
+  new_termios.c_line   = termios_p->c_line;
+  copy_c_cc(new_termios.c_cc, NCCS, termios_p->c_cc, OLD_NCCS);
+
+  return __tcsetattr (fd, optional_actions, &new_termios);
 }
+compat_symbol (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0);
+
+#else
+
 weak_alias (__tcsetattr, tcsetattr)
-libc_hidden_def (tcsetattr)
+
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/unix/sysv/linux/termios_arch.h
index 4387908..8dbf420 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
+++ b/sysdeps/unix/sysv/linux/termios_arch.h
@@ -1,4 +1,6 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
+/* Architectural parameters for Linux termios - generic version
+
+   Copyright (C) 1997-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,11 +17,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
-#define STRNCMP __strncmp_power10
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
+#define _TERMIOS2_NCCS 19
+#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0
 
-#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S>
-#endif
+#define _HAVE_STRUCT_OLD_TERMIOS 0
diff --git a/sysdeps/unix/sysv/linux/termios_internals.h b/sysdeps/unix/sysv/linux/termios_internals.h
new file mode 100644
index 0000000..e8dbfe7
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/termios_internals.h
@@ -0,0 +1,143 @@
+/* termios functions internal implementation header for Linux
+
+   Copyright (C) 1991-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef TERMIOS_INTERNALS_H
+#define TERMIOS_INTERNALS_H 1
+
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sysdep.h>
+#include <shlib-compat.h>
+
+#include <termios_arch.h>
+
+/* ---- Kernel interface definitions ---- */
+
+/* The termios2 structure used in the kernel interfaces is not the
+   same as the termios structure we use in the libc.  Therefore we
+   must translate it here.  */
+
+struct termios2
+{
+  tcflag_t c_iflag;		/* input mode flags */
+  tcflag_t c_oflag;		/* output mode flags */
+  tcflag_t c_cflag;		/* control mode flags */
+  tcflag_t c_lflag;		/* local mode flags */
+#if _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE
+  cc_t c_cc[_TERMIOS2_NCCS];	/* control characters */
+  cc_t c_line;			/* line discipline */
+#else
+  cc_t c_line;			/* line discipline */
+  cc_t c_cc[_TERMIOS2_NCCS];	/* control characters */
+#endif
+  speed_t c_ispeed;		/* input speed */
+  speed_t c_ospeed;		/* output speed */
+};
+
+/* Alpha got termios2 late, but TCGETS has exactly the same structure
+   format and function as TCGETS2. On all other platforms, the termios2
+   interface exists as far back as this version of glibc supports.
+
+   For TCSETS* it is more complicated; this is handled in tcsetattr.c.
+
+   Some other architectures only have the equivalent of the termios2
+   interface, in which case the old ioctl names are the only ones
+   presented, but are equivalent to the new ones. */
+#ifndef TCGETS2
+# define TCGETS2  TCGETS
+# define TCSETS2  TCSETS
+# define TCSETSW2 TCSETSW
+# define TCSETSF2 TCSETSF
+#elif !__ASSUME_TERMIOS2
+/* Hack for Alpha */
+# undef  TCGETS2
+# define TCGETS2 TCGETS
+#endif
+
+/* ---- Application interface definitions ---- */
+
+/*
+ * Should old speed_t and struct termios (if applicable) compatibility
+ * functions be included?
+ */
+#define _TERMIOS_OLD_COMPAT SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_42)
+
+/*
+ * Old struct termios (without c_ispeed and c_ospeed fields) if
+ * applicable. The new struct termios *must* be binary identical up to
+ * the size of the old structure.
+ *
+ * This only applies to SPARC and MIPS; for other architectures the
+ * new and old speed_t interfaces both use the same struct termios.
+ */
+#if _HAVE_STRUCT_OLD_TERMIOS
+typedef struct old_termios old_termios_t;
+#else
+# define OLD_NCCS NCCS
+typedef struct termios old_termios_t;
+#endif
+
+/* ---- Internal function definitions ---- */
+
+/*
+ * Copy a set of c_cc fields of possibly different width. If the target
+ * field is longer, then fill the remainder with _POSIX_VDISABLE.
+ */
+static inline void
+copy_c_cc (cc_t *to, size_t nto, const cc_t *from, size_t nfrom)
+{
+  if (nto < nfrom)
+    nfrom = nto;
+
+  to = __mempcpy (to, from, nfrom * sizeof(cc_t));
+  if (nto > nfrom)
+    memset (to, _POSIX_VDISABLE, (nto - nfrom) * sizeof(cc_t));
+}
+
+/* Extract the output and input legacy speed fields from c_cflag. */
+static inline tcflag_t
+cbaud (tcflag_t c_cflag)
+{
+  return c_cflag & CBAUD;
+}
+
+static inline tcflag_t
+cibaud (tcflag_t c_cflag)
+{
+  return cbaud (c_cflag >> IBSHIFT);
+}
+
+extern speed_t
+___cbaud_to_speed (tcflag_t c_cflag, speed_t other)
+    __attribute_const__ attribute_hidden;
+
+extern tcflag_t
+___speed_to_cbaud (speed_t speed)
+    __attribute_const__ attribute_hidden;
+
+extern void
+___termios2_canonicalize_speeds (struct termios2 *k_termios_p)
+    attribute_hidden;
+
+#endif /* TERMIOS_INTERNALS_H */
diff --git a/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c b/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c
new file mode 100644
index 0000000..14fdf82
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c
@@ -0,0 +1,239 @@
+/* Test for copy_file_range with large sizes (bug 33245).
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This test exercises copy_file_range with various large file sizes
+   on FUSE filesystems to verify proper handling of system call return
+   values.  No data is actually copied.  */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/fuse.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/xthread.h>
+#include <support/xunistd.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static void
+fuse_thread (struct support_fuse *f, void *closure)
+{
+  /* Node IDs for our test files.  */
+  enum { NODE_SOURCE = 2, NODE_DEST = 3 };
+  /* A large size, so that the kernel does not fail the
+     copy_file_range attempt before performing the FUSE callback.
+     Only the source file size matters to the kernel, but both files
+     use the same size for simplicity.  */
+  const uint64_t file_size = 1LLU << 61;
+
+  struct fuse_in_header *inh;
+  while ((inh = support_fuse_next (f)) != NULL)
+    {
+      if (support_fuse_handle_mountpoint (f)
+          || (inh->nodeid == 1 && support_fuse_handle_directory (f)))
+        continue;
+
+      switch (inh->opcode)
+        {
+        case FUSE_LOOKUP:
+          {
+            char *name = support_fuse_cast (LOOKUP, inh);
+            int node = 0;
+            if (inh->nodeid == 1 && strcmp (name, "source") == 0)
+              node = NODE_SOURCE;
+            else if (inh->nodeid == 1 && strcmp (name, "dest") == 0)
+              node = NODE_DEST;
+
+            if (node != 0)
+              {
+                struct fuse_entry_out *out
+                  = support_fuse_prepare_entry (f, node);
+                out->attr.mode = S_IFREG | 0600;
+                out->attr.size = file_size;
+                support_fuse_reply_prepared (f);
+              }
+            else
+              support_fuse_reply_error (f, ENOENT);
+          }
+          break;
+
+        case FUSE_OPEN:
+          /* File open */
+          {
+            if (inh->nodeid == NODE_SOURCE || inh->nodeid == NODE_DEST)
+              {
+                struct fuse_open_out out = { .fh = inh->nodeid };
+                support_fuse_reply (f, &out, sizeof (out));
+              }
+            else
+              support_fuse_reply_error (f, ENOENT);
+          }
+          break;
+
+        case FUSE_GETATTR:
+          /* Get file attributes */
+          if (inh->nodeid == NODE_SOURCE || inh->nodeid == NODE_DEST)
+            {
+              struct fuse_attr_out *out = support_fuse_prepare_attr (f);
+              out->attr.mode = S_IFREG | 0600;
+              out->attr.size = file_size;
+              support_fuse_reply_prepared (f);
+            }
+          else
+            support_fuse_reply_error (f, ENOENT);
+          break;
+
+        case FUSE_COPY_FILE_RANGE:
+          {
+            struct fuse_copy_file_range_in *p
+              = support_fuse_cast (COPY_FILE_RANGE, inh);
+
+            /* Verify this is a copy from source to dest, starting at
+               offset 0.  */
+            TEST_COMPARE (p->fh_in, NODE_SOURCE);
+            TEST_COMPARE (p->nodeid_out, NODE_DEST);
+            TEST_COMPARE (p->off_in, 0);
+            TEST_COMPARE (p->off_out, 0);
+            TEST_VERIFY (p->len > 0);
+            TEST_VERIFY (p->len <= file_size);
+
+            /* Pretend the copy succeeded.  */
+            struct fuse_write_out out = { .size = p->len };
+            support_fuse_reply (f, &out, sizeof (out));
+          }
+          break;
+
+        case FUSE_FLUSH:
+          support_fuse_reply_empty (f);
+          break;
+
+        default:
+          support_fuse_reply_error (f, EIO);
+        }
+    }
+}
+
+static void
+test_size (struct support_fuse *f, off64_t size)
+{
+  /* On 32-bit targets, not all possible return values from
+     copy_file_range are representable.  The current (Linux 6.5.18)
+     kernel FUSE implementation can produce negative non-error results
+     from copy_file_range in the range [1UL << 31, UINT_MAX - 4095],
+     but this seems to be a FUSE bug.  */
+  if (size != (ssize_t) size)
+    {
+      printf ("info:%s size 0x%llx is too large for ssize_t\n",
+              test_verbose ? "    " : "", (unsigned long long int) size);
+      return;
+    }
+
+  verbose_printf ("info:     testing copy size 0x%llx\n",
+                  (unsigned long long int) size);
+
+  const char *mountpoint = support_fuse_mountpoint (f);
+  char *source_path = xasprintf ("%s/source", mountpoint);
+  char *dest_path = xasprintf ("%s/dest", mountpoint);
+
+  int source_fd = xopen (source_path, O_RDONLY, 0);
+  int dest_fd = xopen (dest_path, O_WRONLY, 0);
+
+  ssize_t copied = copy_file_range (source_fd, NULL, dest_fd, NULL, size, 0);
+  /* Avoid FAIL_UNSUPPORTED if it is likely bogus due to previous
+     copy_file_range successes.  */
+  if (copied == -1 && errno == ENOSYS)
+    {
+      /* Unmounting avoids a test hang on exit.  */
+      xclose (dest_fd);
+      xclose (source_fd);
+      support_fuse_unmount (f);
+      FAIL_UNSUPPORTED ("copy_file_range not supported");
+    }
+
+  TEST_COMPARE (copied, size);
+
+  xclose (dest_fd);
+  xclose (source_fd);
+  free (dest_path);
+  free (source_path);
+}
+
+static void
+test_all_sizes (struct support_fuse *f)
+{
+  test_size (f, 0); /* Not actually handled by the callback.  */
+  test_size (f, 20);
+  test_size (f, 1 << 30);
+  test_size (f, INT_MAX);
+  for (int i = 0; i <= 5; ++i)
+    test_size (f, (1U << 31) + i);
+  for (int i = -4100; i <= -4090; ++i)
+    test_size (f, UINT_MAX + i);
+  for (int i = -100; i <= 0; ++i)
+    test_size (f, UINT_MAX + i);
+
+  /* We would like to test larger values than UINT_MAX here, but they
+     do not work because the FUSE protocol uses uint32_t for the
+     copy_file_range result in struct fuse_write_out.  */
+}
+
+static void *
+test_cancel_state_variants (void *f_ptr)
+{
+  struct support_fuse *f = (struct support_fuse *) f_ptr;
+
+  verbose_printf ("info:   testing default cancellation settings\n");
+  test_all_sizes (f);
+
+  verbose_printf ("info:   testing with cancellation disabled\n");
+  TEST_COMPARE (pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL), 0);
+  test_all_sizes (f);
+
+  verbose_printf ("info:   testing with cancellation enabled\n");
+  TEST_COMPARE (pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL), 0);
+  test_all_sizes (f);
+
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  support_fuse_init ();
+  struct support_fuse *f = support_fuse_mount (fuse_thread, NULL);
+
+  verbose_printf ("info: testing on main thread\n");
+  test_cancel_state_variants (f);
+
+  verbose_printf ("info: testing on secondary thread\n");
+  TEST_VERIFY (xpthread_join (xpthread_create
+                              (NULL, test_cancel_state_variants, f))
+               == NULL);
+
+  verbose_printf ("info: testing on separate thread\n");
+
+  support_fuse_unmount (f);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-pkey.c b/sysdeps/unix/sysv/linux/tst-pkey.c
index 4d12d2e..1000d8f 100644
--- a/sysdeps/unix/sysv/linux/tst-pkey.c
+++ b/sysdeps/unix/sysv/linux/tst-pkey.c
@@ -191,7 +191,7 @@ do_test (void)
   pthread_t delayed_thread = xpthread_create
     (NULL, &delayed_thread_func, &delayed_thread_check_access);
 
-  keys[0] = pkey_alloc (0, 0);
+  keys[0] = pkey_alloc (0, PKEY_UNRESTRICTED);
   if (keys[0] < 0)
     {
       if (errno == ENOSYS)
@@ -333,7 +333,7 @@ do_test (void)
           if (i == allowed_key)
             {
               if (do_write)
-                TEST_COMPARE (pkey_set (keys[i], 0), 0);
+                TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0);
               else
                 TEST_COMPARE (pkey_set (keys[i], PKEY_DISABLE_WRITE), 0);
             }
@@ -360,7 +360,7 @@ do_test (void)
      inherit that access.  */
   for (int i = 0; i < key_count; ++i)
     {
-      TEST_COMPARE (pkey_set (keys[i], 0), 0);
+      TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0);
       TEST_VERIFY (check_page_access (i, false));
       TEST_VERIFY (check_page_access (i, true));
     }
diff --git a/sysdeps/unix/sysv/linux/tst-termios-linux.c b/sysdeps/unix/sysv/linux/tst-termios-linux.c
new file mode 100644
index 0000000..e4b0c8b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-termios-linux.c
@@ -0,0 +1,592 @@
+/* Linux termios regression tests
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include <shlib-compat.h>
+#include <array_length.h>
+
+#include <support/check.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/tty.h>
+
+/* Evaluate an expression and make sure errno did not get set; return
+   the value of the expression */
+#define CHECKERR(expr)				\
+  ({						\
+    errno = 0;					\
+    const __typeof (expr) _val = (expr);	\
+    TEST_COMPARE(errno, 0);			\
+    _val;					\
+  })
+
+/* Evaluate an expression and verify that it returns a specific value,
+   and that errno has not been set. */
+#define VERIFY(expr,val) TEST_COMPARE(CHECKERR(expr), val)
+/* Check for zero and errno not set */
+#define CHECKZERO(expr)  VERIFY(expr, 0)
+
+/* Table of legacy speed constants */
+
+#define BOGUS ((speed_t)-1)
+#define ANY   ((speed_t)-2)
+
+struct cbaud_table
+{
+  speed_t speed;
+  speed_t cbaud;
+  const char *name;
+};
+
+static const struct cbaud_table cbaud_table [] =
+{
+  { 0, __B0, "__B0" },
+  { 50, __B50, "__B50" },
+  { 75, __B75, "__B75" },
+  { 110, __B110, "__B110" },
+  { 134, __B134, "__B134" },
+  { 150, __B150, "__B150" },
+  { 200, __B200, "__B200" },
+  { 300, __B300, "__B300" },
+  { 600, __B600, "__B600" },
+  { 1200, __B1200, "__B1200" },
+  { 1800, __B1800, "__B1800" },
+  { 2400, __B2400, "__B2400" },
+  { 4800, __B4800, "__B4800" },
+#ifdef __B7200
+  { 7200, __B7200, "__B7200" },
+#endif
+  { 9600, __B9600, "__B9600" },
+#ifdef __B14400
+  { 14400, __B14400, "__B14400" },
+#endif
+  { 19200, __B19200, "__B19200" },
+#ifdef __B28800
+  { 28800, __B28800, "__B28800" },
+#endif
+  { 38400, __B38400, "__B38400" },
+  { 57600, __B57600, "__B57600" },
+#ifdef __B76800
+  { 76800, __B76800, "__B76800" },
+#endif
+  { 115200, __B115200, "__B115200" },
+#ifdef __B153600
+  { 153600, __B153600, "__B153600" },
+#endif
+  { 230400, __B230400, "__B230400" },
+#ifdef __B307200
+  { 307200, __B307200, "__B307200" },
+#endif
+  { 460800, __B460800, "__B460800" },
+  { 500000, __B500000, "__B500000" },
+  { 576000, __B576000, "__B576000" },
+#ifdef __B614400
+  { 614400, __B614400, "__B614400" },
+#endif
+  { 921600, __B921600, "__B921600" },
+  { 1000000, __B1000000, "__B1000000" },
+  { 1152000, __B1152000, "__B1152000" },
+  { 1500000, __B1500000, "__B1500000" },
+  { 2000000, __B2000000, "__B2000000" },
+#ifdef __B2500000
+  { 2500000, __B2500000, "__B2500000" },
+#endif
+#ifdef __B3000000
+  { 3000000, __B3000000, "__B3000000" },
+#endif
+#ifdef __B3500000
+  { 3500000, __B3500000, "__B3500000" },
+#endif
+#ifdef __B4000000
+  { 4000000, __B4000000, "__B4000000" },
+#endif
+  { ANY, __BOTHER, "__BOTHER" },
+  { BOGUS, BOGUS, "invalid" }
+};
+
+/* List of common speeds to test */
+
+static const speed_t test_speeds [] =
+{
+  0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400,
+  4800, 7200, 9600, 14400, 19200, 28800, 33600, 38400, 57600,
+  76800, 115200, 153600, 230400, 307200, 460800, 500000,
+  576000, 614400, 921600, 1000000, 1152000, 1500000, 2000000,
+  2500000, 3000000, 3500000, 4000000, 5000000, 10000000
+};
+
+/* Speed function tests */
+
+/* These intentionally are a separate implementation from speed.c;
+   these should be "trivially correct" and don't need to be optimized
+   in any way */
+
+/* Returns __BOTHER if there is no legacy value for this speed */
+static speed_t speed_to_cbaud (speed_t speed)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->speed != ANY; ct++)
+    {
+      if (ct->speed == speed)
+	break;
+    }
+  return ct->cbaud;
+}
+
+/* Returns ANY if cbaud is __BOTHER, or BOGUS if invalid */
+static speed_t cbaud_to_speed (speed_t cbaud)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->cbaud != BOGUS; ct++)
+    {
+      if (ct->cbaud == cbaud)
+	break;
+    }
+  return ct->speed;
+}
+
+static const char *cbaud_name (speed_t cbaud)
+{
+  const struct cbaud_table *ct;
+  for (ct = cbaud_table; ct->cbaud != BOGUS; ct++)
+    {
+      if (ct->cbaud == cbaud)
+	break;
+    }
+  return ct->name;
+}
+
+static int check_speed (speed_t expected, speed_t speed, speed_t cbaud,
+			speed_t cfspeed, baud_t cfbaud, char io)
+{
+  speed_t want_cbaud;
+  cbaud &= CBAUD;
+
+  if (expected != ANY && speed != expected)
+    FAIL_RET ("c_%cspeed = %u, expected %u", io, speed, expected);
+
+  if (cfspeed != speed)
+    FAIL_RET ("cfget%cspeed = %u, expected %u", io, cfspeed, speed);
+
+  if (cfbaud != cfspeed)
+    FAIL_RET ("cfget%cbaud = %u, but cfget%cspeed = %u",
+	      io, cfbaud, io, cfspeed);
+
+  want_cbaud = speed_to_cbaud (speed);
+
+  if (cbaud != want_cbaud)
+    FAIL_RET ("c_%cspeed = %u: %s = %s (%06o), should be %s (%06o)",
+	      io, speed,
+	      io == 'o' ? "CBAUD" : "CIBAUD", cbaud_name (cbaud), cbaud,
+	      cbaud_name (want_cbaud), want_cbaud);
+
+  return 0;
+}
+
+/* Validate that the speeds in the struct termios are properly normalized.
+   The two variants below differ in their handling of ispeed == 0. */
+
+/* Use this after cfset* () */
+static void check_speeds_cf (const struct termios *tio_p,
+			     speed_t ospeed, speed_t ispeed)
+{
+  check_speed (ospeed, tio_p->c_ospeed, tio_p->c_cflag,
+	       CHECKERR (cfgetospeed (tio_p)),
+	       CHECKERR (cfgetobaud (tio_p)), 'o');
+  check_speed (ispeed, tio_p->c_ispeed, tio_p->c_cflag >> IBSHIFT,
+	       CHECKERR (cfgetispeed (tio_p)),
+	       CHECKERR (cfgetibaud (tio_p)), 'i');
+}
+
+/* Use this after tc[gs]etattr () */
+static void check_speeds_tc (int fd, speed_t ospeed, speed_t ispeed)
+{
+  struct termios tio;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  check_speeds_cf (&tio, ospeed, ispeed ? ispeed : ospeed);
+}
+
+/* For search and replace convenience */
+#define check_bauds_cf check_speeds_cf
+#define check_bauds_tc check_speeds_tc
+
+/* Common routine for setting speeds, with checking */
+static void
+set_speeds (int fd, speed_t ospeed, speed_t ispeed)
+{
+  struct termios tio;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  CHECKZERO (cfsetospeed (&tio, ospeed));
+  CHECKZERO (cfsetispeed (&tio, ispeed));
+  check_speeds_cf (&tio, ospeed, ispeed);
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, ospeed, ispeed ? ispeed : ospeed);
+}
+
+/* Actual tests */
+
+typedef void (*speed_test_t)(int ttyfd, speed_t speed);
+static void
+run_speed_test (int fd, speed_test_t test);
+
+/* New interface cfset*speed test, on a local struct termios only */
+static void
+new_cfspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t old_ospeed, old_ispeed;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_ospeed = CHECKERR (cfgetospeed (&tio));
+  old_ispeed = CHECKERR (cfgetispeed (&tio));
+
+  /* Check initial normalization */
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+
+  /* Check cfset*speed normalization; tcsetattr is never called here */
+  CHECKZERO (cfsetospeed (&tio, speed));
+  check_speeds_cf (&tio, speed, old_ispeed);
+  CHECKZERO (cfsetispeed (&tio, speed));
+  check_speeds_cf (&tio, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  check_speeds_cf (&tio, old_ospeed, speed);
+  CHECKZERO (cfsetispeed (&tio, B0));
+  check_speeds_cf (&tio, old_ospeed, B0);
+  CHECKZERO (cfsetspeed (&tio, speed));
+  check_speeds_cf (&tio, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));	/* Back to the start */
+  CHECKZERO (cfsetispeed (&tio, old_ispeed));
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+}
+
+/* New interface cfset*speed test, applying each change with tcsetattr */
+static void
+new_tcspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t old_ospeed, old_ispeed;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_ospeed = CHECKERR (cfgetospeed (&tio));
+  old_ispeed = CHECKERR (cfgetispeed (&tio));
+
+  /* Check initial normalization */
+  check_speeds_cf (&tio, old_ospeed, old_ispeed);
+
+  /* Check cfset*speed normalization as read back from FD each time */
+  CHECKZERO (cfsetospeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, old_ispeed);
+  CHECKZERO (cfsetispeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, speed);
+  CHECKZERO (cfsetispeed (&tio, B0));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, B0);
+  CHECKZERO (cfsetspeed (&tio, speed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, speed, speed);
+  CHECKZERO (cfsetospeed (&tio, old_ospeed));	/* Back to the start */
+  CHECKZERO (cfsetispeed (&tio, old_ispeed));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_speeds_tc (fd, old_ospeed, old_ispeed);
+}
+
+/* New interface cfset*baud test, on a local struct termios only */
+static void
+new_cfbaud_test (int fd, baud_t baud)
+{
+  struct termios tio;
+  baud_t old_obaud, old_ibaud;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_obaud = CHECKERR (cfgetobaud (&tio));
+  old_ibaud = CHECKERR (cfgetibaud (&tio));
+
+  /* Check initial normalization */
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+
+  /* Check cfset*baud normalization; tcsetattr is never called here */
+  CHECKZERO (cfsetobaud (&tio, baud));
+  check_bauds_cf (&tio, baud, old_ibaud);
+  CHECKZERO (cfsetibaud (&tio, baud));
+  check_bauds_cf (&tio, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  check_bauds_cf (&tio, old_obaud, baud);
+  CHECKZERO (cfsetibaud (&tio, B0));
+  check_bauds_cf (&tio, old_obaud, B0);
+  CHECKZERO (cfsetbaud (&tio, baud));
+  check_bauds_cf (&tio, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));	/* Back to the start */
+  CHECKZERO (cfsetibaud (&tio, old_ibaud));
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+}
+
+/* New interface cfset*baud test, applying each change with tcsetattr */
+static void
+new_tcbaud_test (int fd, baud_t baud)
+{
+  struct termios tio;
+  baud_t old_obaud, old_ibaud;
+
+  CHECKZERO (tcgetattr (fd, &tio));
+  old_obaud = CHECKERR (cfgetobaud (&tio));
+  old_ibaud = CHECKERR (cfgetibaud (&tio));
+
+  /* Check initial normalization */
+  check_bauds_cf (&tio, old_obaud, old_ibaud);
+
+  /* Check cfset*baud normalization as read back from FD each time */
+  CHECKZERO (cfsetobaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, old_ibaud);
+  CHECKZERO (cfsetibaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, baud);
+  CHECKZERO (cfsetibaud (&tio, B0));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, B0);
+  CHECKZERO (cfsetbaud (&tio, baud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, baud, baud);
+  CHECKZERO (cfsetobaud (&tio, old_obaud));	/* Back to the start */
+  CHECKZERO (cfsetibaud (&tio, old_ibaud));
+  CHECKZERO (tcsetattr (fd, TCSANOW, &tio));
+  check_bauds_tc (fd, old_obaud, old_ibaud);
+}
+
+/*
+ * Old interface tests.  The __old_* names below are bound to the GLIBC_2.0
+ * versions of the termios functions.  This depends critically on the new
+ * struct termios being guaranteed to be a superset of the legacy one.
+ */
+#if TEST_COMPAT (libc, GLIBC_2_0, GLIBC_2_42)
+extern int __old_cfsetospeed (struct termios *tio_p, speed_t speed);
+compat_symbol_reference (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0);
+extern int __old_cfsetispeed (struct termios *tio_p, speed_t speed);
+compat_symbol_reference (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0);
+extern speed_t __old_cfgetospeed (const struct termios *tio_p);
+compat_symbol_reference (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0);
+extern speed_t __old_cfgetispeed (const struct termios *tio_p);
+compat_symbol_reference (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0);
+extern int __old_tcsetattr (int fd, int act, const struct termios *tio_p);
+compat_symbol_reference (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0);
+extern int __old_tcgetattr (int fd, struct termios *tio_p);
+compat_symbol_reference (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0);
+
+static int old_tcsetattr (int fd, const struct termios *tio_p)
+{
+  struct termios old_tio = *tio_p;	/* Work on a copy; *tio_p is const */
+
+  /* Deliberately corrupt c_ispeed and c_ospeed before the legacy call */
+  old_tio.c_ispeed = 0xdeadbeef;
+  old_tio.c_ospeed = 0xfeedface;
+  return __old_tcsetattr (fd, TCSANOW, &old_tio);
+}
+static int old_tcgetattr (int fd, struct termios *tio_p)
+{
+  int rv;
+  memset (tio_p, 0xde, sizeof *tio_p);	/* Poison the whole structure first */
+  rv = __old_tcgetattr (fd, tio_p);
+  if (rv)
+    return rv;
+
+  /* Deliberately corrupt c_ispeed and c_ospeed after the legacy call */
+  tio_p->c_ispeed = 0xdeadbeef;
+  tio_p->c_ospeed = 0xfeedface;
+  return 0;
+}
+
+/* Old interface test: set SPEED with the old functions, then check it
+   with the new ones.  This relies on the new struct termios always being
+   a binary superset of the old one.  Split speed is not tested, since
+   that never worked on the old glibc. */
+static void
+old_tcspeed_test (int fd, speed_t speed)
+{
+  struct termios tio;
+  speed_t cbaud;
+
+  if (!speed)
+    return;			/* Skip B0 for this test */
+
+  cbaud = speed_to_cbaud (speed);
+  if (cbaud == __BOTHER)
+    return;			/* Presumably no legacy Bxxx code; TODO confirm */
+
+  CHECKZERO (old_tcgetattr (fd, &tio));
+  CHECKZERO (__old_cfsetospeed (&tio, cbaud));
+  VERIFY (__old_cfgetospeed (&tio), cbaud);
+  CHECKZERO (__old_cfsetispeed (&tio, cbaud));
+  VERIFY (__old_cfgetispeed (&tio), cbaud);
+  CHECKZERO (old_tcsetattr (fd, &tio));
+  check_speeds_tc (fd, speed, speed);
+}
+
+/* Verify that invalid CBAUD values return error for the old interfaces */
+static void
+old_invalid_speeds_test (int fd)
+{
+  struct termios tio;
+  speed_t cbaud;
+
+  for (cbaud = 0 ; cbaud ; cbaud > 0xffff ? (cbaud <<= 1) : cbaud++) {
+    speed_t realspeed;
+    realspeed = (cbaud & ~CBAUD) ? BOGUS : cbaud_to_speed (cbaud);
+    if (realspeed >= ANY)
+      {
+	int rv;
+
+	errno = 0;
+	rv = __old_cfsetospeed (&tio, cbaud);
+	if (rv != -1 || errno != EINVAL)
+	  FAIL("__old_cfsetospeed() accepted invalid value %06o", cbaud);
+
+	errno = 0;
+	rv = __old_cfsetispeed (&tio, cbaud);
+	if (rv != -1 || errno != EINVAL)
+	  FAIL("__old_cfsetispeed() accepted invalid value %06o", cbaud);
+      }
+    else
+      {
+	CHECKZERO (__old_cfsetospeed (&tio, cbaud));
+	VERIFY (__old_cfgetospeed (&tio), cbaud);
+	CHECKZERO (__old_cfsetispeed (&tio, cbaud));
+	VERIFY (__old_cfgetispeed (&tio), cbaud);
+	if (cbaud)
+	  {
+	    CHECKZERO (old_tcsetattr (fd, &tio));
+	    check_speeds_tc (fd, realspeed, realspeed);
+	  }
+      }
+  }
+}
+
+static void
+compat_tests (int fd)
+{
+  run_speed_test (fd, old_tcspeed_test);	/* Per-speed legacy checks */
+  old_invalid_speeds_test (fd);			/* CBAUD validity sweep */
+}
+#else /* No TEST_COMPAT */
+#define compat_tests(fd) ((void)(fd))		/* Nothing to test */
+#endif
+
+/* Run TEST on FD for a range of speeds, starting from each of several
+   initial speed settings in turn.  */
+static void
+run_speed_test (int fd, speed_test_t test)
+{
+  static const struct { speed_t ospeed, ispeed; } initial_speeds [] = {
+    { 2400, 2400 },		/* Standard speed, non-split */
+    { 123456, 123456 },		/* Nonstandard speed, non-split */
+    { 75, 1200 },		/* Standard split speeds */
+    { 9600, 456789 },		/* One standard, one nonstandard */
+    { 54321, 1234567890 }	/* Nonstandard, one very high */
+  };
+  /* Fixed jrand48 state, so the pseudorandom speeds are reproducible */
+  unsigned short rand_state [3] = { 0x1234, 0x5678, 0x9abc };
+
+  array_foreach_const (start, initial_speeds)
+    {
+      /* Set up initial conditions */
+      set_speeds (fd, start->ospeed, start->ispeed);
+
+      /* Test all common speeds */
+      array_foreach_const (common, test_speeds)
+	test (fd, *common);
+
+      /* Test pseudorandom speeds; the count, array_length(test_speeds),
+	 is an arbitrary value */
+      for (size_t left = array_length(test_speeds) ; left > 0 ; left--)
+	test (fd, (speed_t) jrand48 (rand_state));
+
+      /* Test power-of-2 speeds */
+      for (speed_t bit = 1 ; bit != 0 ; bit <<= 1)
+	test (fd, bit);
+
+      /* Test power of 2 multiples of 75; 75 << 25 is the maximum below 2^32 */
+      for (int shift = 0 ; shift <= 25 ; shift++)
+	test (fd, (speed_t)75 << shift);
+    }
+}
+
+/* Run every speed test on FD: the new interface first, then the legacy
+   (compat) one.  */
+static void
+run_speed_tests (int fd)
+{
+  static const speed_test_t new_interface_tests [] = {
+    new_cfspeed_test, new_tcspeed_test,	/* Canonicalization, cfset*speed */
+    new_cfbaud_test, new_tcbaud_test	/* The new cfset*baud() functions */
+  };
+
+  array_foreach_const (one_test, new_interface_tests)
+    run_speed_test (fd, *one_test);
+  compat_tests (fd);			/* Tests of the legacy functions */
+}
+
+/* test dispatch */
+
+static void
+run_in_chroot (void)
+{
+  int master, tty;
+
+  /* Use the slave side of a fresh pty as the tty under test.  Most of
+     the termios settings, including the speeds, have no impact on a
+     pty, but they are still settable like for any other tty, which
+     makes them very convenient for testing.  */
+  support_openpty (&master, &tty, NULL, NULL, NULL);
+  run_speed_tests (tty);
+  close (tty);
+  close (master);
+}
+
+static int
+do_test (void)
+{
+  support_become_root ();	/* Return value is not checked */
+  run_in_chroot ();		/* Runs all the tests on a new pty */
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
index 17b84c7..06fbae5 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h
@@ -207,6 +207,7 @@
 #define __NR_open 2
 #define __NR_open_by_handle_at 304
 #define __NR_open_tree 428
+#define __NR_open_tree_attr 467
 #define __NR_openat 257
 #define __NR_openat2 437
 #define __NR_pause 34
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
index 7ab9073..5648772 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
@@ -2748,6 +2748,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
index 11c5ebc..6719814 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist
@@ -1302,6 +1302,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
index fb834a7..6938382 100644
--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
@@ -87,10 +87,10 @@ $(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
 $(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
 $(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
 
-CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
-CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile -DTEST_AMX
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2
 endif
 
 endif # $(subdir) == elf
diff --git a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
index 006c532..812e023 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
+++ b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c
@@ -22,7 +22,7 @@
 extern void restore_rt (void) asm ("__restore_rt") attribute_hidden;
 
 #define SET_SA_RESTORER(kact, act)			\
-  (kact)->sa_flags = (act)->sa_flags | SA_RESTORER;	\
+  (kact)->sa_flags |= SA_RESTORER;			\
   (kact)->sa_restorer = &restore_rt
 
 #define RESET_SA_RESTORER(act, kact) 			\
diff --git a/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h b/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h
new file mode 100644
index 0000000..585ca01
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h
@@ -0,0 +1,76 @@
+/* Signal frame backtracing support for SFrame on AMD, x86-64 and x86.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License
+   as published by the Free Software Foundation; either version 2.1 of
+   the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This code is inspired from libgcc's MD_FALLBACK_FRAME_STATE_FOR
+   implementation.  See libgcc/config/i386/linux-unwind.h  */
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#ifdef __x86_64__
+
+/* SFrame is only supported by x86_64 targets.  */
+
+#define MD_DECODE_SIGNAL_FRAME x86_64_decode_signal_frame
+
+#ifdef __LP64__
+#define RT_SIGRETURN_SYSCALL	0x050f0000000fc0c7ULL
+#else
+#define RT_SIGRETURN_SYSCALL	0x050f40000201c0c7ULL
+#endif
+
+static _Unwind_Reason_Code
+x86_64_decode_signal_frame (frame *frame)
+{
+  unsigned char *insn = (unsigned char *) frame->pc;
+  unsigned long long tail;
+  ucontext_t *uc;
+  mcontext_t *mc;
+
+  /* A signal frame is recognized by frame->pc pointing at
+     "movq $__NR_rt_sigreturn, %rax ; syscall": a 0x48 byte followed
+     by the eight bytes of RT_SIGRETURN_SYSCALL.  */
+  memcpy (&tail, insn + 1, sizeof tail);
+  if (insn[0] != 0x48 || tail != RT_SIGRETURN_SYSCALL)
+    return _URC_END_OF_STACK;
+
+  /* The ucontext_t is read from the frame's stack pointer.  */
+  uc = (ucontext_t *) frame->sp;
+  mc = &uc->uc_mcontext;
+
+  /* Continue unwinding from the registers saved in it.  */
+  frame->pc = (_Unwind_Ptr) mc->gregs[REG_RIP];
+  frame->sp = (_Unwind_Ptr) mc->gregs[REG_RSP];
+  frame->fp = (_Unwind_Ptr) mc->gregs[REG_RBP];
+  return _URC_NO_REASON;
+}
+
+#define MD_DETECT_OUTERMOST_FRAME x86_64_detect_outermost_frame
+
+static _Unwind_Reason_Code
+x86_64_detect_outermost_frame (frame *frame)
+{
+  /* A cleared frame pointer marks the outermost frame, where unwinding
+     stops.  */
+  const int outermost = frame->fp == 0;
+
+  return outermost ? _URC_END_OF_STACK : _URC_NO_REASON;
+}
+
+#endif /* ifdef __x86_64__  */
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
index 1dcd6ab..135ef3d 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h
@@ -200,6 +200,7 @@
 #define __NR_open 1073741826
 #define __NR_open_by_handle_at 1073742128
 #define __NR_open_tree 1073742252
+#define __NR_open_tree_attr 1073742291
 #define __NR_openat 1073742081
 #define __NR_openat2 1073742261
 #define __NR_pause 1073741858
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
index e11876f..25a39d0 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
@@ -2767,6 +2767,16 @@ GLIBC_2.41 sched_getattr F
 GLIBC_2.41 sched_setattr F
 GLIBC_2.42 __inet_ntop_chk F
 GLIBC_2.42 __inet_pton_chk F
+GLIBC_2.42 cfgetibaud F
+GLIBC_2.42 cfgetispeed F
+GLIBC_2.42 cfgetobaud F
+GLIBC_2.42 cfgetospeed F
+GLIBC_2.42 cfsetbaud F
+GLIBC_2.42 cfsetibaud F
+GLIBC_2.42 cfsetispeed F
+GLIBC_2.42 cfsetobaud F
+GLIBC_2.42 cfsetospeed F
+GLIBC_2.42 cfsetspeed F
 GLIBC_2.42 pthread_gettid_np F
 GLIBC_2.42 uabs F
 GLIBC_2.42 uimaxabs F
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
index 2b1b75e..1a1069a 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist
@@ -1302,6 +1302,14 @@ GLIBC_2.42 powrf32x F
 GLIBC_2.42 powrf64 F
 GLIBC_2.42 powrf64x F
 GLIBC_2.42 powrl F
+GLIBC_2.42 rootn F
+GLIBC_2.42 rootnf F
+GLIBC_2.42 rootnf128 F
+GLIBC_2.42 rootnf32 F
+GLIBC_2.42 rootnf32x F
+GLIBC_2.42 rootnf64 F
+GLIBC_2.42 rootnf64x F
+GLIBC_2.42 rootnl F
 GLIBC_2.42 rsqrt F
 GLIBC_2.42 rsqrtf F
 GLIBC_2.42 rsqrtf128 F
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 01b0192..4fbd48e 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -4,7 +4,13 @@ endif
 
 ifeq ($(subdir),elf)
 sysdep_routines += get-cpuid-feature-leaf
-sysdep-dl-routines += dl-get-cpu-features
+sysdep-dl-routines += \
+  dl-get-cpu-features \
+  dl-tlsdesc \
+  tls_get_addr \
+  tlsdesc \
+# sysdep-dl-routines
+
 sysdep_headers += \
   bits/platform/features.h \
   bits/platform/x86.h \
@@ -90,14 +96,22 @@ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512
 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
 tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
 
-CFLAGS-tst-gnu2-tls2.c += -msse
+CFLAGS-tst-gnu2-tls2.c += -msse2
 CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
 CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
 CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
 
-LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy
+LDFLAGS-tst-gnu2-tls2 += -rdynamic
+LDFLAGS-tst-gnu2-tls2mod0.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod1.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod2.so += -Wl,-z,undefs
+
+CFLAGS-tst-gnu2-tls2-x86-noxsave.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavec.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavexsavec.c += -msse2
+LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy -rdynamic
 
 # Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled
 # via tunable.
@@ -113,6 +127,18 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \
   $(objpfx)tst-gnu2-tls2mod0.so \
   $(objpfx)tst-gnu2-tls2mod1.so \
   $(objpfx)tst-gnu2-tls2mod2.so
+
+CFLAGS-tst-tls23.c += -msse2
+CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell
+
+LDFLAGS-tst-tls23 += -rdynamic
+tst-tls23-mod.so-no-z-defs = yes
+
+$(objpfx)tst-tls23-mod.so: $(libsupport)
+endif
+
+ifeq ($(subdir),gmon)
+CFLAGS-mcount.c += -mgeneral-regs-only
 endif
 
 ifeq ($(subdir),math)
diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
index c7ea9ac..dff26e9 100644
--- a/sysdeps/x86/configure
+++ b/sysdeps/x86/configure
@@ -171,8 +171,12 @@ fi
 config_vars="$config_vars
 have-x86-isa-level = $libc_cv_have_x86_isa_level"
 config_vars="$config_vars
+x86-isa-level-2-or-above = 2 3 4"
+config_vars="$config_vars
 x86-isa-level-3-or-above = 3 4"
 config_vars="$config_vars
+x86-isa-level-4-or-above = 4"
+config_vars="$config_vars
 enable-x86-isa-level = $libc_cv_include_x86_isa_level"
 
 
diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
index 031f917..54960a7 100644
--- a/sysdeps/x86/configure.ac
+++ b/sysdeps/x86/configure.ac
@@ -117,7 +117,9 @@ else
   AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
 fi
 LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
+LIBC_CONFIG_VAR([x86-isa-level-2-or-above], [2 3 4])
 LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
+LIBC_CONFIG_VAR([x86-isa-level-4-or-above], [4])
 LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
 
 dnl Check if TEST_CC supports -mfpmath=387
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index e50f1d6..b7d1506 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1256,7 +1256,7 @@ no_cpuid:
 #endif
 
   if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
-      || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
+      || cpu_features->xsave_state_size != 0)
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
 	{
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index c3c73e7..b8e963b 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -183,6 +183,29 @@
 
 #define atom_text_section .section ".text.atom", "ax"
 
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+   __tls_get_addr may be called with 8-byte/4-byte stack alignment.
+   Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
+   assume that stack will be always aligned at 16 bytes.  */
+# ifdef __x86_64__
+#  define DL_STACK_ALIGNMENT 8
+#  define MINIMUM_ALIGNMENT 16
+# else
+#  define DL_STACK_ALIGNMENT 4
+# endif
+#endif
+
+/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
+   STATE_SAVE or to MINIMUM_ALIGNMENT before calling _dl_fixup/__tls_get_addr.
+   NOTE(review): MINIMUM_ALIGNMENT is defined above for x86-64 only.  */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
+
 #endif	/* __ASSEMBLER__ */
 
 #endif	/* _X86_SYSDEP_H */
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
index de900a4..b3195ff 100644
--- a/sysdeps/x86/tst-gnu2-tls2.c
+++ b/sysdeps/x86/tst-gnu2-tls2.c
@@ -1,20 +1,26 @@
-#ifndef __x86_64__
-#include <sys/platform/x86.h>
+#ifndef TEST_AMX
+# ifndef __x86_64__
+# include <sys/platform/x86.h>
 
-#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
-#endif
+# define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+# endif
 
-/* Clear XMM0...XMM7  */
-#define PREPARE_MALLOC()				\
-{							\
-  asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" );	\
-  asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" );	\
-  asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" );	\
-  asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" );	\
-  asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" );	\
-  asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" );	\
-  asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" );	\
-  asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" );	\
+/* Set XMM0...XMM7 to all 1s.  */
+# define PREPARE_MALLOC()					\
+{								\
+  asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" );	\
+  asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" );	\
+  asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" );	\
+  asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" );	\
+  asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" );	\
+  asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" );	\
+  asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" );	\
+  asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" );	\
 }
+#endif
 
 #include <elf/tst-gnu2-tls2.c>
+
+#ifndef TEST_AMX
+v2di v1, v2, v3;
+#endif
diff --git a/sysdeps/x86/tst-gnu2-tls2.h b/sysdeps/x86/tst-gnu2-tls2.h
new file mode 100644
index 0000000..fdbb565
--- /dev/null
+++ b/sysdeps/x86/tst-gnu2-tls2.h
@@ -0,0 +1,37 @@
+/* Test TLSDESC relocation, x86 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef TEST_AMX
+# include <support/check.h>
+
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
+/* Store zero vectors to v1 and v2 before the TLS descriptor call.  */
+# define BEFORE_TLSDESC_CALL()					\
+  v1 = __extension__(v2di){0, 0};				\
+  v2 = __extension__(v2di){0, 0};
+/* Zero v3, force it through an SSE register and verify it is zero.  */
+# define AFTER_TLSDESC_CALL()					\
+  v3 = __extension__(v2di){0, 0};				\
+  asm volatile ("" : "+x" (v3));				\
+  union { v2di x; long long a[2]; } u;				\
+  u.x = v3;							\
+  TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
+#endif /* !TEST_AMX */
+
+#include <elf/tst-gnu2-tls2.h>
diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c
new file mode 100644
index 0000000..6130d91
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.c
@@ -0,0 +1,22 @@
+#ifndef __x86_64__
+#include <sys/platform/x86.h>
+
+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+#endif /* !__x86_64__ */
+
+/* Set XMM0...XMM7 to all 1s (pcmpeqd of a register with itself).  */
+#define PREPARE_MALLOC()					\
+{								\
+  asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" );	\
+  asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" );	\
+  asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" );	\
+  asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" );	\
+  asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" );	\
+  asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" );	\
+  asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" );	\
+  asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" );	\
+}
+
+#include <elf/tst-tls23.c>
+
+v2di v1, v2, v3;	/* Definitions; presumably declared via tst-tls23.h */
diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h
new file mode 100644
index 0000000..21cee4c
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.h
@@ -0,0 +1,35 @@
+/* Test that __tls_get_addr preserves XMM registers.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <support/check.h>
+
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
+/* Store zero vectors to v1 and v2 before the TLS call.  */
+#define BEFORE_TLS_CALL()					\
+  v1 = __extension__(v2di){0, 0};				\
+  v2 = __extension__(v2di){0, 0};
+/* Zero v3, force it through an SSE register and verify it is zero.  */
+#define AFTER_TLS_CALL()					\
+  v3 = __extension__(v2di){0, 0};				\
+  asm volatile ("" : "+x" (v3));				\
+  union { v2di x; long long a[2]; } u;				\
+  u.x = v3;							\
+  TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
+
+#include <elf/tst-tls23.h>
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 5723ec1..8cace35 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -11,6 +11,7 @@ endif
 
 ifeq ($(subdir),csu)
 gen-as-const-headers += link-defines.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
 endif
 
 ifeq ($(subdir),gmon)
@@ -19,6 +20,10 @@ sysdep_routines += _mcount
 # recursive calls when ENTRY is used.  Just copy the normal static
 # object.
 sysdep_noprof += _mcount
+
+ifeq (yes,$(have-x86-apx))
+CFLAGS-mcount.c += -mno-apxf
+endif
 endif
 
 ifeq ($(subdir),string)
@@ -41,9 +46,6 @@ ifeq ($(subdir),elf)
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
 		   -mno-mmx)
 
-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
-
-tests += ifuncmain8
 modules-names += ifuncmod8
 
 $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so
@@ -214,11 +216,25 @@ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
 endif
 
 test-internal-extras += tst-gnu2-tls2mod1
-endif # $(subdir) == elf
 
-ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
-endif
+tests-special += $(objpfx)check-rtld.out
+
+$(objpfx)rtld.reloc: $(objpfx)rtld.os
+	@rm -f $@T
+	LC_ALL=C $(READELF) -rW $< > $@T
+	test -s $@T
+	mv -f $@T $@
+common-generated += $(objpfx)rtld.reloc
+
+# Verify that there are no run-time relocations against __ehdr_start nor
+# _end.
+$(objpfx)check-rtld.out: $(objpfx)rtld.reloc
+	LC_ALL=C; \
+	if grep -E "R_X86_64_64.*(__ehdr_start|_end)" $^ > $@; \
+	  then false; else true; fi; \
+	$(evaluate-test)
+generated += check-rtld.out
+endif # $(subdir) == elf
 
 ifeq ($(subdir),wcsmbs)
 
@@ -250,6 +266,15 @@ endif
 
 
 ifneq ($(enable-cet),no)
+# Add -fcf-protection to CFLAGS when CET is enabled.
+CFLAGS-.o += -fcf-protection
+CFLAGS-.os += -fcf-protection
+CFLAGS-.op += -fcf-protection
+CFLAGS-.oS += -fcf-protection
+
+# Compile assembly codes with <cet.h> when CET is enabled.
+asm-CPPFLAGS += -fcf-protection -include cet.h
+
 ifeq ($(subdir),elf)
 sysdep-dl-routines += dl-cet
 
@@ -442,18 +467,7 @@ $(objpfx)tst-shstk-legacy-1g.out: \
   $(..)/sysdeps/x86_64/tst-shstk-legacy-1g.sh $(objpfx)tst-shstk-legacy-1g
 	$(SHELL) $< $(common-objpfx) '$(test-program-prefix)' 2> $@; \
 	$(evaluate-test)
-endif
 
-# Add -fcf-protection to CFLAGS when CET is enabled.
-CFLAGS-.o += -fcf-protection
-CFLAGS-.os += -fcf-protection
-CFLAGS-.op += -fcf-protection
-CFLAGS-.oS += -fcf-protection
-
-# Compile assembly codes with <cet.h> when CET is enabled.
-asm-CPPFLAGS += -fcf-protection -include cet.h
-
-ifeq ($(subdir),elf)
 ifeq (yes,$(build-shared))
 tests-special += $(objpfx)check-cet.out
 endif
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index bbf520b..32324f6 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -289,6 +289,8 @@ fi
 config_vars="$config_vars
 have-x86-apx = $libc_cv_x86_have_apx"
 
+libc_cv_support_sframe=yes
+
 test -n "$critic_missing" && as_fn_error $? "
 *** $critic_missing" "$LINENO" 5
 
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index 4a3f7f4..a00958e 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -104,5 +104,7 @@ if test $libc_cv_x86_have_apx = yes; then
 fi
 LIBC_CONFIG_VAR([have-x86-apx], [$libc_cv_x86_have_apx])
 
+libc_cv_support_sframe=yes
+
 test -n "$critic_missing" && AC_MSG_ERROR([
 *** $critic_missing])
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index d1bb125..9a55fc5 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -22,7 +22,6 @@
 #include <features-offsets.h>
 #include <isa-level.h>
 #include "tlsdesc.h"
-#include "dl-trampoline-save.h"
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_tlsdesc_dynamic.  */
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a055722..ac85f96 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -22,7 +22,6 @@
 #include <features-offsets.h>
 #include <link-defines.h>
 #include <isa-level.h>
-#include "dl-trampoline-save.h"
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_fixup.  */
diff --git a/sysdeps/x86_64/fpu/math-use-builtins-trunc.h b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h
new file mode 100644
index 0000000..c2387eb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h
@@ -0,0 +1,9 @@
+#ifdef __SSE4_1__
+# define USE_TRUNC_BUILTIN 1
+# define USE_TRUNCF_BUILTIN 1
+#else
+# define USE_TRUNC_BUILTIN 0
+# define USE_TRUNCF_BUILTIN 0
+#endif
+#define USE_TRUNCL_BUILTIN 0
+#define USE_TRUNCF128_BUILTIN 0
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 3403422..708b142 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -26,6 +26,14 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
 CFLAGS-s_cosf-fma.c = -mfma -mavx2
 CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
+# Build the SSE4.1 variants unless the ISA level is 2 or above.
+ifeq (,$(filter $(have-x86-isa-level),$(x86-isa-level-2-or-above)))
+sysdep_calls += \
+  s_modf-sse4_1 \
+  s_modff-sse4_1 \
+# sysdep_calls
+endif
+
 # Check if ISA level is 3 or above.
 ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
 libm-sysdep_routines += \
@@ -43,6 +51,10 @@ libm-sysdep_routines += \
   s_truncf-avx \
 # libm-sysdep_routines
 else
+sysdep_calls += \
+  s_modf-avx \
+  s_modff-avx \
+# sysdep_calls
 ifeq (no,$(have-x86-apx))
 libm-sysdep_routines += \
   e_asin-fma4 \
@@ -121,6 +133,11 @@ libm-sysdep_routines += \
   s_truncf-c \
 # libm-sysdep_routines
 endif
+
+# $(sysdep_calls) functions are built both for libc and libm.  While the
+# libc objects have the prefix s_, the libm ones are prefixed with m_.
+sysdep_routines += $(sysdep_calls)
+libm-sysdep_routines += $(sysdep_calls:s_%=m_%)
 endif
 
 CFLAGS-e_asin-fma4.c = -mfma4
@@ -140,6 +157,12 @@ CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sincos-avx.c = -msse2avx -DSSE2AVX
+
+CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x -fno-builtin-modff64
+CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32
+
+CFLAGS-s_modf-avx.c = -mavx -fno-builtin-modff32x -fno-builtin-modff64
+CFLAGS-s_modff-avx.c = -mavx -fno-builtin-modff32
 endif
 
 ifeq ($(subdir),mathvec)
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h
new file mode 100644
index 0000000..071595f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h
@@ -0,0 +1,41 @@
+/* Common definition for ifunc selections optimized with SSE4.1 and AVX.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
+    return OPTIMIZE (avx);
+  /* No usable AVX: fall back to the SSE4.1 or the SSE2 variant.  */
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+  return OPTIMIZE (sse41);
+#else /* MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL */
+  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
+    return OPTIMIZE (sse41);
+
+  return OPTIMIZE (sse2);
+#endif /* MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL */
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
new file mode 100644
index 0000000..ab4f03d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
@@ -0,0 +1,3 @@
+#define __modf __modf_avx
+
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
new file mode 100644
index 0000000..00aa8cd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
@@ -0,0 +1,3 @@
+#define __modf __modf_sse41
+
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/unix/sysv/linux/kernel_termios.h b/sysdeps/x86_64/fpu/multiarch/s_modf.c
index f02a197..e365bfc 100644
--- a/sysdeps/unix/sysv/linux/kernel_termios.h
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1997-2025 Free Software Foundation, Inc.
+/* Multiple versions of modf
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,23 +16,26 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef _KERNEL_TERMIOS_H
-#define _KERNEL_TERMIOS_H 1
-/* The following corresponds to the values from the Linux 2.1.20 kernel.  */
-
-#define __KERNEL_NCCS 19
-
-struct __kernel_termios
-  {
-    tcflag_t c_iflag;		/* input mode flags */
-    tcflag_t c_oflag;		/* output mode flags */
-    tcflag_t c_cflag;		/* control mode flags */
-    tcflag_t c_lflag;		/* local mode flags */
-    cc_t c_line;		/* line discipline */
-    cc_t c_cc[__KERNEL_NCCS];	/* control characters */
-  };
-
-#define _HAVE_C_ISPEED 0
-#define _HAVE_C_OSPEED 0
-
-#endif /* kernel_termios.h */
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
+/* Make <math.h> declare modf/__modf under __redirect_ names.  */
+# define modf __redirect_modf
+# define __modf __redirect___modf
+# include <math.h>
+# undef modf
+# undef __modf
+/* The selector header declares the variants and IFUNC_SELECTOR.  */
+# define SYMBOL_NAME modf
+# include "ifunc-sse4_1-avx.h"
+
+libc_ifunc_redirected (__redirect_modf, __modf, IFUNC_SELECTOR ());
+libm_alias_double (__modf, modf)
+# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+#  define __modf __modf_sse41
+# else
+#  define __modf __modf_sse2
+# endif
+#endif /* MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL */
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
new file mode 100644
index 0000000..07cb9c1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
@@ -0,0 +1,3 @@
+#define __modff __modff_avx
+
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
new file mode 100644
index 0000000..060c5e3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
@@ -0,0 +1,3 @@
+#define __modff __modff_sse41
+
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/fpu/multiarch/s_modff.c
index 761128d..a4b5429 100644
--- a/sysdeps/x86_64/dl-trampoline-save.h
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff.c
@@ -1,5 +1,5 @@
-/* x86-64 PLT trampoline register save macros.
-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
+/* Multiple versions of modff
+   Copyright (C) 2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,19 +16,26 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+# define modff __redirect_modff
+# define __modff __redirect___modff
+# include <math.h>
+# undef modff
+# undef __modff
 
-   __tls_get_addr may be called with 8-byte stack alignment.  Although
-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
-   that stack will be always aligned at 16 bytes.  */
-# define DL_STACK_ALIGNMENT 8
-#endif
+# define SYMBOL_NAME modff
+# include "ifunc-sse4_1-avx.h"
 
-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
-   stack to 16 bytes before calling _dl_fixup.  */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
-   || 16 > DL_STACK_ALIGNMENT)
+libc_ifunc_redirected (__redirect_modff, __modff, IFUNC_SELECTOR ());
+libm_alias_float (__modf, modf) /* Presumably the macro adds the "f".  */
+# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+#  define __modff __modff_sse41
+# else
+#  define __modff __modff_sse2
+# endif
+#endif /* MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL */
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a834977..c2dcadd 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -922,7 +922,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				     (CPU_FEATURE_USABLE (AVX2)
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncpy_avx2)
-	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
 				     1,
 				     __wcsncpy_generic))
 
@@ -952,7 +952,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				     (CPU_FEATURE_USABLE (AVX2)
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcpncpy_avx2)
-	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy,
+	      X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy,
 				     1,
 				     __wcpncpy_generic))