diff options
Diffstat (limited to 'sysdeps')
366 files changed, 9290 insertions, 4125 deletions
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile index 4b7f8a5..bb97d31 100644 --- a/sysdeps/aarch64/Makefile +++ b/sysdeps/aarch64/Makefile @@ -41,15 +41,18 @@ gen-as-const-headers += \ dl-link.sym \ rtld-global-offsets.sym -tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2 +tests-internal += \ + tst-ifunc-arg-1 \ + tst-ifunc-arg-2 \ + tst-ifunc-arg-3 \ + tst-ifunc-arg-4 \ + # tests-internal -ifeq (yes,$(aarch64-variant-pcs)) tests += tst-vpcs modules-names += tst-vpcs-mod LDFLAGS-tst-vpcs-mod.so = -Wl,-z,lazy $(objpfx)tst-vpcs: $(objpfx)tst-vpcs-mod.so endif -endif ifeq ($(subdir),csu) gen-as-const-headers += \ @@ -75,7 +78,9 @@ sysdep_routines += \ __alloc_gcs tests += \ - tst-sme-jmp + tst-sme-jmp \ + tst-sme-za-state \ + # tests endif ifeq ($(subdir),malloc) diff --git a/sysdeps/aarch64/__alloc_gcs.c b/sysdeps/aarch64/__alloc_gcs.c index e70b459..b98e5fc 100644 --- a/sysdeps/aarch64/__alloc_gcs.c +++ b/sysdeps/aarch64/__alloc_gcs.c @@ -15,6 +15,8 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include "aarch64-gcs.h" + #include <sysdep.h> #include <unistd.h> #include <sys/mman.h> @@ -34,7 +36,7 @@ map_shadow_stack (void *addr, size_t size, unsigned long flags) #define GCS_ALTSTACK_RESERVE 160 void * -__alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size) +__alloc_gcs (size_t stack_size, struct gcs_record *gcs) { size_t size = (stack_size / 2 + GCS_ALTSTACK_RESERVE) & -8UL; if (size > GCS_MAX_SIZE) @@ -45,9 +47,6 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size) if (base == MAP_FAILED) return NULL; - *ss_base = base; - *ss_size = size; - uint64_t *gcsp = (uint64_t *) ((char *) base + size); /* Skip end of GCS token. 
*/ gcsp--; @@ -58,6 +57,14 @@ __alloc_gcs (size_t stack_size, void **ss_base, size_t *ss_size) __munmap (base, size); return NULL; } + + if (gcs != NULL) + { + gcs->gcs_base = base; + gcs->gcs_token = gcsp; + gcs->gcs_size = size; + } + /* Return the target GCS pointer for context switch. */ return gcsp + 1; } diff --git a/sysdeps/aarch64/__arm_za_disable.S b/sysdeps/aarch64/__arm_za_disable.S index 6290803..92f4814 100644 --- a/sysdeps/aarch64/__arm_za_disable.S +++ b/sysdeps/aarch64/__arm_za_disable.S @@ -88,10 +88,8 @@ L(save_loop): L(end): ret L(fail): -#if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -#endif + paciasp + cfi_negate_ra_state stp x29, x30, [sp, -32]! cfi_adjust_cfa_offset (32) cfi_rel_offset (x29, 0) diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S index 981bf80..70ac02c 100644 --- a/sysdeps/aarch64/__longjmp.S +++ b/sysdeps/aarch64/__longjmp.S @@ -24,51 +24,43 @@ /* __longjmp(jmpbuf, val) */ ENTRY (__longjmp) - cfi_def_cfa(x0, 0) - cfi_offset(x19, JB_X19<<3) - cfi_offset(x20, JB_X20<<3) - cfi_offset(x21, JB_X21<<3) - cfi_offset(x22, JB_X22<<3) - cfi_offset(x23, JB_X23<<3) - cfi_offset(x24, JB_X24<<3) - cfi_offset(x25, JB_X25<<3) - cfi_offset(x26, JB_X26<<3) - cfi_offset(x27, JB_X27<<3) - cfi_offset(x28, JB_X28<<3) - cfi_offset(x29, JB_X29<<3) - cfi_offset(x30, JB_LR<<3) - - cfi_offset( d8, JB_D8<<3) - cfi_offset( d9, JB_D9<<3) - cfi_offset(d10, JB_D10<<3) - cfi_offset(d11, JB_D11<<3) - cfi_offset(d12, JB_D12<<3) - cfi_offset(d13, JB_D13<<3) - cfi_offset(d14, JB_D14<<3) - cfi_offset(d15, JB_D15<<3) #if IS_IN(libc) - /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */ -# if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -# endif - stp x29, x30, [sp, -16]! - cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - mov x29, sp + /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. 
+ The calling convention of __libc_arm_za_disable allows to do + this thus allowing to avoid saving to and reading from stack. + As a result we also don't need to sign the return address and + check it after returning because it is not stored to stack. */ + mov x13, x30 + cfi_register (x30, x13) bl __libc_arm_za_disable - ldp x29, x30, [sp], 16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) -# if HAVE_AARCH64_PAC_RET - AUTIASP - cfi_window_save -# endif + mov x30, x13 + cfi_register (x13, x30) #endif + cfi_def_cfa (x0, 0) + cfi_offset (x19, JB_X19<<3) + cfi_offset (x20, JB_X20<<3) + cfi_offset (x21, JB_X21<<3) + cfi_offset (x22, JB_X22<<3) + cfi_offset (x23, JB_X23<<3) + cfi_offset (x24, JB_X24<<3) + cfi_offset (x25, JB_X25<<3) + cfi_offset (x26, JB_X26<<3) + cfi_offset (x27, JB_X27<<3) + cfi_offset (x28, JB_X28<<3) + cfi_offset (x29, JB_X29<<3) + cfi_offset (x30, JB_LR<<3) + + cfi_offset ( d8, JB_D8<<3) + cfi_offset ( d9, JB_D9<<3) + cfi_offset (d10, JB_D10<<3) + cfi_offset (d11, JB_D11<<3) + cfi_offset (d12, JB_D12<<3) + cfi_offset (d13, JB_D13<<3) + cfi_offset (d14, JB_D14<<3) + cfi_offset (d15, JB_D15<<3) + ldp x19, x20, [x0, #JB_X19<<3] ldp x21, x22, [x0, #JB_X21<<3] ldp x23, x24, [x0, #JB_X23<<3] diff --git a/sysdeps/aarch64/aarch64-gcs.h b/sysdeps/aarch64/aarch64-gcs.h index 162ef18..8e253ed 100644 --- a/sysdeps/aarch64/aarch64-gcs.h +++ b/sysdeps/aarch64/aarch64-gcs.h @@ -23,6 +23,21 @@ #include <stddef.h> #include <stdbool.h> -void *__alloc_gcs (size_t, void **, size_t *) attribute_hidden; +struct gcs_record +{ + void *gcs_base; + void *gcs_token; + size_t gcs_size; +}; + +void *__alloc_gcs (size_t, struct gcs_record *) attribute_hidden; + +static inline bool +has_gcs (void) +{ + register unsigned long x16 asm ("x16") = 1; + asm ("hint 40" /* chkfeat x16 */ : "+r" (x16)); + return x16 == 0; +} #endif diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure index 4bd5496..f364e65 100755 --- a/sysdeps/aarch64/configure +++ 
b/sysdeps/aarch64/configure @@ -185,219 +185,14 @@ else default-abi = lp64" fi -# Only consider BTI supported if -mbranch-protection=bti is -# on by default in the compiler and the linker produces -# binaries with GNU property notes in PT_GNU_PROPERTY segment. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BTI support" >&5 -printf %s "checking for BTI support... " >&6; } -if test ${libc_cv_aarch64_bti+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat > conftest.c <<EOF -void foo (void) { } -EOF - libc_cv_aarch64_bti=no - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } \ - && { ac_try='$READELF -lW conftest.so | grep -q GNU_PROPERTY' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } \ - && { ac_try='$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } - then - libc_cv_aarch64_bti=yes - fi - rm -rf conftest.* ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_bti" >&5 -printf "%s\n" "$libc_cv_aarch64_bti" >&6; } -config_vars="$config_vars -aarch64-bti = $libc_cv_aarch64_bti" -if test $libc_cv_aarch64_bti = yes; then - printf "%s\n" "#define HAVE_AARCH64_BTI 1" >>confdefs.h - -fi - -# Check if glibc is built with return address signing, i.e. -# if -mbranch-protection=pac-ret is on. 
We need this because -# pac-ret relies on unwinder support so it's not safe to use -# it in assembly code unconditionally, but there is no -# feature test macro for it in gcc. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if pac-ret is enabled" >&5 -printf %s "checking if pac-ret is enabled... " >&6; } -if test ${libc_cv_aarch64_pac_ret+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat > conftest.c <<EOF -int bar (void); -int foo (void) { return bar () + 1; } -EOF - libc_cv_aarch64_pac_ret=no - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } \ - && { ac_try='grep -q -E '\''(hint( | )+25|paciasp)'\'' conftest.s' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } - then - libc_cv_aarch64_pac_ret=yes - fi - rm -rf conftest.* ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_pac_ret" >&5 -printf "%s\n" "$libc_cv_aarch64_pac_ret" >&6; } -if test $libc_cv_aarch64_pac_ret = yes; then - printf "%s\n" "#define HAVE_AARCH64_PAC_RET 1" >>confdefs.h - -fi - -# Check if binutils supports variant PCS symbols. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for variant PCS support" >&5 -printf %s "checking for variant PCS support... 
" >&6; } -if test ${libc_cv_aarch64_variant_pcs+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat > conftest.S <<EOF -.global foo -.type foo, %function -.variant_pcs foo -foo: - ret -.global bar -.type bar, %function -bar: - b foo -EOF - libc_cv_aarch64_variant_pcs=no - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } \ - && { ac_try='$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; } - then - libc_cv_aarch64_variant_pcs=yes - fi - rm -rf conftest.* ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_variant_pcs" >&5 -printf "%s\n" "$libc_cv_aarch64_variant_pcs" >&6; } -config_vars="$config_vars -aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs" - -# Check if asm support armv8.2-a+sve -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for SVE support in assembler" >&5 -printf %s "checking for SVE support in assembler... " >&6; } -if test ${libc_cv_aarch64_sve_asm+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat > conftest.s <<\EOF - .arch armv8.2-a+sve - ptrue p0.b -EOF -if { ac_try='${CC-cc} -c conftest.s 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; }; then - libc_cv_aarch64_sve_asm=yes -else - libc_cv_aarch64_sve_asm=no -fi -rm -f conftest* ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_asm" >&5 -printf "%s\n" "$libc_cv_aarch64_sve_asm" >&6; } -if test $libc_cv_aarch64_sve_asm = yes; then - printf "%s\n" "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h - -fi - if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -# Check if compiler supports SVE ACLE. -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for availability of SVE ACLE" >&5 -printf %s "checking for availability of SVE ACLE... " >&6; } -if test ${libc_cv_aarch64_sve_acle+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat > conftest.c <<EOF -#include <arm_sve.h> -EOF - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - libc_cv_aarch64_sve_acle=yes - else - libc_cv_aarch64_sve_acle=no - fi - rm conftest.c ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_sve_acle" >&5 -printf "%s\n" "$libc_cv_aarch64_sve_acle" >&6; } - -# Check if compiler is sufficient to build mathvec -if test $build_mathvec = yes; then - fail=no - if test $libc_cv_aarch64_variant_pcs = no; then - fail=yes - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but linker does not support variant PCS." >&5 -printf "%s\n" "$as_me: WARNING: mathvec is enabled but linker does not support variant PCS." >&2;} - fi - if test $libc_cv_aarch64_sve_asm = no; then - fail=yes - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but assembler does not support SVE." >&5 -printf "%s\n" "$as_me: WARNING: mathvec is enabled but assembler does not support SVE." 
>&2;} - fi - if test $libc_cv_aarch64_sve_acle = no; then - fail=yes - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&5 -printf "%s\n" "$as_me: WARNING: mathvec is enabled but compiler does not have SVE ACLE." >&2;} - fi - if test $fail = yes; then - as_fn_error $? "use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI)." "$LINENO" 5 - fi -else +if test $build_mathvec = no; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: mathvec is disabled, this results in incomplete ABI." >&5 printf "%s\n" "$as_me: WARNING: mathvec is disabled, this results in incomplete ABI." >&2;} fi +libc_cv_support_sframe=yes + diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac index 56d12d6..a9a1b74 100644 --- a/sysdeps/aarch64/configure.ac +++ b/sysdeps/aarch64/configure.ac @@ -24,119 +24,12 @@ else LIBC_CONFIG_VAR([default-abi], [lp64]) fi -# Only consider BTI supported if -mbranch-protection=bti is -# on by default in the compiler and the linker produces -# binaries with GNU property notes in PT_GNU_PROPERTY segment. -AC_CACHE_CHECK([for BTI support], [libc_cv_aarch64_bti], [dnl - cat > conftest.c <<EOF -void foo (void) { } -EOF - libc_cv_aarch64_bti=no - if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.c]) \ - && AC_TRY_COMMAND([$READELF -lW conftest.so | grep -q GNU_PROPERTY]) \ - && AC_TRY_COMMAND([$READELF -nW conftest.so | grep -q "NT_GNU_PROPERTY_TYPE_0.*AArch64 feature:.* BTI"]) - then - libc_cv_aarch64_bti=yes - fi - rm -rf conftest.*]) -LIBC_CONFIG_VAR([aarch64-bti], [$libc_cv_aarch64_bti]) -if test $libc_cv_aarch64_bti = yes; then - AC_DEFINE(HAVE_AARCH64_BTI) -fi - -# Check if glibc is built with return address signing, i.e. -# if -mbranch-protection=pac-ret is on. 
We need this because -# pac-ret relies on unwinder support so it's not safe to use -# it in assembly code unconditionally, but there is no -# feature test macro for it in gcc. -AC_CACHE_CHECK([if pac-ret is enabled], [libc_cv_aarch64_pac_ret], [dnl - cat > conftest.c <<EOF -int bar (void); -int foo (void) { return bar () + 1; } -EOF - libc_cv_aarch64_pac_ret=no - if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \ - && AC_TRY_COMMAND([grep -q -E '\''(hint( | )+25|paciasp)'\'' conftest.s]) - then - libc_cv_aarch64_pac_ret=yes - fi - rm -rf conftest.*]) -if test $libc_cv_aarch64_pac_ret = yes; then - AC_DEFINE(HAVE_AARCH64_PAC_RET) -fi - -# Check if binutils supports variant PCS symbols. -AC_CACHE_CHECK([for variant PCS support], [libc_cv_aarch64_variant_pcs], [dnl - cat > conftest.S <<EOF -.global foo -.type foo, %function -.variant_pcs foo -foo: - ret -.global bar -.type bar, %function -bar: - b foo -EOF - libc_cv_aarch64_variant_pcs=no - if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostdlib -nostartfiles $no_ssp -shared -fPIC -o conftest.so conftest.S]) \ - && AC_TRY_COMMAND([$READELF -dW conftest.so | grep -q AARCH64_VARIANT_PCS]) - then - libc_cv_aarch64_variant_pcs=yes - fi - rm -rf conftest.*]) -LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) - -# Check if asm support armv8.2-a+sve -AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl -cat > conftest.s <<\EOF - .arch armv8.2-a+sve - ptrue p0.b -EOF -if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then - libc_cv_aarch64_sve_asm=yes -else - libc_cv_aarch64_sve_asm=no -fi -rm -f conftest*]) -if test $libc_cv_aarch64_sve_asm = yes; then - AC_DEFINE(HAVE_AARCH64_SVE_ASM) -fi - if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -# Check if compiler supports SVE ACLE. 
-AC_CACHE_CHECK(for availability of SVE ACLE, libc_cv_aarch64_sve_acle, [dnl - cat > conftest.c <<EOF -#include <arm_sve.h> -EOF - if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fsyntax-only -ffreestanding conftest.c]); then - libc_cv_aarch64_sve_acle=yes - else - libc_cv_aarch64_sve_acle=no - fi - rm conftest.c]) - -# Check if compiler is sufficient to build mathvec -if test $build_mathvec = yes; then - fail=no - if test $libc_cv_aarch64_variant_pcs = no; then - fail=yes - AC_MSG_WARN([mathvec is enabled but linker does not support variant PCS.]) - fi - if test $libc_cv_aarch64_sve_asm = no; then - fail=yes - AC_MSG_WARN([mathvec is enabled but assembler does not support SVE.]) - fi - if test $libc_cv_aarch64_sve_acle = no; then - fail=yes - AC_MSG_WARN([mathvec is enabled but compiler does not have SVE ACLE.]) - fi - if test $fail = yes; then - AC_MSG_ERROR([use a compatible toolchain or configure with --disable-mathvec (this results in incomplete ABI).]) - fi -else +if test $build_mathvec = no; then AC_MSG_WARN([mathvec is disabled, this results in incomplete ABI.]) fi + +libc_cv_support_sframe=yes diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S index 0c3ee40..e9e530c 100644 --- a/sysdeps/aarch64/crti.S +++ b/sysdeps/aarch64/crti.S @@ -65,7 +65,7 @@ call_weak_fn: cbz x0, 1f b PREINIT_FUNCTION 1: - RET + ret .size call_weak_fn, .-call_weak_fn #endif @@ -75,11 +75,7 @@ call_weak_fn: .hidden _init .type _init, %function _init: -#if HAVE_AARCH64_PAC_RET - PACIASP -#else - BTI_C -#endif + paciasp stp x29, x30, [sp, -16]! mov x29, sp #if PREINIT_FUNCTION_WEAK @@ -94,10 +90,6 @@ _init: .hidden _fini .type _fini, %function _fini: -#if HAVE_AARCH64_PAC_RET - PACIASP -#else - BTI_C -#endif + paciasp stp x29, x30, [sp, -16]! 
mov x29, sp diff --git a/sysdeps/aarch64/crtn.S b/sysdeps/aarch64/crtn.S index b52b10e..653a548 100644 --- a/sysdeps/aarch64/crtn.S +++ b/sysdeps/aarch64/crtn.S @@ -41,14 +41,10 @@ .section .init,"ax",%progbits ldp x29, x30, [sp], 16 -#if HAVE_AARCH64_PAC_RET - AUTIASP -#endif - RET + autiasp + ret .section .fini,"ax",%progbits ldp x29, x30, [sp], 16 -#if HAVE_AARCH64_PAC_RET - AUTIASP -#endif - RET + autiasp + ret diff --git a/sysdeps/aarch64/dl-irel.h b/sysdeps/aarch64/dl-irel.h index ae402bc..7bae3c3 100644 --- a/sysdeps/aarch64/dl-irel.h +++ b/sysdeps/aarch64/dl-irel.h @@ -21,11 +21,26 @@ #define _DL_IREL_H #include <stdio.h> -#include <unistd.h> #include <ldsodefs.h> -#include <sysdep.h> #include <sys/ifunc.h> +#define _IFUNC_ARG_SIZE_VER0 24 /* sizeof 1st published __ifunc_arg_t */ +#define _IFUNC_ARG_SIZE_VER1 40 /* sizeof 2nd published __ifunc_arg_t */ + +#define sizeof_field(TYPE, MEMBER) sizeof ((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof (TYPE, MEMBER) + sizeof_field (TYPE, MEMBER)) + +_Static_assert (sizeof (__ifunc_arg_t) == _IFUNC_ARG_SIZE_VER1, + "sizeof (__ifunc_arg_t) != _IFUNC_ARG_SIZE_VER1"); + +_Static_assert (_IFUNC_ARG_SIZE_VER1 + == (_IFUNC_HWCAP_MAX + 1) * sizeof (unsigned long), + "_IFUNC_ARG_SIZE_VER1 and _IFUNC_HWCAP_MAX mismatch"); + +#undef offsetofend +#undef sizeof_field + #define ELF_MACHINE_IRELA 1 static inline ElfW(Addr) @@ -37,6 +52,8 @@ elf_ifunc_invoke (ElfW(Addr) addr) arg._size = sizeof (arg); arg._hwcap = GLRO(dl_hwcap); arg._hwcap2 = GLRO(dl_hwcap2); + arg._hwcap3 = GLRO(dl_hwcap3); + arg._hwcap4 = GLRO(dl_hwcap4); return ((ElfW(Addr) (*) (uint64_t, const __ifunc_arg_t *)) (addr)) (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg); } diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S index fc40d66..2ff8d95 100644 --- a/sysdeps/aarch64/dl-tlsdesc.S +++ b/sysdeps/aarch64/dl-tlsdesc.S @@ -74,9 +74,9 @@ cfi_startproc .align 2 _dl_tlsdesc_return: - BTI_C + bti c ldr x0, [x0, 8] - RET + 
ret cfi_endproc .size _dl_tlsdesc_return, .-_dl_tlsdesc_return @@ -95,7 +95,7 @@ _dl_tlsdesc_return: cfi_startproc .align 2 _dl_tlsdesc_undefweak: - BTI_C + bti c str x1, [sp, #-16]! cfi_adjust_cfa_offset (16) ldr x0, [x0, 8] @@ -103,7 +103,7 @@ _dl_tlsdesc_undefweak: sub x0, x0, x1 ldr x1, [sp], #16 cfi_adjust_cfa_offset (-16) - RET + ret cfi_endproc .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak @@ -141,12 +141,8 @@ _dl_tlsdesc_undefweak: cfi_startproc .align 2 _dl_tlsdesc_dynamic: -# if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -# else - BTI_C -# endif + paciasp + cfi_negate_ra_state /* Save just enough registers to support fast path, if we fall into slow path we will save additional registers. */ @@ -177,12 +173,10 @@ _dl_tlsdesc_dynamic: 1: ldp x3, x4, [sp, #16] ldp x1, x2, [sp], #32 -# if HAVE_AARCH64_PAC_RET - AUTIASP - cfi_window_save -# endif + autiasp + cfi_negate_ra_state cfi_adjust_cfa_offset (-32) - RET + ret 2: /* This is the slow path. We need to call __tls_get_addr() which means we need to save and restore all the register that the diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S index d6bed96..d628b01 100644 --- a/sysdeps/aarch64/dl-trampoline.S +++ b/sysdeps/aarch64/dl-trampoline.S @@ -34,7 +34,7 @@ cfi_startproc .align 2 _dl_runtime_resolve: - BTI_C + bti c /* AArch64 we get called with: ip0 &PLTGOT[2] ip1 temp(dl resolver entry point) @@ -127,12 +127,8 @@ _dl_runtime_resolve: cfi_startproc .align 2 _dl_runtime_profile: -# if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -# else - BTI_C -# endif + paciasp + cfi_negate_ra_state /* AArch64 we get called with: ip0 &PLTGOT[2] ip1 temp(dl resolver entry point) @@ -251,17 +247,12 @@ _dl_runtime_profile: cfi_restore(x29) cfi_restore(x30) -# if HAVE_AARCH64_PAC_RET add sp, sp, SF_SIZE cfi_adjust_cfa_offset (-SF_SIZE) - AUTIASP - cfi_window_save + autiasp + cfi_negate_ra_state add sp, sp, 16 cfi_adjust_cfa_offset (-16) -# else - add sp, sp, SF_SIZE + 16 - 
cfi_adjust_cfa_offset (- SF_SIZE - 16) -# endif /* Jump to the newly found address. */ br ip0 @@ -321,10 +312,8 @@ _dl_runtime_profile: /* LR from within La_aarch64_reg */ ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] cfi_restore(lr) -# if HAVE_AARCH64_PAC_RET /* Note: LR restored from La_aarch64_reg has no PAC. */ - cfi_window_save -# endif + cfi_negate_ra_state mov sp, x29 cfi_def_cfa_register (sp) ldr x29, [x29, #0] diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile index aadedf1..068c11c 100644 --- a/sysdeps/aarch64/fpu/Makefile +++ b/sysdeps/aarch64/fpu/Makefile @@ -1,10 +1,14 @@ libmvec-supported-funcs = acos \ acosh \ + acospi \ asin \ asinh \ + asinpi \ atan \ atanh \ + atanpi \ atan2 \ + atan2pi \ cbrt \ cos \ cosh \ @@ -52,8 +56,11 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \ v_powf_data endif -sve-cflags = -march=armv8-a+sve +# Enable SVE for building libmvec. Since CFLAGS may contain a -mcpu or -march, +# add a generic -mcpu and -march with SVE enabled. Also use a tune for a modern +# SVE core. 
+sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1 ifeq ($(build-mathvec),yes) bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \ diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions index 0f9503f..2980cb7 100644 --- a/sysdeps/aarch64/fpu/Versions +++ b/sysdeps/aarch64/fpu/Versions @@ -157,4 +157,26 @@ libmvec { _ZGVsMxv_tanpi; _ZGVsMxv_tanpif; } + GLIBC_2.42 { + _ZGVnN2v_acospi; + _ZGVnN2v_acospif; + _ZGVnN4v_acospif; + _ZGVsMxv_acospi; + _ZGVsMxv_acospif; + _ZGVnN2v_asinpi; + _ZGVnN2v_asinpif; + _ZGVnN4v_asinpif; + _ZGVsMxv_asinpi; + _ZGVsMxv_asinpif; + _ZGVnN2v_atanpi; + _ZGVnN2v_atanpif; + _ZGVnN4v_atanpif; + _ZGVsMxv_atanpi; + _ZGVsMxv_atanpif; + _ZGVnN2vv_atan2pi; + _ZGVnN2vv_atan2pif; + _ZGVnN4vv_atan2pif; + _ZGVsMxvv_atan2pi; + _ZGVsMxvv_atan2pif; + } } diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c index 7709b54..453f780 100644 --- a/sysdeps/aarch64/fpu/acos_advsimd.c +++ b/sysdeps/aarch64/fpu/acos_advsimd.c @@ -18,24 +18,23 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { - float64x2_t poly[12]; - float64x2_t pi, pi_over_2; + double c1, c3, c5, c7, c9, c11; + float64x2_t c0, c2, c4, c6, c8, c10; uint64x2_t abs_mask; + float64x2_t pi, pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), - V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), - V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), - V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), - V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), - V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, - .pi = V2 (0x1.921fb54442d18p+1), - .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi = V2 (0x1.921fb54442d18p+1), .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; @@ -63,7 +62,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) acos(x) ~ pi/2 - (x + x^3 P(x^2)). - The largest observed error in this region is 1.18 ulps, + The largest observed error in this region is 1.18 ulp: _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0 want 0x1.0d54d1985c069p+0. @@ -71,9 +70,9 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). - The largest observed error in this region is 1.52 ulps, - _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1 - want 0x1.edbbedf8a7d6cp-1. */ + The largest observed error in this region is 1.50 ulp: + _ZGVnN2v_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 + want 0x1.ec1a46aa829p-1. */ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -99,13 +98,32 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); /* Use a single polynomial approximation P for both intervals. 
*/ + float64x2_t z3 = vmulq_f64 (z2, z); float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); - float64x2_t z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); - /* Finalize polynomial: z + z * z2 * P(z2). */ - p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); + /* Order-11 Estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p411 = vfmaq_f64 (p47, z8, p811); + float64x2_t p = vfmaq_f64 (p03, z8, p411); + + /* Finalize polynomial: z + z3 * P(z2). */ + p = vfmaq_f64 (z, z3, p); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c index 74e2f7d..104f0d7 100644 --- a/sysdeps/aarch64/fpu/acos_sve.c +++ b/sysdeps/aarch64/fpu/acos_sve.c @@ -18,20 +18,21 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[12]; - float64_t pi, pi_over_2; + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, - 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, - 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, - 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, - .pi = 0x1.921fb54442d18p+1, + .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, + .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, + .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, + .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, + .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, .pi_over_2 = 0x1.921fb54442d18p+0, }; @@ -42,20 +43,21 @@ static const struct data acos(x) ~ pi/2 - (x + x^3 P(x^2)). - The largest observed error in this region is 1.18 ulps, - _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0 - want 0x1.0d4d0f55667f7p+0. + The largest observed error in this region is 1.18 ulp: + _ZGVsMxv_acos (0x1.fbb7c9079b429p-2) got 0x1.0d51266607582p+0 + want 0x1.0d51266607583p+0. For |x| in [0.5, 1.0], use same approximation with a change of variable acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). - The largest observed error in this region is 1.52 ulps, - _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1 - want 0x1.ed82df4243f0bp-1. */ + The largest observed error in this region is 1.50 ulp: + _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 + want 0x1.ec1a46aa829p-1. */ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); @@ -70,24 +72,41 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svsqrt_m (ax, a_gt_half, z2); /* Use a single polynomial approximation P for both intervals. 
*/ - svfloat64_t z4 = svmul_x (pg, z2, z2); - svfloat64_t z8 = svmul_x (pg, z4, z4); - svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); + svfloat64_t z3 = svmul_x (ptrue, z2, z); + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmad_x (pg, p411, z8, p03); /* Finalize polynomial: z + z * z2 * P(z2). */ - p = svmla_x (pg, z, svmul_x (pg, z, z2), p); + p = svmad_x (pg, p, z3, z); /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 = 2 Q(|x|) , for 0.5 < x < 1.0 = pi - 2 Q(|x|) , for -1.0 < x < -0.5. 
*/ - svfloat64_t y - = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign)); - - svbool_t is_neg = svcmplt (pg, x, 0.0); - svfloat64_t off = svdup_f64_z (is_neg, d->pi); - svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0)); - svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2)); - - return svmla_x (pg, add, mul, y); + svfloat64_t mul = svreinterpret_f64 ( + svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10)); + mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign)); + svfloat64_t add = svreinterpret_f64 ( + svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->pi_over_2)))); + add = svsub_m (a_gt_half, sv_f64 (d->pi_over_2), add); + + return svmsb_x (pg, p, mul, add); } diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c index 326b2cc..3a84959 100644 --- a/sysdeps/aarch64/fpu/acosh_sve.c +++ b/sysdeps/aarch64/fpu/acosh_sve.c @@ -30,10 +30,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) } /* SVE approximation for double-precision acosh, based on log1p. - The largest observed error is 3.19 ULP in the region where the + The largest observed error is 3.14 ULP in the region where the argument to log1p falls in the k=0 interval, i.e. x close to 1: - SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2 - want 0x1.ed23399f51373p-2. */ + SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2 + want 0x1.ef0cee7c33ce4p-2. */ svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg) { /* (ix - One) >= (BigBound - One). */ diff --git a/sysdeps/aarch64/fpu/acospi_advsimd.c b/sysdeps/aarch64/fpu/acospi_advsimd.c new file mode 100644 index 0000000..bb6c209 --- /dev/null +++ b/sysdeps/aarch64/fpu/acospi_advsimd.c @@ -0,0 +1,118 @@ +/* Double-Precision vector (Advanced SIMD) inverse cospi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float64x2_t c0, c2, c4, c6, c8, c10; + uint64x2_t abs_mask; + float64x2_t one, inv_pi; + double c1, c3, c5, c7, c9, c11; +} data = { + /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2) + on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using + iterative approach for minimisation of relative error in asinpif Sollya + file. */ + .c0 = V2 (0x1.b2995e7b7b5fbp-5), .c1 = 0x1.8723a1d58d83p-6, + .c2 = V2 (0x1.d1a452eacf2fep-7), .c3 = 0x1.3ce52c4d75582p-7, + .c4 = V2 (0x1.d2b2a0aea27d5p-8), .c5 = 0x1.6a0b9b92cad8bp-8, + .c6 = V2 (0x1.2290c84438caep-8), .c7 = 0x1.efba896580d02p-9, + .c8 = V2 (0x1.44446707af38p-9), .c9 = 0x1.5070b3e7aa03ep-8, + .c10 = V2 (-0x1.c70015d0ebdafp-9), .c11 = 0x1.27029c383fed9p-7, + .abs_mask = V2 (0x7fffffffffffffff), .one = V2 (1.0), + .inv_pi = V2 (0x1.45f306dc9c883p-2), +}; + +/* Double-precision implementation of vector acospi(x). + + For |x| in [0, 0.5], use order-11 polynomial P to approximate asinpi + such that the final approximation of acospi is an odd polynomial: + + acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)). + + The largest observed error in this region is 1.35 ulp: + _ZGVnN2v_acospi (0x1.fb16ed35a6d64p-2) got 0x1.5722a3dbcafb4p-2 + want 0x1.5722a3dbcafb5p-2. 
+ + For |x| in [0.5, 1.0], use same approximation with a change of variable + + acospi(x) = y/pi + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 2.55 ulp: + _ZGVnN2v_acospi (0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4 + want 0x1.ffd43d5dd3a9bp-4. */ +float64x2_t VPCS_ATTR NOINLINE V_NAME_D1 (acospi) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint64x2_t ix = vreinterpretq_u64_f64 (x); + uint64x2_t ia = vandq_u64 (ix, d->abs_mask); + + float64x2_t ax = vreinterpretq_f64_u64 (ia); + uint64x2_t a_le_half = vcaltq_f64 (x, v_f64 (0.5)); + + /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with + z2 = x ^ 2 and z = |x| , if |x| < 0.5 + z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ + float64x2_t z2 = vbslq_f64 (a_le_half, vmulq_f64 (x, x), + vfmsq_n_f64 (v_f64 (0.5), ax, 0.5)); + float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + + /* Order-11 Estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p411 = vfmaq_f64 (p47, z8, p811); + float64x2_t p = vfmaq_f64 (p03, z8, p411); + + /* Finalize polynomial: z + z * z2 * P(z2). 
*/ + p = vfmaq_f64 (d->inv_pi, z2, p); + p = vmulq_f64 (p, z); + + /* acospi(|x|) + = 1/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 + = 1 - 2 Q(|x|) , for -1.0 < x < -0.5. */ + float64x2_t y = vbslq_f64 (d->abs_mask, p, x); + uint64x2_t is_neg = vcltzq_f64 (x); + float64x2_t off = vreinterpretq_f64_u64 ( + vandq_u64 (is_neg, vreinterpretq_u64_f64 (d->one))); + float64x2_t mul = vbslq_f64 (a_le_half, d->one, v_f64 (-2.0)); + float64x2_t add = vbslq_f64 (a_le_half, v_f64 (0.5), off); + + return vfmsq_f64 (add, mul, y); +} diff --git a/sysdeps/aarch64/fpu/acospi_sve.c b/sysdeps/aarch64/fpu/acospi_sve.c new file mode 100644 index 0000000..e41eaad --- /dev/null +++ b/sysdeps/aarch64/fpu/acospi_sve.c @@ -0,0 +1,112 @@ +/* Double-Precision vector (SVE) inverse cospi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t inv_pi, half; +} data = { + /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2) + on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using + iterative approach for minimisation of relative error in asinpif Sollya + file. 
*/ + .c0 = 0x1.b2995e7b7b5fbp-5, .c1 = 0x1.8723a1d58d83p-6, + .c2 = 0x1.d1a452eacf2fep-7, .c3 = 0x1.3ce52c4d75582p-7, + .c4 = 0x1.d2b2a0aea27d5p-8, .c5 = 0x1.6a0b9b92cad8bp-8, + .c6 = 0x1.2290c84438caep-8, .c7 = 0x1.efba896580d02p-9, + .c8 = 0x1.44446707af38p-9, .c9 = 0x1.5070b3e7aa03ep-8, + .c10 = -0x1.c70015d0ebdafp-9, .c11 = 0x1.27029c383fed9p-7, + .inv_pi = 0x1.45f306dc9c883p-2, .half = 0.5, +}; + +/* Double-precision SVE implementation of vector acospi(x). + + For |x| in [0, 0.5], use order 11 polynomial P to approximate asinpi + such that the final approximation of acospi is: + + acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)). + + The largest observed error in this region is 1.35 ulp: + _ZGVsMxv_acospi (0x1.fb014996aea18p-2) got 0x1.572a91755bbf6p-2 + want 0x1.572a91755bbf7p-2. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + acospi(x) = y/pi + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 2.55 ulp: + _ZGVsMxv_acospi(0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4 + want 0x1.ffd43d5dd3a9bp-4. */ +svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); + + svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); + svfloat64_t ax = svabs_x (pg, x); + svbool_t a_gt_half = svacgt (pg, x, 0.5f); + + /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with + z2 = x ^ 2 and z = |x| , if |x| < 0.5 + z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ + svfloat64_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5), + svmul_x (ptrue, x, x)); + svfloat64_t z = svsqrt_m (ax, a_gt_half, z2); + + /* Order-11 Estrin. 
*/ + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmla_x (pg, p03, z8, p411); + + p = svmla_x (pg, sv_f64 (d->inv_pi), z2, p); + p = svmul_x (ptrue, p, z); + + /* acospi(|x|) = 1/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 + = 1 - 2 Q(|x|) , for -1.0 < x < -0.5. */ + svfloat64_t mul = svreinterpret_f64 ( + svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10)); + mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign)); + svfloat64_t add = svreinterpret_f64 ( + svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->half)))); + add = svsub_m (a_gt_half, sv_f64 (d->half), add); + + return svmsb_x (pg, p, mul, add); +} diff --git a/sysdeps/aarch64/fpu/acospif_advsimd.c b/sysdeps/aarch64/fpu/acospif_advsimd.c new file mode 100644 index 0000000..8486b62 --- /dev/null +++ b/sysdeps/aarch64/fpu/acospif_advsimd.c @@ -0,0 +1,106 @@ +/* Single-Precision vector (Advanced SIMD) inverse cospi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float32x4_t c0, c2, c4, inv_pi; + float c1, c3, c5, null; +} data = { + /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2) + on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using + iterative approach for minimisation of relative error in asinpif Sollya + file. */ + .c0 = V4 (0x1.b2995ep-5f), .c1 = 0x1.8724ep-6f, + .c2 = V4 (0x1.d1301ep-7f), .c3 = 0x1.446d3cp-7f, + .c4 = V4 (0x1.654848p-8f), .c5 = 0x1.5fdaa8p-7f, + .inv_pi = V4 (0x1.45f306p-2f), +}; + +#define AbsMask 0x7fffffff + +/* Single-precision implementation of vector acospi(x). + + For |x| in [0, 0.5], use order 5 polynomial P to approximate asinpi + such that the final approximation of acospi is an odd polynomial: + + acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)). + + The largest observed error in this region is 1.23 ulps, + _ZGVnN4v_acospif (0x1.fee13ep-2) got 0x1.55beb4p-2 want 0x1.55beb2p-2. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + acospi(x) = y/pi + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 2.53 ulps, + _ZGVnN4v_acospif (0x1.6ad644p-1) got 0x1.fe8f96p-3 + want 0x1.fe8f9cp-3. 
*/ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (acospi) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint32x4_t ix = vreinterpretq_u32_f32 (x); + uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask)); + + float32x4_t ax = vreinterpretq_f32_u32 (ia); + uint32x4_t a_le_half = vcaltq_f32 (x, v_f32 (0.5f)); + + /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with + z2 = x ^ 2 and z = |x| , if |x| < 0.5 + z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ + + float32x4_t z2 = vbslq_f32 (a_le_half, vmulq_f32 (x, x), + vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f)); + float32x4_t z = vbslq_f32 (a_le_half, ax, vsqrtq_f32 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ + + /* Order-5 Estrin evaluation scheme. */ + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); + float32x4_t c135 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2); + float32x4_t p = vfmaq_f32 (p03, z8, p45); + /* Add 1/pi as final coeff. */ + p = vfmaq_f32 (d->inv_pi, z2, p); + + /* Finalize polynomial: z * P(z^2). */ + p = vmulq_f32 (z, p); + + /* acospi(|x|) + = 1/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 + = 1 - 2 Q(|x|) , for -1.0 < x < -0.5. 
*/ + + float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x); + uint32x4_t is_neg = vcltzq_f32 (x); + float32x4_t off = vreinterpretq_f32_u32 ( + vandq_u32 (vreinterpretq_u32_f32 (v_f32 (1.0f)), is_neg)); + float32x4_t mul = vbslq_f32 (a_le_half, v_f32 (1.0f), v_f32 (-2.0f)); + float32x4_t add = vbslq_f32 (a_le_half, v_f32 (0.5f), off); + + return vfmsq_f32 (add, mul, y); +} +libmvec_hidden_def (V_NAME_F1 (acospi)) +HALF_WIDTH_ALIAS_F1 (acospi) diff --git a/sysdeps/aarch64/fpu/acospif_sve.c b/sysdeps/aarch64/fpu/acospif_sve.c new file mode 100644 index 0000000..ea4fc4a --- /dev/null +++ b/sysdeps/aarch64/fpu/acospif_sve.c @@ -0,0 +1,91 @@ +/* Single-Precision vector (SVE) inverse cospi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float32_t c0, c1, c2, c3, c4, inv_pi, half; +} data = { + /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2) + on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using + iterative approach for minimisation of relative error. 
*/ + .c0 = 0x1.b29968p-5f, .c1 = 0x1.871424p-6f, .c2 = 0x1.d56e44p-7f, + .c3 = 0x1.149bb8p-7f, .c4 = 0x1.8e07fep-7f, .inv_pi = 0x1.45f306p-2f, + .half = 0.5f, +}; + +/* Single-precision SVE implementation of vector acospi(x). + + For |x| in [0, 0.5], use order 5 polynomial P to approximate asinpi + such that the final approximation of acospi is: + + acospi(x) ~ 1/2 - (x/pi + x^3 P(x^2)). + + The largest observed error in this region is 1.3 ulps, + _ZGVsMxv_acospif(0x1.ffa9d2p-2) got 0x1.557504p-2 + want 0x1.557502p-2. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + acospi(x) = y/pi + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 2.61 ulps, + _ZGVsMxv_acospif (0x1.6b232ep-1) got 0x1.fe04bap-3 + want 0x1.fe04cp-3. */ +svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svbool_t ptrue = svptrue_b32 (); + + svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000); + svfloat32_t ax = svabs_x (pg, x); + svbool_t a_gt_half = svacgt (pg, x, 0.5f); + + /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with + z2 = x ^ 2 and z = |x| , if |x| < 0.5 + z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */ + svfloat32_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f32 (0.5f), ax, 0.5f), + svmul_x (ptrue, x, x)); + svfloat32_t z = svsqrt_m (ax, a_gt_half, z2); + + /* Use a single polynomial approximation P for both intervals. */ + svfloat32_t p = svmla_x (pg, sv_f32 (d->c3), z2, d->c4); + p = svmad_x (pg, z2, p, d->c2); + p = svmad_x (pg, z2, p, d->c1); + p = svmad_x (pg, z2, p, d->c0); + /* Add 1/pi as final coeff. */ + p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p); + /* Finalize polynomial: z * P(z^2). */ + p = svmul_x (ptrue, z, p); + + /* acospi(|x|) + = 1/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 + = 1 - 2 Q(|x|) , for -1.0 < x < -0.5. 
*/ + svfloat32_t y + = svreinterpret_f32 (svorr_x (ptrue, svreinterpret_u32 (p), sign)); + svfloat32_t mul = svsel (a_gt_half, sv_f32 (2.0f), sv_f32 (-1.0f)); + svfloat32_t add = svreinterpret_f32 ( + svorr_x (ptrue, sign, svreinterpret_u32 (sv_f32 (d->half)))); + add = svsub_m (a_gt_half, sv_f32 (d->half), add); + + return svmad_x (pg, y, mul, add); +} diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h index 38681a4..c202bda 100644 --- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h +++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h @@ -19,10 +19,13 @@ libmvec_hidden_proto (V_NAME_F1(acos)); libmvec_hidden_proto (V_NAME_F1(acosh)); +libmvec_hidden_proto (V_NAME_F1(acospi)); libmvec_hidden_proto (V_NAME_F1(asin)); libmvec_hidden_proto (V_NAME_F1(asinh)); +libmvec_hidden_proto (V_NAME_F1(asinpi)); libmvec_hidden_proto (V_NAME_F1(atan)); libmvec_hidden_proto (V_NAME_F1(atanh)); +libmvec_hidden_proto (V_NAME_F1(atanpi)); libmvec_hidden_proto (V_NAME_F1(cbrt)); libmvec_hidden_proto (V_NAME_F1(cos)); libmvec_hidden_proto (V_NAME_F1(cosh)); @@ -47,3 +50,4 @@ libmvec_hidden_proto (V_NAME_F1(tan)); libmvec_hidden_proto (V_NAME_F1(tanh)); libmvec_hidden_proto (V_NAME_F1(tanpi)); libmvec_hidden_proto (V_NAME_F2(atan2)); +libmvec_hidden_proto (V_NAME_F2(atan2pi)); diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c index 4142116..f74141c 100644 --- a/sysdeps/aarch64/fpu/asin_advsimd.c +++ b/sysdeps/aarch64/fpu/asin_advsimd.c @@ -18,24 +18,23 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { - float64x2_t poly[12]; + float64x2_t c0, c2, c4, c6, c8, c10; float64x2_t pi_over_2; uint64x2_t abs_mask; + double c1, c3, c5, c7, c9, c11; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ - .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), - V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), - V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), - V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), - V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), - V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, - .pi_over_2 = V2 (0x1.921fb54442d18p+0), - .abs_mask = V2 (0x7fffffffffffffff), + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), }; #define AllMask v_u64 (0xffffffffffffffff) @@ -68,8 +67,8 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). The largest observed error in this region is 2.69 ulps, - _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 + want 0x1.1111dd54ddf99p-1. 
*/ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -86,7 +85,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) return special_case (x, x, AllMask); #endif - uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5)); + uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 @@ -99,7 +98,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) float64x2_t z4 = vmulq_f64 (z2, z2); float64x2_t z8 = vmulq_f64 (z4, z4); float64x2_t z16 = vmulq_f64 (z8, z8); - float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); + + /* order-11 estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + float64x2_t p = vfmaq_f64 (p07, z16, p811); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c index 9314466..975f408 100644 --- a/sysdeps/aarch64/fpu/asin_sve.c +++ b/sysdeps/aarch64/fpu/asin_sve.c @@ -18,45 +18,43 @@ <https://www.gnu.org/licenses/>. 
*/ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[12]; - float64_t pi_over_2f; + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t pi_over_2; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ - .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, - 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, - 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, - 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, - 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, - -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, - .pi_over_2f = 0x1.921fb54442d18p+0, + .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, + .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, + .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, + .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, + .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = 0x1.921fb54442d18p+0, }; -#define P(i) sv_f64 (d->poly[i]) - /* Double-precision SVE implementation of vector asin(x). For |x| in [0, 0.5], use an order 11 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). - The largest observed error in this region is 0.52 ulps, - _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2 - want 0x1.ec13757305f26p-2. - - For |x| in [0.5, 1.0], use same approximation with a change of variable + The largest observed error in this region is 0.98 ulp: + _ZGVsMxv_asin (0x1.d98f6a748ed8ap-2) got 0x1.ec4eb661a73d3p-2 + want 0x1.ec4eb661a73d2p-2. - asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + For |x| in [0.5, 1.0], use same approximation with a change of variable: + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). 
- The largest observed error in this region is 2.69 ulps, - _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 - want 0x1.110d7e85fdd53p-1. */ + The largest observed error in this region is 2.66 ulp: + _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1 + want 0x1.10b9586f087abp-1. */ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); svfloat64_t ax = svabs_x (pg, x); @@ -70,17 +68,37 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svsqrt_m (ax, a_ge_half, z2); /* Use a single polynomial approximation P for both intervals. */ + svfloat64_t z3 = svmul_x (pg, z2, z); svfloat64_t z4 = svmul_x (pg, z2, z2); svfloat64_t z8 = svmul_x (pg, z4, z4); - svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + /* Order-11 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmla_x (pg, p03, z8, p411); + /* Finalize polynomial: z + z * z2 * P(z2). */ - p = svmla_x (pg, z, svmul_x (pg, z, z2), p); + p = svmla_x (pg, z, z3, p); - /* asin(|x|) = Q(|x|) , for |x| < 0.5 - = pi/2 - 2 Q(|x|), for |x| >= 0.5. 
*/ - svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f); + /* asin(|x|) = Q(|x|), for |x| < 0.5 + = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ + svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2); - /* Copy sign. */ + /* Reinsert the sign from the argument. */ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); } diff --git a/sysdeps/aarch64/fpu/asinf_advsimd.c b/sysdeps/aarch64/fpu/asinf_advsimd.c index 52c7c0e..013936c 100644 --- a/sysdeps/aarch64/fpu/asinf_advsimd.c +++ b/sysdeps/aarch64/fpu/asinf_advsimd.c @@ -18,22 +18,21 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - float32x4_t poly[5]; + float32x4_t c0, c2, c4; + float c1, c3; float32x4_t pi_over_2f; } data = { /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ - .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5), - V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) }, - .pi_over_2f = V4 (0x1.921fb6p+0f), + .c0 = V4 (0x1.55555ep-3f), .c1 = 0x1.33261ap-4f, + .c2 = V4 (0x1.70d7dcp-5f), .c3 = 0x1.b059dp-6f, + .c4 = V4 (0x1.3af7d8p-5f), .pi_over_2f = V4 (0x1.921fb6p+0f), }; #define AbsMask 0x7fffffff -#define Half 0x3f000000 #define One 0x3f800000 #define Small 0x39800000 /* 2^-12. */ @@ -47,11 +46,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) /* Single-precision implementation of vector asin(x). - For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct - rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the - following approximation. - For |x| in [Small, 0.5], use order 4 polynomial P such that the final + For |x| < 0.5, use order 4 polynomial P such that the final approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). 
The largest observed error in this region is 0.83 ulps, @@ -80,24 +76,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x) #endif float32x4_t ax = vreinterpretq_f32_u32 (ia); - uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half)); + uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f)); /* Evaluate polynomial Q(x) = y + y * z * P(z) with z = x ^ 2 and y = |x| , if |x| < 0.5 z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x), - vfmsq_n_f32 (v_f32 (0.5), ax, 0.5)); + vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f)); float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2)); /* Use a single polynomial approximation P for both intervals. */ - float32x4_t p = v_horner_4_f32 (z2, d->poly); + + /* Pairwise Horner evaluation of the order-4 polynomial. */ + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t c13 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c13, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c13, 1); + float32x4_t p = vfmaq_f32 (p23, d->c4, z4); + p = vfmaq_f32 (p01, p, z4); /* Finalize polynomial: z + z * z2 * P(z2). */ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p); /* asin(|x|) = Q(|x|) , for |x| < 0.5 = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ float32x4_t y - = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0)); + = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0f)); /* Copy sign. */ return vbslq_f32 (v_u32 (AbsMask), y, x); diff --git a/sysdeps/aarch64/fpu/asinpi_advsimd.c b/sysdeps/aarch64/fpu/asinpi_advsimd.c new file mode 100644 index 0000000..b11f98b --- /dev/null +++ b/sysdeps/aarch64/fpu/asinpi_advsimd.c @@ -0,0 +1,109 @@ +/* Double-Precision vector (Advanced SIMD) inverse sinpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float64x2_t c0, c2, c4, c6, c8, c10; + float64x2_t pi_over_2, inv_pi; + uint64x2_t abs_mask; + double c1, c3, c5, c7, c9, c11; +} data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ + .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, + .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, + .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, + .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, + .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), + .inv_pi = V2 (0x1.45f306dc9c883p-2), +}; + +/* Double-precision implementation of vector asinpi(x). + + For |x| in [0, 0.5], use an order 11 polynomial P such that the final + approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). + asinpi(x) = asin(x) * 1/pi. + + The largest observed error in this region is 1.63 ulps, + _ZGVnN2v_asinpi (0x1.9125919fa617p-19) got 0x1.fec183497ea53p-21 + want 0x1.fec183497ea51p-21. 
+ + For |x| in [0.5, 1.0], use same approximation with a change of variable + + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 3.04 ulps, + _ZGVnN2v_asinpi (0x1.0479b7bd98553p-1) got 0x1.5beebec797326p-3 + want 0x1.5beebec797329p-3. */ + +float64x2_t VPCS_ATTR V_NAME_D1 (asinpi) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + float64x2_t ax = vabsq_f64 (x); + + uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); + + /* Evaluate polynomial Q(x) = y + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 + z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ + float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x), + vfmsq_n_f64 (v_f64 (0.5), ax, 0.5)); + float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); + + /* order-11 Estrin. */ + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); + float64x2_t c911 = vld1q_f64 (&d->c9); + + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + float64x2_t p = vfmaq_f64 (p07, z16, p811); + + /* Finalize polynomial: z + z * z2 * P(z2). */ + p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); + + /* asin(|x|) = Q(|x|) , for |x| < 0.5 + = pi/2 - 2 Q(|x|), for |x| >= 0.5. 
*/ + float64x2_t y = vbslq_f64 (a_lt_half, p, vfmsq_n_f64 (d->pi_over_2, p, 2.0)); + /* asinpi(|x|) = asin(|x|) /pi. */ + y = vmulq_f64 (y, d->inv_pi); + + /* Copy sign. */ + return vbslq_f64 (d->abs_mask, y, x); +} diff --git a/sysdeps/aarch64/fpu/asinpi_sve.c b/sysdeps/aarch64/fpu/asinpi_sve.c new file mode 100644 index 0000000..71ef8ce --- /dev/null +++ b/sysdeps/aarch64/fpu/asinpi_sve.c @@ -0,0 +1,107 @@ +/* Double-Precision vector (SVE) inverse sinpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float64_t c1, c3, c5, c7, c9, c11; + float64_t c0, c2, c4, c6, c8, c10; + float64_t pi_over_2, inv_pi; +} data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. 
*/ + .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, + .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, + .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, + .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, + .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, + .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = 0x1.921fb54442d18p+0, .inv_pi = 0x1.45f306dc9c883p-2, +}; + +/* Double-precision SVE implementation of vector asinpi(x). + + For |x| in [0, 0.5], use an order 11 polynomial P such that the final + approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). + + The largest observed error in this region is 1.32 ulp: + _ZGVsMxv_asinpi (0x1.fc12356dbdefbp-2) got 0x1.5272e9658ba66p-3 + want 0x1.5272e9658ba64p-3 + + For |x| in [0.5, 1.0], use same approximation with a change of variable: + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 3.48 ulp: + _ZGVsMxv_asinpi (0x1.03da0c2295424p-1) got 0x1.5b02b3dcafaefp-3 + want 0x1.5b02b3dcafaf2p-3. */ +svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); + + svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); + svfloat64_t ax = svabs_x (pg, x); + svbool_t a_ge_half = svacge (pg, x, 0.5); + + /* Evaluate polynomial Q(x) = y + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 + z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ + svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5), + svmul_x (ptrue, x, x)); + svfloat64_t z = svsqrt_m (ax, a_ge_half, z2); + + /* Use a single polynomial approximation P for both intervals. 
*/ + svfloat64_t z3 = svmul_x (pg, z2, z); + svfloat64_t z4 = svmul_x (pg, z2, z2); + svfloat64_t z8 = svmul_x (pg, z4, z4); + + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + + /* Order-11 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p411 = svmla_x (pg, p47, z8, p811); + svfloat64_t p = svmla_x (pg, p03, z8, p411); + + /* Finalize polynomial: z + z3 * P(z2). */ + p = svmla_x (pg, z, z3, p); + + /* asin(|x|) = Q(|x|) , for |x| < 0.5 + = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ + svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2); + + /* Reinsert the sign from the argument. */ + svfloat64_t inv_pi = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (sv_f64 (d->inv_pi)), sign)); + + return svmul_x (pg, y, inv_pi); +} diff --git a/sysdeps/aarch64/fpu/asinpif_advsimd.c b/sysdeps/aarch64/fpu/asinpif_advsimd.c new file mode 100644 index 0000000..1483ea8 --- /dev/null +++ b/sysdeps/aarch64/fpu/asinpif_advsimd.c @@ -0,0 +1,95 @@ +/* Single-Precision vector (Advanced SIMD) inverse sinpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float32x4_t c0, c2, c4, inv_pi; + float c1, c3, c5, null; +} data = { + /* Coefficients of polynomial P such that asin(x)/pi~ x/pi + x^3 * poly(x^2) + on [ 0x1p-126 0x1p-2 ]. rel error: 0x1.ef9f94b1p-33. Generated using + iterative approach for minimisation of relative error in Sollya file. */ + .c0 = V4 (0x1.b2995ep-5f), .c1 = 0x1.8724ep-6f, + .c2 = V4 (0x1.d1301ep-7f), .c3 = 0x1.446d3cp-7f, + .c4 = V4 (0x1.654848p-8f), .c5 = 0x1.5fdaa8p-7f, + .inv_pi = V4 (0x1.45f306p-2f), +}; + +#define AbsMask 0x7fffffff + +/* Single-precision implementation of vector asinpi(x). + + For |x| < 0.5, use order 5 polynomial P such that the final + approximation is an odd polynomial: asinpif(x) ~ x/pi + x^3 P(x^2). + + The largest observed error in this region is 1.68 ulps, + _ZGVnN4v_asinpif (0x1.86e514p-2) got 0x1.fea8c8p-4 want 0x1.fea8ccp-4. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 3.49 ulps, + _ZGVnN4v_asinpif(0x1.0d93fep-1) got 0x1.697aap-3 want 0x1.697a9ap-3. 
*/ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asinpi) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint32x4_t ix = vreinterpretq_u32_f32 (x); + uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask)); + + float32x4_t ax = vreinterpretq_f32_u32 (ia); + uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f)); + + /* Evaluate polynomial Q(x) = y/pi + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 + z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ + float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x), + vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f)); + float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ + + /* Order-5 Estrin evaluation scheme. */ + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); + float32x4_t c135 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c135, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c135, 1); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c135, 2); + float32x4_t p = vfmaq_f32 (p03, z8, p45); + /* Add 1/pi as final coeff. */ + p = vfmaq_f32 (d->inv_pi, z2, p); + + /* Finalize polynomial: z * P(z2). */ + p = vmulq_f32 (z, p); + + /* asinpi(|x|) = Q(|x|), for |x| < 0.5 + = 1/2 - 2 Q(|x|), for |x| >= 0.5. */ + float32x4_t y + = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (v_f32 (0.5f), p, 2.0f)); + + /* Copy sign. */ + return vbslq_f32 (v_u32 (AbsMask), y, x); +} +libmvec_hidden_def (V_NAME_F1 (asinpi)) +HALF_WIDTH_ALIAS_F1 (asinpi) diff --git a/sysdeps/aarch64/fpu/asinpif_sve.c b/sysdeps/aarch64/fpu/asinpif_sve.c new file mode 100644 index 0000000..046b258 --- /dev/null +++ b/sysdeps/aarch64/fpu/asinpif_sve.c @@ -0,0 +1,88 @@ +/* Single-Precision vector (SVE) inverse sinpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float32_t c1, c3, c5; + float32_t c0, c2, c4, inv_pi; +} data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on + [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ + .c0 = 0x1.b2995ep-5f, .c1 = 0x1.8724ep-6f, .c2 = 0x1.d1301ep-7f, + .c3 = 0x1.446d3cp-7f, .c4 = 0x1.654848p-8f, .c5 = 0x1.5fdaa8p-7f, + .inv_pi = 0x1.45f306p-2f, +}; + +/* Single-precision SVE implementation of vector asinpi(x). + + For |x| in [0, 0.5], use order 5 polynomial P such that the final + approximation is an odd polynomial: asinpi(x) ~ x/pi + x^3 P(x^2). + + The largest observed error in this region is 1.96 ulps: + _ZGVsMxv_asinpif (0x1.8e534ep-3) got 0x1.fe6ab4p-5 + want 0x1.fe6ab8p-5. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + asinpi(x) = 1/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 3.46 ulps: + _ZGVsMxv_asinpif (0x1.0df892p-1) got 0x1.6a114cp-3 + want 0x1.6a1146p-3. 
*/ +svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); + + svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000); + + svfloat32_t ax = svabs_x (pg, x); + svbool_t a_ge_half = svacge (pg, x, 0.5); + + /* Evaluate polynomial Q(x) = y + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 + z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ + svfloat32_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5), + svmul_x (pg, x, x)); + svfloat32_t z = svsqrt_m (ax, a_ge_half, z2); + + svfloat32_t z4 = svmul_x (ptrue, z2, z2); + svfloat32_t c135_two = svld1rq (ptrue, &d->c1); + + /* Order-5 Pairwise Horner evaluation scheme. */ + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, c135_two, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, c135_two, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, c135_two, 2); + + svfloat32_t p25 = svmla_x (pg, p23, z4, p45); + svfloat32_t p = svmla_x (pg, p01, z4, p25); + + /* Add 1/pi as final coeff. */ + p = svmla_x (pg, sv_f32 (d->inv_pi), z2, p); + p = svmul_x (pg, p, z); + + /* asinpi(|x|) = Q(|x|), for |x| < 0.5 + = 1/2 - 2 Q(|x|), for |x| >= 0.5. */ + svfloat32_t y = svmsb_m (a_ge_half, p, sv_f32 (2.0), 0.5); + + /* Reinsert sign from argument. 
*/ + return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); +} diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c index 00b4a4f..a31d52f 100644 --- a/sysdeps/aarch64/fpu/atan2_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2_advsimd.c @@ -19,40 +19,38 @@ #include "math_config.h" #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; float64x2_t pi_over_2; - double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; - uint64x2_t zeroinfnan, minustwo; + uint64x2_t zeroinfnan; } data = { - /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on - [2**-1022, 1.0]. */ - .c0 = V2 (-0x1.5555555555555p-2), - .c1 = 0x1.99999999996c1p-3, - .c2 = V2 (-0x1.2492492478f88p-3), - .c3 = 0x1.c71c71bc3951cp-4, - .c4 = V2 (-0x1.745d160a7e368p-4), - .c5 = 0x1.3b139b6a88ba1p-4, - .c6 = V2 (-0x1.11100ee084227p-4), - .c7 = 0x1.e1d0f9696f63bp-5, - .c8 = V2 (-0x1.aebfe7b418581p-5), - .c9 = 0x1.842dbe9b0d916p-5, - .c10 = V2 (-0x1.5d30140ae5e99p-5), - .c11 = 0x1.338e31eb2fbbcp-5, - .c12 = V2 (-0x1.00e6eece7de8p-5), - .c13 = 0x1.860897b29e5efp-6, - .c14 = V2 (-0x1.0051381722a59p-6), - .c15 = 0x1.14e9dc19a4a4ep-7, - .c16 = V2 (-0x1.d0062b42fe3bfp-9), - .c17 = 0x1.17739e210171ap-10, - .c18 = V2 (-0x1.ab24da7be7402p-13), - .c19 = 0x1.358851160a528p-16, + /* Coefficients of polynomial P such that + atan(x)~x+x*P(x^2) on [2^-1022, 1.0]. 
*/ + .c0 = V2 (-0x1.555555555552ap-2), + .c1 = 0x1.9999999995aebp-3, + .c2 = V2 (-0x1.24924923923f6p-3), + .c3 = 0x1.c71c7184288a2p-4, + .c4 = V2 (-0x1.745d11fb3d32bp-4), + .c5 = 0x1.3b136a18051b9p-4, + .c6 = V2 (-0x1.110e6d985f496p-4), + .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = V2 (-0x1.ae644e28058c3p-5), + .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = V2 (-0x1.59d7f901566cbp-5), + .c11 = 0x1.2c982855ab069p-5, + .c12 = V2 (-0x1.eb49592998177p-6), + .c13 = 0x1.69d8b396e3d38p-6, + .c14 = V2 (-0x1.ca980345c4204p-7), + .c15 = 0x1.dc050eafde0b3p-8, + .c16 = V2 (-0x1.7ea70755b8eccp-9), + .c17 = 0x1.ba3da3de903e8p-11, + .c18 = V2 (-0x1.44a4b059b6f67p-13), + .c19 = 0x1.c4a45029e5a91p-17, .pi_over_2 = V2 (0x1.921fb54442d18p+0), .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), - .minustwo = V2 (0xc000000000000000), }; #define SignMask v_u64 (0x8000000000000000) @@ -77,10 +75,9 @@ zeroinfnan (uint64x2_t i, const struct data *d) } /* Fast implementation of vector atan2. - Maximum observed error is 2.8 ulps: - _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5) - got 0x1.92d628ab678ccp-1 - want 0x1.92d628ab678cfp-1. */ + Maximum observed error is 1.97 ulps: + _ZGVnN2vv_atan2 (0x1.42337dba73768p+5, 0x1.422d748cd3e29p+5) + got 0x1.9224810264efcp-1 want 0x1.9224810264efep-1. */ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -101,26 +98,29 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) uint64x2_t pred_xlt0 = vcltzq_f64 (x); uint64x2_t pred_aygtax = vcagtq_f64 (y, x); - /* Set up z for call to atan. */ - float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); - float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax); - float64x2_t z = vdivq_f64 (n, q); - - /* Work out the correct shift. 
*/ - float64x2_t shift - = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo)); - shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); - shift = vmulq_f64 (shift, d->pi_over_2); - - /* Calculate the polynomial approximation. - Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. - The order 19 polynomial P approximates - (atan(sqrt(x))-sqrt(x))/x^(3/2). */ + /* Set up z for evaluation of atan. */ + float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); + float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax); + float64x2_t z = vdivq_f64 (num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. */ + float64x2_t shift = vreinterpretq_f64_u64 ( + vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0)))); + float64x2_t shift2 = vreinterpretq_f64_u64 ( + vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (1.0)))); + shift = vaddq_f64 (shift, shift2); + + /* Calculate the polynomial approximation. */ float64x2_t z2 = vmulq_f64 (z, z); - float64x2_t x2 = vmulq_f64 (z2, z2); - float64x2_t x4 = vmulq_f64 (x2, x2); - float64x2_t x8 = vmulq_f64 (x4, x4); + float64x2_t z3 = vmulq_f64 (z2, z); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); float64x2_t c13 = vld1q_f64 (&d->c1); float64x2_t c57 = vld1q_f64 (&d->c5); @@ -128,45 +128,43 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) float64x2_t c1315 = vld1q_f64 (&d->c13); float64x2_t c1719 = vld1q_f64 (&d->c17); - /* estrin_7. */ + /* Order-7 Estrin. 
*/ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); - float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); - float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); - float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); - /* estrin_11. */ + /* Order-11 Estrin. */ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); - float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); - float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); - float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); - float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); - float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); + float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); - float64x2_t ret = vfmaq_f64 (p07, p819, x8); + float64x2_t poly = vfmaq_f64 (p07, p819, z16); /* Finalize. y = shift + z + z^3 * P(z^2). */ - ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); - ret = vaddq_f64 (ret, shift); + float64x2_t ret = vfmaq_f64 (z, shift, d->pi_over_2); + ret = vfmaq_f64 (ret, z3, poly); if (__glibc_unlikely (v_any_u64 (special_cases))) return special_case (y, x, ret, sign_xy, special_cases); /* Account for the sign of x and y. 
*/ - ret = vreinterpretq_f64_u64 ( + return vreinterpretq_f64_u64 ( veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); - - return ret; } diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c index 163f613..9e2dd24 100644 --- a/sysdeps/aarch64/fpu/atan2_sve.c +++ b/sysdeps/aarch64/fpu/atan2_sve.c @@ -19,25 +19,25 @@ #include "math_config.h" #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[20]; - float64_t pi_over_2; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ - .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, - .pi_over_2 = 0x1.921fb54442d18p+0, + .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, + .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, + .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, + .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, + .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, + .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, + .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, + .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, }; - /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). 
*/ static svfloat64_t NOINLINE special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, @@ -56,15 +56,17 @@ zeroinfnan (svuint64_t i, const svbool_t pg) } /* Fast implementation of SVE atan2. Errors are greatest when y and - x are reasonably close together. The greatest observed error is 2.28 ULP: - _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732) - got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */ -svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + x are reasonably close together. The greatest observed error is 1.94 ULP: + _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5) + got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1. */ +svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, + const svbool_t pg) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); svuint64_t ix = svreinterpret_u64 (x); svuint64_t iy = svreinterpret_u64 (y); + svbool_t ptrue = svptrue_b64 (); svbool_t cmp_x = zeroinfnan (ix, pg); svbool_t cmp_y = zeroinfnan (iy, pg); @@ -81,32 +83,67 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) svbool_t pred_aygtax = svcmpgt (pg, ay, ax); - /* Set up z for call to atan. */ - svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); - svfloat64_t d = svsel (pred_aygtax, ay, ax); - svfloat64_t z = svdiv_x (pg, n, d); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atan. */ + svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat64_t den = svsel (pred_aygtax, ay, ax); + svfloat64_t z = svdiv_x (pg, num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. 
*/ svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); + svfloat64_t shift_mul = svreinterpret_f64 ( + svorr_x (pg, sign_x, svreinterpret_u64 (sv_f64 (0x1.921fb54442d18p+0)))); shift = svsel (pred_aygtax, sv_f64 (1.0), shift); - shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); - shift = svmul_x (pg, shift, data_ptr->pi_over_2); + shift = svmla_x (pg, z, shift, shift_mul); /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ svfloat64_t z2 = svmul_x (pg, z, z); - svfloat64_t x2 = svmul_x (pg, z2, z2); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t x8 = svmul_x (pg, x4, x4); + svfloat64_t z3 = svmul_x (pg, z2, z); + svfloat64_t z4 = svmul_x (pg, z2, z2); + svfloat64_t z8 = svmul_x (pg, z4, z4); + svfloat64_t z16 = svmul_x (pg, z8, z8); - svfloat64_t ret = svmla_x ( - pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly), - sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8); + /* Order-7 Estrin. */ + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); - /* y = shift + z + z^3 * P(z^2). */ - svfloat64_t z3 = svmul_x (pg, z2, z); - ret = svmla_x (pg, z, z3, ret); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + svfloat64_t p07 = svmla_x (pg, p03, z8, p47); + + /* Order-11 Estrin. 
*/ + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + svfloat64_t c1315 = svld1rq (ptrue, &d->c13); + svfloat64_t c1719 = svld1rq (ptrue, &d->c17); - ret = svadd_m (pg, ret, shift); + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); + svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); + + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); + svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); + svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); + + svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); + svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); + + svfloat64_t poly = svmla_x (pg, p07, z16, p819); + + /* y = shift + z + z^3 * P(z^2). */ + svfloat64_t ret = svmla_x (pg, shift, z3, poly); /* Account for the sign of x and y. */ if (__glibc_unlikely (svptest_any (pg, cmp_xy))) diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c index e65406f..75d8738 100644 --- a/sysdeps/aarch64/fpu/atan2f_advsimd.c +++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c @@ -18,22 +18,22 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f32.h" static const struct data { - float32x4_t c0, pi_over_2, c4, c6, c2; + float32x4_t c0, c4, c6, c2; float c1, c3, c5, c7; uint32x4_t comp_const; + float32x4_t pi; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. 
*/ - .c0 = V4 (-0x1.55555p-2f), .c1 = 0x1.99935ep-3f, - .c2 = V4 (-0x1.24051ep-3f), .c3 = 0x1.bd7368p-4f, - .c4 = V4 (-0x1.491f0ep-4f), .c5 = 0x1.93a2c0p-5f, - .c6 = V4 (-0x1.4c3c60p-6f), .c7 = 0x1.01fd88p-8f, - .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1), + .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, + .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, + .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, + .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, + .pi = V4 (0x1.921fb6p+1f), .comp_const = V4 (2 * 0x7f800000lu - 1), }; #define SignMask v_u32 (0x80000000) @@ -54,13 +54,13 @@ static inline uint32x4_t zeroinfnan (uint32x4_t i, const struct data *d) { /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */ - return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const); + return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const); } /* Fast implementation of vector atan2f. Maximum observed error is - 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: - _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 - want 0x1.967f00p-1. */ + 2.13 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: + _ZGVnN4vv_atan2f (0x1.14a9d4p-87, 0x1.0eb886p-87) got 0x1.97aea2p-1 + want 0x1.97ae9ep-1. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) { const struct data *d = ptr_barrier (&data); @@ -81,28 +81,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) uint32x4_t pred_xlt0 = vcltzq_f32 (x); uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax); - /* Set up z for call to atanf. */ - float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); - float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax); - float32x4_t z = vdivq_f32 (n, q); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atanf. 
*/ + float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); + float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax); + float32x4_t z = vdivq_f32 (num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi is done later. + -pi when x < 0 and ax < ay + -pi/2 when x < 0 and ax > ay + 0 when x >= 0 and ax < ay + pi/2 when x >= 0 and ax > ay. */ float32x4_t shift = vreinterpretq_f32_u32 ( - vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f)))); - shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift); - shift = vmulq_f32 (shift, d->pi_over_2); - - /* Calculate the polynomial approximation. - Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, - a standard implementation using z8 creates spurious underflow - in the very last fma (when z^8 is small enough). - Therefore, we split the last fma into a mul and an fma. - Horner and single-level Estrin have higher errors that exceed - threshold. */ + vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f)))); + float32x4_t shift2 = vreinterpretq_f32_u32 ( + vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f)))); + shift = vaddq_f32 (shift, shift2); + + /* Calculate the polynomial approximation. */ float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z3 = vmulq_f32 (z2, z); float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); float32x4_t c1357 = vld1q_f32 (&d->c1); + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0); float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1); float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2); @@ -110,10 +113,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) float32x4_t p03 = vfmaq_f32 (p01, z4, p23); float32x4_t p47 = vfmaq_f32 (p45, z4, p67); - float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47)); + float32x4_t poly = vfmaq_f32 (p03, z8, p47); /* y = shift + z * P(z^2). 
*/ - ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift); + float32x4_t ret = vfmaq_f32 (z, shift, d->pi); + ret = vfmaq_f32 (ret, z3, poly); if (__glibc_unlikely (v_any_u32 (special_cases))) { diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c index 5f26e2a..4d93419 100644 --- a/sysdeps/aarch64/fpu/atan2f_sve.c +++ b/sysdeps/aarch64/fpu/atan2f_sve.c @@ -18,18 +18,18 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f32.h" static const struct data { - float32_t poly[8]; + float32_t c0, c2, c4, c6; + float32_t c1, c3, c5, c7; float32_t pi_over_2; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. */ - .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, - .pi_over_2 = 0x1.921fb6p+0f, + .c0 = -0x1.5554dcp-2, .c1 = 0x1.9978ecp-3, .c2 = -0x1.230a94p-3, + .c3 = 0x1.b4debp-4, .c4 = -0x1.3550dap-4, .c5 = 0x1.61eebp-5, + .c6 = -0x1.0c17d4p-6, .c7 = 0x1.7ea694p-9, .pi_over_2 = 0x1.921fb6p+0f, }; /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ @@ -51,12 +51,14 @@ zeroinfnan (svuint32_t i, const svbool_t pg) /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum - observed error is 2.95 ULP: - _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 - want 0x1.967f00p-1. */ -svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + observed error is 2.21 ULP: + _ZGVsMxvv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. 
*/ +svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, + const svbool_t pg) { - const struct data *data_ptr = ptr_barrier (&data); + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); svuint32_t ix = svreinterpret_u32 (x); svuint32_t iy = svreinterpret_u32 (y); @@ -76,29 +78,42 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) svbool_t pred_aygtax = svcmpgt (pg, ay, ax); - /* Set up z for call to atan. */ - svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); - svfloat32_t d = svsel (pred_aygtax, ay, ax); - svfloat32_t z = svdiv_x (pg, n, d); - - /* Work out the correct shift. */ + /* Set up z for evaluation of atanf. */ + svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat32_t den = svsel (pred_aygtax, ay, ax); + svfloat32_t z = svdiv_x (ptrue, num, den); + + /* Work out the correct shift for atan2: + Multiplication by pi/2 is done later. + -pi when x < 0 and ax >= ay + -pi/2 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + pi/2 when x >= 0 and ax < ay. */ svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); shift = svsel (pred_aygtax, sv_f32 (1.0), shift); shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); - shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ - svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z2 = svmul_x (ptrue, z, z); + svfloat32_t z3 = svmul_x (pg, z2, z); svfloat32_t z4 = svmul_x (pg, z2, z2); svfloat32_t z8 = svmul_x (pg, z4, z4); - svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly); + svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); - /* ret = shift + z + z^3 * P(z^2).
*/ - svfloat32_t z3 = svmul_x (pg, z2, z); - ret = svmla_x (pg, z, z3, ret); + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); + svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); - ret = svadd_m (pg, ret, shift); + svfloat32_t p03 = svmla_x (pg, p01, z4, p23); + svfloat32_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat32_t poly = svmla_x (pg, p03, z8, p47); + + /* ret = shift + z + z^3 * P(z^2). */ + svfloat32_t ret = svmla_x (pg, z, shift, sv_f32 (d->pi_over_2)); + ret = svmla_x (pg, ret, z3, poly); /* Account for the sign of x and y. */ diff --git a/sysdeps/aarch64/fpu/atan2pi_advsimd.c b/sysdeps/aarch64/fpu/atan2pi_advsimd.c new file mode 100644 index 0000000..3cf231b --- /dev/null +++ b/sysdeps/aarch64/fpu/atan2pi_advsimd.c @@ -0,0 +1,175 @@ +/* Double-Precision vector (Advanced SIMD) inverse tan2pi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "v_math.h" + +static const struct data +{ + float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20; + float64x2_t c0; + uint64x2_t zeroinfnan; + float64x2_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; +} data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-1022, 1.0]. */ + .c0 = V2 (0x1.45f306dc9c883p-2), + .c1 = V2 (-0x1.b2995e7b7ba4ap-4), + .c2 = 0x1.04c26be3d2c1p-4, + .c3 = V2 (-0x1.7483759c17ea1p-5), + .c4 = 0x1.21bb95c315d57p-5, + .c5 = V2 (-0x1.da1bdc3d453f3p-6), + .c6 = 0x1.912d20459b4bfp-6, + .c7 = V2 (-0x1.5bbd4545cad1fp-6), + .c8 = 0x1.331b83bec30a1p-6, + .c9 = V2 (-0x1.13d6457f44de3p-6), + .c10 = 0x1.f8e802974db94p-7, + .c11 = V2 (-0x1.d7e173ab04a1ap-7), + .c12 = 0x1.bdfa47d6a4f28p-7, + .c13 = V2 (-0x1.9ba78f3232ceep-7), + .c14 = 0x1.5e6044590ab4fp-7, + .c15 = V2 (-0x1.01ccfdeb9f77fp-7), + .c16 = 0x1.345cf0d4eb1c1p-8, + .c17 = V2 (-0x1.19e5f00f67e3ap-9), + .c18 = 0x1.6d3035ac7625bp-11, + .c19 = V2 (-0x1.286bb9ae4ed79p-13), + .c20 = 0x1.c37ec36da0e1ap-17, + .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), +}; + +#define SignMask v_u64 (0x8000000000000000) +#define OneOverPi v_f64 (0x1.45f306dc9c883p-2) + +/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */ +static float64x2_t VPCS_ATTR NOINLINE +special_case (float64x2_t y, float64x2_t x, float64x2_t ret, + uint64x2_t sign_xy, uint64x2_t cmp) +{ + /* Account for the sign of x and y. */ + ret = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); + + /* Since we have no scalar fallback for atan2pi, + we can instead make a call to atan2 and divide by pi. */ + ret = v_call2_f64 (atan2, y, x, ret, cmp); + + /* Only divide the special cases by pi, and leave the rest unchanged. */ + return vbslq_f64 (cmp, vmulq_f64 (ret, OneOverPi), ret); +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan.
*/ +static inline uint64x2_t +zeroinfnan (uint64x2_t i, const struct data *d) +{ + /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */ + return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)), d->zeroinfnan); +} + +/* Fast implementation of vector atan2pi. + Maximum observed error is 3.04 ulps: + _ZGVnN2vv_atan2pi (0x1.1e0733532ce28p+5, 0x1.2d803379cca1fp+5) + got 0x1.eed60c1e89317p-3 want 0x1.eed60c1e89314p-3. */ +float64x2_t VPCS_ATTR V_NAME_D2 (atan2pi) (float64x2_t y, float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint64x2_t ix = vreinterpretq_u64_f64 (x); + uint64x2_t iy = vreinterpretq_u64_f64 (y); + + uint64x2_t special_cases + = vorrq_u64 (zeroinfnan (ix, d), zeroinfnan (iy, d)); + + uint64x2_t sign_x = vandq_u64 (ix, SignMask); + uint64x2_t sign_y = vandq_u64 (iy, SignMask); + uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y); + + float64x2_t ax = vabsq_f64 (x); + float64x2_t ay = vabsq_f64 (y); + + uint64x2_t pred_xlt0 = vcltzq_f64 (x); + uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax); + + /* Set up z for evaluation of atanpi. */ + float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); + float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax); + float64x2_t z = vdivq_f64 (num, den); + + /* Work out the correct shift for atan2pi: + -1.0 when x < 0 and ax >= ay + -0.5 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + 0.5 when x >= 0 and ax < ay. */ + float64x2_t shift = vreinterpretq_f64_u64 ( + vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-1.0)))); + float64x2_t shift2 = vreinterpretq_f64_u64 ( + vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (0.5)))); + shift = vaddq_f64 (shift, shift2); + + /* Calculate the polynomial approximation.
*/ + float64x2_t z2 = vmulq_f64 (z, z); + float64x2_t z3 = vmulq_f64 (z2, z); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); + + float64x2_t c24 = vld1q_f64 (&d->c2); + float64x2_t c68 = vld1q_f64 (&d->c6); + + /* Order-7 Estrin. */ + float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0); + float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1); + float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0); + float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1); + + float64x2_t p14 = vfmaq_f64 (p12, z4, p34); + float64x2_t p58 = vfmaq_f64 (p56, z4, p78); + float64x2_t p18 = vfmaq_f64 (p14, z8, p58); + + /* Order-11 Estrin. */ + float64x2_t c1012 = vld1q_f64 (&d->c10); + float64x2_t c1416 = vld1q_f64 (&d->c14); + float64x2_t c1820 = vld1q_f64 (&d->c18); + + float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0); + float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1); + float64x2_t p912 = vfmaq_f64 (p910, z4, p1112); + + float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0); + float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1); + float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516); + + float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0); + float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1); + float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920); + + float64x2_t p916 = vfmaq_f64 (p912, z8, p1316); + float64x2_t p920 = vfmaq_f64 (p916, z16, p1720); + + float64x2_t poly = vfmaq_f64 (p18, z16, p920); + + /* y = shift + z * P(z^2). */ + float64x2_t ret = vfmaq_f64 (shift, z, d->c0); + ret = vfmaq_f64 (ret, z3, poly); + + if (__glibc_unlikely (v_any_u64 (special_cases))) + return special_case (y, x, ret, sign_xy, special_cases); + + /* Account for the sign of x and y. 
*/ + return vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); +} diff --git a/sysdeps/aarch64/fpu/atan2pi_sve.c b/sysdeps/aarch64/fpu/atan2pi_sve.c new file mode 100644 index 0000000..f1d1f1c --- /dev/null +++ b/sysdeps/aarch64/fpu/atan2pi_sve.c @@ -0,0 +1,159 @@ +/* Double-Precision vector (SVE) inverse tan2pi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "math_config.h" +#include "sv_math.h" + +static const struct data +{ + float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20; + float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64_t shift_val; +} data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2^-1022, 1.0].
*/ + .c0 = 0x1.45f306dc9c883p-2, .c1 = -0x1.b2995e7b7ba4ap-4, + .c2 = 0x1.04c26be3d2c1p-4, .c3 = -0x1.7483759c17ea1p-5, + .c4 = 0x1.21bb95c315d57p-5, .c5 = -0x1.da1bdc3d453f3p-6, + .c6 = 0x1.912d20459b4bfp-6, .c7 = -0x1.5bbd4545cad1fp-6, + .c8 = 0x1.331b83bec30a1p-6, .c9 = -0x1.13d6457f44de3p-6, + .c10 = 0x1.f8e802974db94p-7, .c11 = -0x1.d7e173ab04a1ap-7, + .c12 = 0x1.bdfa47d6a4f28p-7, .c13 = -0x1.9ba78f3232ceep-7, + .c14 = 0x1.5e6044590ab4fp-7, .c15 = -0x1.01ccfdeb9f77fp-7, + .c16 = 0x1.345cf0d4eb1c1p-8, .c17 = -0x1.19e5f00f67e3ap-9, + .c18 = 0x1.6d3035ac7625bp-11, .c19 = -0x1.286bb9ae4ed79p-13, + .c20 = 0x1.c37ec36da0e1ap-17, .shift_val = 0.5, +}; + +#define OneOverPi sv_f64 (0x1.45f306dc9c883p-2) + +/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ +static svfloat64_t NOINLINE +special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, + const svbool_t cmp) +{ + ret = sv_call2_f64 (atan2, y, x, ret, cmp); + return svmul_f64_m (cmp, ret, OneOverPi); +} + +/* Returns a predicate indicating true if the input is the bit representation + of 0, infinity or nan. */ +static inline svbool_t +zeroinfnan (svuint64_t i, const svbool_t pg) +{ + return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1), + sv_u64 (2 * asuint64 (INFINITY) - 1)); +} + +/* Fast implementation of SVE atan2pi. + Maximum observed error is 3.11 ulps: + _ZGVsMxvv_atan2pi (0x1.ef284a877f6b5p+6, 0x1.03fdde8242b17p+7) + got 0x1.f00f800163079p-3 want 0x1.f00f800163076p-3. 
*/ +svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x, + const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b64 (); + + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t iy = svreinterpret_u64 (y); + + svbool_t cmp_x = zeroinfnan (ix, pg); + svbool_t cmp_y = zeroinfnan (iy, pg); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + + svfloat64_t ax = svabs_x (pg, x); + svfloat64_t ay = svabs_x (pg, y); + svuint64_t iax = svreinterpret_u64 (ax); + svuint64_t iay = svreinterpret_u64 (ay); + + svuint64_t sign_x = sveor_x (pg, ix, iax); + svuint64_t sign_y = sveor_x (pg, iy, iay); + svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); + + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for evaluation of atanpi. */ + svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat64_t den = svsel (pred_aygtax, ay, ax); + svfloat64_t z = svdiv_x (pg, num, den); + + /* Work out the correct shift for atan2pi: + -1.0 when x < 0 and ax >= ay + -0.5 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + 0.5 when x >= 0 and ax < ay. */ + svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); + shift = svmul_x (ptrue, shift, sv_f64 (d->shift_val)); + shift = svsel (pred_aygtax, sv_f64 (d->shift_val), shift); + shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ + svfloat64_t z2 = svmul_x (pg, z, z); + svfloat64_t z3 = svmul_x (pg, z2, z); + svfloat64_t z4 = svmul_x (pg, z2, z2); + svfloat64_t z8 = svmul_x (pg, z4, z4); + svfloat64_t z16 = svmul_x (pg, z8, z8); + + /* Order-7 Estrin.
*/ + svfloat64_t c24 = svld1rq (ptrue, &d->c2); + svfloat64_t c68 = svld1rq (ptrue, &d->c6); + + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1); + svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0); + svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1); + + svfloat64_t p14 = svmla_x (pg, p12, z4, p34); + svfloat64_t p58 = svmla_x (pg, p56, z4, p78); + svfloat64_t p18 = svmla_x (pg, p14, z8, p58); + + /* Order-11 Estrin. */ + svfloat64_t c1012 = svld1rq (ptrue, &d->c10); + svfloat64_t c1416 = svld1rq (ptrue, &d->c14); + svfloat64_t c1820 = svld1rq (ptrue, &d->c18); + + svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0); + svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1); + svfloat64_t p912 = svmla_x (pg, p910, z4, p1112); + + svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0); + svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1); + svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516); + + svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0); + svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1); + svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920); + + svfloat64_t p916 = svmla_x (pg, p912, z8, p1316); + svfloat64_t p920 = svmla_x (pg, p916, z16, p1720); + + svfloat64_t poly = svmla_x (pg, p18, z16, p920); + + svfloat64_t ret = svmla_x (pg, shift, z, sv_f64 (d->c0)); + ret = svmla_x (pg, ret, z3, poly); + + /* Account for the sign of x and y. 
*/ + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) + return special_case ( + y, x, + svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)), + cmp_xy); + return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); +} diff --git a/sysdeps/aarch64/fpu/atan2pif_advsimd.c b/sysdeps/aarch64/fpu/atan2pif_advsimd.c new file mode 100644 index 0000000..f1f542b --- /dev/null +++ b/sysdeps/aarch64/fpu/atan2pif_advsimd.c @@ -0,0 +1,138 @@ +/* Single-Precision vector (Advanced SIMD) inverse tan2pi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + float32x4_t c1, c3, c5, c7; + float c2, c4, c6, c8; + float32x4_t c0; + uint32x4_t comp_const; +} data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2^-128, 1.0]. + Generated using fpminimax between FLT_MIN and 1. */ + .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4), + .c2 = 0x1.0490e4p-4, .c3 = V4 (-0x1.70c272p-5), + .c4 = 0x1.0eef52p-5, .c5 = V4 (-0x1.6abbbap-6), + .c6 = 0x1.78157p-7, .c7 = V4 (-0x1.f0b406p-9), + .c8 = 0x1.2ae7fep-11, .comp_const = V4 (2 * 0x7f800000lu - 1), +}; + +#define SignMask v_u32 (0x80000000) +#define OneOverPi v_f32 (0x1.45f307p-2) + +/* Special cases i.e. 
0, infinity and nan (fall back to scalar calls). */ +static float32x4_t VPCS_ATTR NOINLINE +special_case (float32x4_t y, float32x4_t x, float32x4_t ret, + uint32x4_t sign_xy, uint32x4_t cmp) +{ + /* Account for the sign of x and y. */ + ret = vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); + + /* Since we have no scalar fallback for atan2pif, + we can instead make a call to atan2f and divide by pi. */ + ret = v_call2_f32 (atan2f, y, x, ret, cmp); + + /* Only divide the special cases by pi, and leave the rest unchanged. */ + return vbslq_f32 (cmp, vmulq_f32 (ret, OneOverPi), ret); +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. */ +static inline uint32x4_t +zeroinfnan (uint32x4_t i, const struct data *d) +{ + /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */ + return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const); +} + +/* Fast implementation of vector atan2pif. Maximum observed error is 2.89 ULP: + _ZGVnN4vv_atan2pif (0x1.bd397p+54, 0x1.e79a4ap+54) got 0x1.e2678ep-3 + want 0x1.e26794p-3. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2pi) (float32x4_t y, + float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint32x4_t ix = vreinterpretq_u32_f32 (x); + uint32x4_t iy = vreinterpretq_u32_f32 (y); + + uint32x4_t special_cases + = vorrq_u32 (zeroinfnan (ix, d), zeroinfnan (iy, d)); + + uint32x4_t sign_x = vandq_u32 (ix, SignMask); + uint32x4_t sign_y = vandq_u32 (iy, SignMask); + uint32x4_t sign_xy = veorq_u32 (sign_x, sign_y); + + float32x4_t ax = vabsq_f32 (x); + float32x4_t ay = vabsq_f32 (y); + + uint32x4_t pred_xlt0 = vcltzq_f32 (x); + uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax); + + /* Set up z for evaluation of atanpif.
*/ + float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); + float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax); + float32x4_t z = vdivq_f32 (num, den); + + /* Work out the correct shift for atan2pi: + -1.0 when x < 0 and ax >= ay + -0.5 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + 0.5 when x >= 0 and ax < ay. */ + float32x4_t shift = vreinterpretq_f32_u32 ( + vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f)))); + float32x4_t shift2 = vreinterpretq_f32_u32 ( + vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f)))); + shift = vaddq_f32 (shift, shift2); + + /* Calculate the polynomial approximation. */ + float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z3 = vmulq_f32 (z2, z); + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); + + float32x4_t c2468 = vld1q_f32 (&d->c2); + + float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, c2468, 0); + float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, c2468, 1); + float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, c2468, 2); + float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, c2468, 3); + float32x4_t p14 = vfmaq_f32 (p12, z4, p34); + float32x4_t p58 = vfmaq_f32 (p56, z4, p78); + + float32x4_t poly = vfmaq_f32 (p14, z8, p58); + + /* y = shift + z * P(z^2). */ + float32x4_t ret = vfmaq_f32 (shift, z, d->c0); + ret = vfmaq_f32 (ret, z3, poly); + + if (__glibc_unlikely (v_any_u32 (special_cases))) + { + return special_case (y, x, ret, sign_xy, special_cases); + } + + /* Account for the sign of x and y. */ + return vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy)); +} +libmvec_hidden_def (V_NAME_F2 (atan2pi)) +HALF_WIDTH_ALIAS_F2 (atan2pi) diff --git a/sysdeps/aarch64/fpu/atan2pif_sve.c b/sysdeps/aarch64/fpu/atan2pif_sve.c new file mode 100644 index 0000000..d5ac4b7 --- /dev/null +++ b/sysdeps/aarch64/fpu/atan2pif_sve.c @@ -0,0 +1,137 @@ +/* Single-Precision vector (SVE) inverse tan2pi function + + Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "sv_math.h" + +static const struct data +{ + float32_t c0, c1, c3, c5, c7; + float32_t c2, c4, c6, c8; + float32_t shift_val; + uint32_t comp_const; +} data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-128, 1.0]. */ + .c0 = 0x1.45f306p-2, + .c1 = -0x1.b2975ep-4, + .c2 = 0x1.0490e4p-4, + .c3 = -0x1.70c272p-5, + .c4 = 0x1.0eef52p-5, + .c5 = -0x1.6abbbap-6, + .c6 = 0x1.78157p-7, + .c7 = -0x1.f0b406p-9, + .c8 = 0x1.2ae7fep-11, + .shift_val = 0.5f, + .comp_const = 2 * 0x7f800000lu - 1, +}; + +#define OneOverPi sv_f32 (0x1.45f307p-2) + +/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ +static svfloat32_t NOINLINE +special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret, + const svbool_t cmp) +{ + ret = sv_call2_f32 (atan2f, y, x, ret, cmp); + return svmul_f32_x (cmp, ret, OneOverPi); +} + +/* Returns a predicate indicating true if the input is the bit representation + of 0, infinity or nan. 
*/ +static inline svbool_t +zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d) +{ + return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1), + sv_u32 (d->comp_const)); +} + +/* Fast implementation of SVE atan2pif based on atan(x) ~ shift + z + z^3 * + P(z^2) with reduction to [0,1] using z=1/x and shift = 1/2. Maximum + observed error is 2.90 ULP: + _ZGVsMxvv_atan2pif (0x1.a28542p+5, 0x1.adb7c6p+5) got 0x1.f76524p-3 + want 0x1.f7651ep-3. */ +svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x, + const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); + + svuint32_t ix = svreinterpret_u32 (x); + svuint32_t iy = svreinterpret_u32 (y); + + svbool_t cmp_x = zeroinfnan (ix, pg, d); + svbool_t cmp_y = zeroinfnan (iy, pg, d); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + + svfloat32_t ax = svabs_x (pg, x); + svfloat32_t ay = svabs_x (pg, y); + svuint32_t iax = svreinterpret_u32 (ax); + svuint32_t iay = svreinterpret_u32 (ay); + + svuint32_t sign_x = sveor_x (pg, ix, iax); + svuint32_t sign_y = sveor_x (pg, iy, iay); + svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); + + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for evaluation of atanpif. */ + svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); + svfloat32_t den = svsel (pred_aygtax, ay, ax); + svfloat32_t z = svdiv_x (ptrue, num, den); + + /* Work out the correct shift for atan2pi: + -1.0 when x < 0 and ax >= ay + -0.5 when x < 0 and ax < ay + 0 when x >= 0 and ax >= ay + 0.5 when x >= 0 and ax < ay. */ + svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); + shift = svmul_x (ptrue, shift, sv_f32 (d->shift_val)); + shift = svsel (pred_aygtax, sv_f32 (d->shift_val), shift); + shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); + + /* Use pure Estrin scheme for P(z^2) with deg(P)=7.
*/ + svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z4 = svmul_x (pg, z2, z2); + svfloat32_t z8 = svmul_x (pg, z4, z4); + + svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2); + + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1); + svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2); + svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3); + + svfloat32_t p14 = svmad_x (pg, z4, p34, p12); + svfloat32_t p58 = svmad_x (pg, z4, p78, p56); + + svfloat32_t p18 = svmad_x (pg, z8, p58, p14); + + /* ret = shift + z + z^3 * P(z^2). */ + svfloat32_t poly = svmad_x (pg, z2, p18, d->c0); + svfloat32_t ret = svmad_x (pg, poly, z, shift); + + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) + return special_case ( + y, x, + svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)), + cmp_xy); + + /* Account for the sign of x and y. */ + return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); +} diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c index f024fd1..da0d371 100644 --- a/sysdeps/aarch64/fpu/atan_advsimd.c +++ b/sysdeps/aarch64/fpu/atan_advsimd.c @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include "v_math.h" -#include "poly_advsimd_f64.h" static const struct data { @@ -28,16 +27,16 @@ static const struct data } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. 
*/ - .c0 = V2 (-0x1.5555555555555p-2), .c1 = 0x1.99999999996c1p-3, - .c2 = V2 (-0x1.2492492478f88p-3), .c3 = 0x1.c71c71bc3951cp-4, - .c4 = V2 (-0x1.745d160a7e368p-4), .c5 = 0x1.3b139b6a88ba1p-4, - .c6 = V2 (-0x1.11100ee084227p-4), .c7 = 0x1.e1d0f9696f63bp-5, - .c8 = V2 (-0x1.aebfe7b418581p-5), .c9 = 0x1.842dbe9b0d916p-5, - .c10 = V2 (-0x1.5d30140ae5e99p-5), .c11 = 0x1.338e31eb2fbbcp-5, - .c12 = V2 (-0x1.00e6eece7de8p-5), .c13 = 0x1.860897b29e5efp-6, - .c14 = V2 (-0x1.0051381722a59p-6), .c15 = 0x1.14e9dc19a4a4ep-7, - .c16 = V2 (-0x1.d0062b42fe3bfp-9), .c17 = 0x1.17739e210171ap-10, - .c18 = V2 (-0x1.ab24da7be7402p-13), .c19 = 0x1.358851160a528p-16, + .c0 = V2 (-0x1.555555555552ap-2), .c1 = 0x1.9999999995aebp-3, + .c2 = V2 (-0x1.24924923923f6p-3), .c3 = 0x1.c71c7184288a2p-4, + .c4 = V2 (-0x1.745d11fb3d32bp-4), .c5 = 0x1.3b136a18051b9p-4, + .c6 = V2 (-0x1.110e6d985f496p-4), .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = V2 (-0x1.ae644e28058c3p-5), .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = V2 (-0x1.59d7f901566cbp-5), .c11 = 0x1.2c982855ab069p-5, + .c12 = V2 (-0x1.eb49592998177p-6), .c13 = 0x1.69d8b396e3d38p-6, + .c14 = V2 (-0x1.ca980345c4204p-7), .c15 = 0x1.dc050eafde0b3p-8, + .c16 = V2 (-0x1.7ea70755b8eccp-9), .c17 = 0x1.ba3da3de903e8p-11, + .c18 = V2 (-0x1.44a4b059b6f67p-13), .c19 = 0x1.c4a45029e5a91p-17, .pi_over_2 = V2 (0x1.921fb54442d18p+0), }; @@ -47,9 +46,9 @@ static const struct data /* Fast implementation of vector atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using - z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: - _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 - want 0x1.9225645bdd7c3p-1. */ + z=1/x and shift = pi/2. Maximum observed error is 2.45 ulps: + _ZGVnN2v_atan (0x1.0008d737eb3e6p+0) got 0x1.92288c551a4c1p-1 + want 0x1.92288c551a4c3p-1. 
*/ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) { const struct data *d = ptr_barrier (&data); @@ -78,59 +77,53 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ - uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0)); + uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0)); /* Avoid dependency in abs(x) in division (and comparison). */ - float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x); + float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x); + float64x2_t shift = vreinterpretq_f64_u64 ( vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2))); - /* Use absolute value only when needed (odd powers of z). */ - float64x2_t az = vbslq_f64 ( - SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z); - - /* Calculate the polynomial approximation. - Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of - full scheme to avoid underflow in x^16. - The order 19 polynomial P approximates - (atan(sqrt(x))-sqrt(x))/x^(3/2). */ + + /* Reinsert sign bit from argument into the shift value. */ + shift = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (shift), sign)); + + /* Calculate polynomial approximation P(z^2) with deg(P)=19. */ float64x2_t z2 = vmulq_f64 (z, z); - float64x2_t x2 = vmulq_f64 (z2, z2); - float64x2_t x4 = vmulq_f64 (x2, x2); - float64x2_t x8 = vmulq_f64 (x4, x4); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); - /* estrin_7. */ + /* Order-7 Estrin. 
*/ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); - float64x2_t p03 = vfmaq_f64 (p01, x2, p23); + float64x2_t p03 = vfmaq_f64 (p01, z4, p23); float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); - float64x2_t p47 = vfmaq_f64 (p45, x2, p67); + float64x2_t p47 = vfmaq_f64 (p45, z4, p67); - float64x2_t p07 = vfmaq_f64 (p03, x4, p47); + float64x2_t p07 = vfmaq_f64 (p03, z8, p47); - /* estrin_11. */ + /* Order-11 Estrin. */ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); - float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); + float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); - float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); + float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); - float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); + float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); - float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); - float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); + float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); + float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); - float64x2_t y = vfmaq_f64 (p07, p819, x8); + float64x2_t y = vfmaq_f64 (p07, p819, z16); /* Finalize. y = shift + z + z^3 * P(z^2). */ - y = vfmaq_f64 (az, y, vmulq_f64 (z2, az)); - y = vaddq_f64 (y, shift); - - /* y = atan(x) if x>0, -atan(-x) otherwise. 
*/ - y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign)); - return y; + y = vfmsq_f64 (v_f64 (-1.0), z2, y); + return vfmsq_f64 (shift, z, y); } diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c index 3880ced..a6b0489 100644 --- a/sysdeps/aarch64/fpu/atan_sve.c +++ b/sysdeps/aarch64/fpu/atan_sve.c @@ -18,23 +18,26 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[20]; - float64_t pi_over_2; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64_t shift_val, neg_one; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-1022, 1.0]. */ - .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, - 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, - -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, - 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, - -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, - 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, - -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, - .pi_over_2 = 0x1.921fb54442d18p+0, + .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, + .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, + .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, + .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, + .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, + .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, + .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, + .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, + .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, + .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, + .shift_val = 0x1.490fdaa22168cp+1, .neg_one = -1, }; /* Useful constants. 
*/ @@ -43,15 +46,14 @@ static const struct data /* Fast implementation of SVE atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed - error is 2.27 ulps: - _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 - want 0x1.9225645bdd7c3p-1. */ + error is 2.08 ulps: + _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1 + want 0x1.922a3163e15c4p-1. */ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* No need to trigger special case. Small cases, infs and nans - are supported by our approximation technique. */ + svbool_t ptrue = svptrue_b64 (); svuint64_t ix = svreinterpret_u64 (x); svuint64_t sign = svand_x (pg, ix, SignMask); @@ -59,32 +61,60 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 Hence, use z=-1/a if x>=1, otherwise z=a. */ - svbool_t red = svacgt (pg, x, 1.0); - /* Avoid dependency in abs(x) in division (and comparison). */ - svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x); - /* Use absolute value only when needed (odd powers of z). */ - svfloat64_t az = svabs_x (pg, z); - az = svneg_m (az, red, az); + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x); + + /* Reuse of -1.0 to reduce constant loads. + We need a shift value of pi/2, which is created via -1 + (1 + pi/2). */ + svfloat64_t shift + = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val)); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. */ + shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign)); /* Use split Estrin scheme for P(z^2) with deg(P)=19. 
*/ - svfloat64_t z2 = svmul_x (pg, z, z); - svfloat64_t x2 = svmul_x (pg, z2, z2); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t x8 = svmul_x (pg, x4, x4); + svfloat64_t z2 = svmul_x (ptrue, z, z); + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + svfloat64_t z16 = svmul_x (ptrue, z8, z8); - svfloat64_t y - = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly), - sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8); + /* Order-7 Estrin. */ + svfloat64_t c13 = svld1rq (ptrue, &d->c1); + svfloat64_t c57 = svld1rq (ptrue, &d->c5); - /* y = shift + z + z^3 * P(z^2). */ - svfloat64_t z3 = svmul_x (pg, z2, az); - y = svmla_x (pg, az, z3, y); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, z4, p23); + svfloat64_t p47 = svmla_x (pg, p45, z4, p67); + svfloat64_t p07 = svmla_x (pg, p03, z8, p47); + + /* Order-11 Estrin. */ + svfloat64_t c911 = svld1rq (ptrue, &d->c9); + svfloat64_t c1315 = svld1rq (ptrue, &d->c13); + svfloat64_t c1719 = svld1rq (ptrue, &d->c17); - /* Apply shift as indicated by `red` predicate. */ - y = svadd_m (red, y, d->pi_over_2); + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); + svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); - /* y = atan(x) if x>0, -atan(-x) otherwise. 
*/ - y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); + svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); - return y; + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); + svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); + svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); + + svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); + svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); + + svfloat64_t y = svmla_x (pg, p07, z16, p819); + + /* y = shift + z + z^3 * P(z^2). */ + shift = svadd_m (red, z, shift); + y = svmul_x (pg, z2, y); + return svmla_x (pg, shift, z, y); } diff --git a/sysdeps/aarch64/fpu/atanf_advsimd.c b/sysdeps/aarch64/fpu/atanf_advsimd.c index 472865e..817a47e 100644 --- a/sysdeps/aarch64/fpu/atanf_advsimd.c +++ b/sysdeps/aarch64/fpu/atanf_advsimd.c @@ -22,26 +22,35 @@ static const struct data { + uint32x4_t sign_mask, pi_over_2; + float32x4_t neg_one; +#if WANT_SIMD_EXCEPT float32x4_t poly[8]; - float32x4_t pi_over_2; +} data = { + .poly = { V4 (-0x1.5554dcp-2), V4 (0x1.9978ecp-3), V4 (-0x1.230a94p-3), + V4 (0x1.b4debp-4), V4 (-0x1.3550dap-4), V4 (0x1.61eebp-5), + V4 (-0x1.0c17d4p-6), V4 (0x1.7ea694p-9) }, +#else + float32x4_t c0, c2, c4, c6; + float c1, c3, c5, c7; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. Generated using fpminimax between FLT_MIN and 1. 
*/ - .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), - V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), - V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, - .pi_over_2 = V4 (0x1.921fb6p+0f), + .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, + .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, + .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, + .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, +#endif + .pi_over_2 = V4 (0x3fc90fdb), + .neg_one = V4 (-1.0f), + .sign_mask = V4 (0x80000000), }; -#define SignMask v_u32 (0x80000000) - -#define P(i) d->poly[i] - +#if WANT_SIMD_EXCEPT #define TinyBound 0x30800000 /* asuint(0x1p-30). */ #define BigBound 0x4e800000 /* asuint(0x1p30). */ -#if WANT_SIMD_EXCEPT static float32x4_t VPCS_ATTR NOINLINE special_case (float32x4_t x, float32x4_t y, uint32x4_t special) { @@ -51,19 +60,20 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) /* Fast implementation of vector atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] - using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps: - _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */ + using z=-1/x and shift = pi/2. Maximum observed error is 2.02 ulps: + _ZGVnN4v_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. */ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) { const struct data *d = ptr_barrier (&data); - /* Small cases, infs and nans are supported by our approximation technique, - but do not set fenv flags correctly. Only trigger special case if we need - fenv. */ uint32x4_t ix = vreinterpretq_u32_f32 (x); - uint32x4_t sign = vandq_u32 (ix, SignMask); + uint32x4_t sign = vandq_u32 (ix, d->sign_mask); #if WANT_SIMD_EXCEPT + /* Small cases, infs and nans are supported by our approximation technique, + but do not set fenv flags correctly. Only trigger special case if we need + fenv. 
*/ uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000)); uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)), v_u32 (BigBound - TinyBound)); @@ -71,41 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) if (__glibc_unlikely (v_any_u32 (special))) return special_case (x, x, v_u32 (-1)); #endif - /* Argument reduction: - y := arctan(x) for x < 1 - y := pi/2 + arctan(-1/x) for x > 1 - Hence, use z=-1/a if x>=1, otherwise z=a. */ - uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0)); - /* Avoid dependency in abs(x) in division (and comparison). */ - float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x); + y := arctan(x) for |x| < 1 + y := arctan(-1/x) + pi/2 for x > +1 + y := arctan(-1/x) - pi/2 for x < -1 + Hence, use z=-1/a if x>=|-1|, otherwise z=a. */ + uint32x4_t red = vcagtq_f32 (x, d->neg_one); + + float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x); + + /* Shift is calculated as +-pi/2 or 0, depending on the argument case. */ float32x4_t shift = vreinterpretq_f32_u32 ( - vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2))); - /* Use absolute value only when needed (odd powers of z). */ - float32x4_t az = vbslq_f32 ( - SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z); + vandq_u32 (red, veorq_u32 (d->pi_over_2, sign))); + + float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z3 = vmulq_f32 (z, z2); + float32x4_t z4 = vmulq_f32 (z2, z2); +#if WANT_SIMD_EXCEPT /* Calculate the polynomial approximation. Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, a standard implementation using z8 creates spurious underflow in the very last fma (when z^8 is small enough). - Therefore, we split the last fma into a mul and an fma. - Horner and single-level Estrin have higher errors that exceed - threshold. */ - float32x4_t z2 = vmulq_f32 (z, z); - float32x4_t z4 = vmulq_f32 (z2, z2); - + Therefore, we split the last fma into a mul and an fma. 
*/ float32x4_t y = vfmaq_f32 ( v_pairwise_poly_3_f32 (z2, z4, d->poly), z4, vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4))); - /* y = shift + z * P(z^2). */ - y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift); +#else + float32x4_t z8 = vmulq_f32 (z4, z4); + + /* Uses an Estrin scheme for polynomial approximation. */ + float32x4_t odd_coeffs = vld1q_f32 (&d->c1); + + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, odd_coeffs, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, odd_coeffs, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, odd_coeffs, 2); + float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, odd_coeffs, 3); - /* y = atan(x) if x>0, -atan(-x) otherwise. */ - y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign)); + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); - return y; + float32x4_t y = vfmaq_f32 (p03, z8, p47); +#endif + + /* y = shift + z + z^3 * P(z^2). */ + return vfmaq_f32 (vaddq_f32 (shift, z), z3, y); } libmvec_hidden_def (V_NAME_F1 (atan)) HALF_WIDTH_ALIAS_F1 (atan) diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c index 3a98d70..6558223 100644 --- a/sysdeps/aarch64/fpu/atanf_sve.c +++ b/sysdeps/aarch64/fpu/atanf_sve.c @@ -18,18 +18,26 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f32.h" static const struct data { - float32_t poly[8]; - float32_t pi_over_2; + float32_t c1, c3, c5, c7; + float32_t c0, c2, c4, c6; + float32_t shift_val, neg_one; } data = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. 
*/ - .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, - -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, - .pi_over_2 = 0x1.921fb6p+0f, + .c0 = -0x1.5554dcp-2, + .c1 = 0x1.9978ecp-3, + .c2 = -0x1.230a94p-3, + .c3 = 0x1.b4debp-4, + .c4 = -0x1.3550dap-4, + .c5 = 0x1.61eebp-5, + .c6 = -0x1.0c17d4p-6, + .c7 = 0x1.7ea694p-9, + /* pi/2, used as a shift value after reduction. */ + .shift_val = 0x1.921fb54442d18p+0, + .neg_one = -1.0f, }; #define SignMask (0x80000000) @@ -37,43 +45,49 @@ static const struct data /* Fast implementation of SVE atanf based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using z=-1/x and shift = pi/2. - Largest observed error is 2.9 ULP, close to +/-1.0: - _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1 - want -0x1.967fp-1. */ + Largest observed error is 2.12 ULP: + _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 + want 0x1.95ed36p-1. */ svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); /* No need to trigger special case. Small cases, infs and nans are supported by our approximation technique. */ svuint32_t ix = svreinterpret_u32 (x); - svuint32_t sign = svand_x (pg, ix, SignMask); + svuint32_t sign = svand_x (ptrue, ix, SignMask); /* Argument reduction: y := arctan(x) for x < 1 - y := pi/2 + arctan(-1/x) for x > 1 - Hence, use z=-1/a if x>=1, otherwise z=a. */ - svbool_t red = svacgt (pg, x, 1.0f); - /* Avoid dependency in abs(x) in division (and comparison). */ - svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x); - /* Use absolute value only when needed (odd powers of z). */ - svfloat32_t az = svabs_x (pg, z); - az = svneg_m (az, red, az); - - /* Use split Estrin scheme for P(z^2) with deg(P)=7. 
*/ - svfloat32_t z2 = svmul_x (pg, z, z); - svfloat32_t z4 = svmul_x (pg, z2, z2); - svfloat32_t z8 = svmul_x (pg, z4, z4); - - svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly); - - /* y = shift + z + z^3 * P(z^2). */ - svfloat32_t z3 = svmul_x (pg, z2, az); - y = svmla_x (pg, az, z3, y); - - /* Apply shift as indicated by 'red' predicate. */ - y = svadd_m (red, y, sv_f32 (d->pi_over_2)); - - /* y = atan(x) if x>0, -atan(-x) otherwise. */ - return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); + y := arctan(-1/x) + pi/2 for x > +1 + y := arctan(-1/x) - pi/2 for x < -1 + Hence, use z=-1/a if |x|>=|-1|, otherwise z=a. */ + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (d->neg_one), x), x); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. */ + svfloat32_t shift = svreinterpret_f32 ( + sveor_x (red, svreinterpret_u32 (sv_f32 (d->shift_val)), sign)); + + svfloat32_t z2 = svmul_x (ptrue, z, z); + svfloat32_t z3 = svmul_x (ptrue, z2, z); + svfloat32_t z4 = svmul_x (ptrue, z2, z2); + svfloat32_t z8 = svmul_x (ptrue, z4, z4); + + svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); + + svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); + svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); + svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); + svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); + + svfloat32_t p03 = svmla_x (pg, p01, z4, p23); + svfloat32_t p47 = svmla_x (pg, p45, z4, p67); + + svfloat32_t y = svmla_x (pg, p03, z8, p47); + + /* shift + z + z^3 * P(z^2). 
*/ + shift = svadd_m (red, z, shift); + return svmla_x (pg, shift, z3, y); } diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c index 16a7cf6..958d69a 100644 --- a/sysdeps/aarch64/fpu/atanh_sve.c +++ b/sysdeps/aarch64/fpu/atanh_sve.c @@ -30,7 +30,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special) } /* SVE approximation for double-precision atanh, based on log1p. - The greatest observed error is 2.81 ULP: + The greatest observed error is 3.3 ULP: _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6 want 0x1.ffd8ff31b501cp-6. */ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg) @@ -42,7 +42,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg) svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half)); /* It is special if iax >= 1. */ -// svbool_t special = svcmpge (pg, iax, One); svbool_t special = svacge (pg, x, 1.0); /* Computation is performed based on the following sequence of equality: diff --git a/sysdeps/aarch64/fpu/atanpi_advsimd.c b/sysdeps/aarch64/fpu/atanpi_advsimd.c new file mode 100644 index 0000000..9101419 --- /dev/null +++ b/sysdeps/aarch64/fpu/atanpi_advsimd.c @@ -0,0 +1,117 @@ +/* Double-Precision vector (Advanced SIMD) inverse tanpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include "v_math.h" + +static const struct data +{ + double c2, c4, c6, c8, c10, c12, c14, c16, c18, c20; + float64x2_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; +} data = { + /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on + [2^-1022, 1.0]. */ + .c0 = V2 (0x1.45f306dc9c883p-2), .c1 = V2 (-0x1.b2995e7b7ba4ap-4), + .c2 = 0x1.04c26be3d2c1p-4, .c3 = V2 (-0x1.7483759c17ea1p-5), + .c4 = 0x1.21bb95c315d57p-5, .c5 = V2 (-0x1.da1bdc3d453f3p-6), + .c6 = 0x1.912d20459b4bfp-6, .c7 = V2 (-0x1.5bbd4545cad1fp-6), + .c8 = 0x1.331b83bec30a1p-6, .c9 = V2 (-0x1.13d6457f44de3p-6), + .c10 = 0x1.f8e802974db94p-7, .c11 = V2 (-0x1.d7e173ab04a1ap-7), + .c12 = 0x1.bdfa47d6a4f28p-7, .c13 = V2 (-0x1.9ba78f3232ceep-7), + .c14 = 0x1.5e6044590ab4fp-7, .c15 = V2 (-0x1.01ccfdeb9f77fp-7), + .c16 = 0x1.345cf0d4eb1c1p-8, .c17 = V2 (-0x1.19e5f00f67e3ap-9), + .c18 = 0x1.6d3035ac7625bp-11, .c19 = V2 (-0x1.286bb9ae4ed79p-13), + .c20 = 0x1.c37ec36da0e1ap-17, +}; + +#define SignMask v_u64 (0x8000000000000000) + +/* Fast implementation of vector atanpi. + atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using + z=1/x and shift = +-1/2. Maximum observed error is 2.76 ulps: + _ZGVnN2v_atanpi(0x1.fa2d6912cd64fp-1) got 0x1.fc45a51bd497fp-3 + want 0x1.fc45a51bd497cp-3. */ +float64x2_t VPCS_ATTR V_NAME_D1 (atanpi) (float64x2_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint64x2_t ix = vreinterpretq_u64_f64 (x); + uint64x2_t sign = vandq_u64 (ix, SignMask); + + /* Argument Reduction: + y := arctanpi(x) for |x| < 1 + y := arctanpi(-1/x) + 1/2 for x > 1 + y := arctanpi(-1/x) - 1/2 for x < -1 + Hence, use z=-1/a if |x|>=|-1|, otherwise z=a. 
*/ + uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0)); + float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x); + + /* Shift is calculated as +1/2 or 0, depending on the argument case. */ + float64x2_t shift = vreinterpretq_f64_u64 ( + vandq_u64 (red, vreinterpretq_u64_f64 (v_f64 (0.5)))); + + /* Reinsert sign bit from argument into the shift value. */ + shift = vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (shift), sign)); + + /* Calculate polynomial approximation P(z^2) with deg(P)=19. */ + float64x2_t z2 = vmulq_f64 (z, z); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); + + float64x2_t c24 = vld1q_f64 (&d->c2); + float64x2_t c68 = vld1q_f64 (&d->c6); + + /* Order-7 Estrin. */ + float64x2_t p12 = vfmaq_laneq_f64 (d->c1, z2, c24, 0); + float64x2_t p34 = vfmaq_laneq_f64 (d->c3, z2, c24, 1); + float64x2_t p56 = vfmaq_laneq_f64 (d->c5, z2, c68, 0); + float64x2_t p78 = vfmaq_laneq_f64 (d->c7, z2, c68, 1); + + float64x2_t p14 = vfmaq_f64 (p12, z4, p34); + float64x2_t p58 = vfmaq_f64 (p56, z4, p78); + float64x2_t p18 = vfmaq_f64 (p14, z8, p58); + + /* Order-11 Estrin. 
*/ + float64x2_t c1012 = vld1q_f64 (&d->c10); + float64x2_t c1416 = vld1q_f64 (&d->c14); + float64x2_t c1820 = vld1q_f64 (&d->c18); + + float64x2_t p910 = vfmaq_laneq_f64 (d->c9, z2, c1012, 0); + float64x2_t p1112 = vfmaq_laneq_f64 (d->c11, z2, c1012, 1); + float64x2_t p912 = vfmaq_f64 (p910, z4, p1112); + + float64x2_t p1314 = vfmaq_laneq_f64 (d->c13, z2, c1416, 0); + float64x2_t p1516 = vfmaq_laneq_f64 (d->c15, z2, c1416, 1); + float64x2_t p1316 = vfmaq_f64 (p1314, z4, p1516); + + float64x2_t p1718 = vfmaq_laneq_f64 (d->c17, z2, c1820, 0); + float64x2_t p1920 = vfmaq_laneq_f64 (d->c19, z2, c1820, 1); + float64x2_t p1720 = vfmaq_f64 (p1718, z4, p1920); + + float64x2_t p916 = vfmaq_f64 (p912, z8, p1316); + float64x2_t p920 = vfmaq_f64 (p916, z16, p1720); + + float64x2_t y = vfmaq_f64 (p18, p920, z16); + + y = vfmaq_f64 (d->c0, z2, y); + + /* y = shift + z * p(z^2). */ + return vfmaq_f64 (shift, z, y); +} diff --git a/sysdeps/aarch64/fpu/atanpi_sve.c b/sysdeps/aarch64/fpu/atanpi_sve.c new file mode 100644 index 0000000..3f8f277 --- /dev/null +++ b/sysdeps/aarch64/fpu/atanpi_sve.c @@ -0,0 +1,127 @@ +/* Double-Precision vector (SVE) inverse tanpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "sv_math.h" + +static const struct data +{ + float64_t c2, c4, c6, c8, c10, c12, c14, c16, c18, c20; + float64_t c0, c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64_t shift_val, neg_one; +} data = { + /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on + [2^-1022, 1.0]. */ + .c0 = 0x1.45f306dc9c883p-2, + .c1 = -0x1.b2995e7b7ba4ap-4, + .c2 = 0x1.04c26be3d2c1p-4, + .c3 = -0x1.7483759c17ea1p-5, + .c4 = 0x1.21bb95c315d57p-5, + .c5 = -0x1.da1bdc3d453f3p-6, + .c6 = 0x1.912d20459b4bfp-6, + .c7 = -0x1.5bbd4545cad1fp-6, + .c8 = 0x1.331b83bec30a1p-6, + .c9 = -0x1.13d6457f44de3p-6, + .c10 = 0x1.f8e802974db94p-7, + .c11 = -0x1.d7e173ab04a1ap-7, + .c12 = 0x1.bdfa47d6a4f28p-7, + .c13 = -0x1.9ba78f3232ceep-7, + .c14 = 0x1.5e6044590ab4fp-7, + .c15 = -0x1.01ccfdeb9f77fp-7, + .c16 = 0x1.345cf0d4eb1c1p-8, + .c17 = -0x1.19e5f00f67e3ap-9, + .c18 = 0x1.6d3035ac7625bp-11, + .c19 = -0x1.286bb9ae4ed79p-13, + .c20 = 0x1.c37ec36da0e1ap-17, + .shift_val = 1.5, + .neg_one = -1, +}; + +/* Fast implementation of SVE atanpi. + Based on atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using + z=-1/x and shift = +-1/2. Largest errors are close to 1. The maximum observed + error is 2.80 ulps: + _ZGVsMxv_atanpi(0x1.f19587d63c76fp-1) got 0x1.f6b1304817d02p-3 + want 0x1.f6b1304817d05p-3. */ +svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + + svbool_t ptrue = svptrue_b64 (); + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t sign = svand_x (pg, ix, 0x8000000000000000); + + /* Argument reduction: + y := arctanpi(x) for |x| < 1 + y := sign(x)/2 + arctanpi(-1/x) for |x| > 1 + Hence, use z=-1/x if |x|>1, otherwise z=x. */ + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x); + + /* Reuse of -1.0f to reduce constant loads, + We need a shift value of 1/2, which is created via -1 + (1 + 1/2). 
*/ + svfloat64_t shift + = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val)); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. */ + shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign)); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ + svfloat64_t z2 = svmul_x (ptrue, z, z); + svfloat64_t z4 = svmul_x (ptrue, z2, z2); + svfloat64_t z8 = svmul_x (ptrue, z4, z4); + svfloat64_t z16 = svmul_x (ptrue, z8, z8); + + /* Order-7 Estrin. */ + svfloat64_t c24 = svld1rq (ptrue, &d->c2); + svfloat64_t c68 = svld1rq (ptrue, &d->c6); + + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), z2, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), z2, c24, 1); + svfloat64_t p56 = svmla_lane (sv_f64 (d->c5), z2, c68, 0); + svfloat64_t p78 = svmla_lane (sv_f64 (d->c7), z2, c68, 1); + + svfloat64_t p14 = svmla_x (pg, p12, z4, p34); + svfloat64_t p58 = svmla_x (pg, p56, z4, p78); + svfloat64_t p18 = svmla_x (pg, p14, z8, p58); + + /* Order-11 Estrin. 
*/ + svfloat64_t c1012 = svld1rq (ptrue, &d->c10); + svfloat64_t c1416 = svld1rq (ptrue, &d->c14); + svfloat64_t c1820 = svld1rq (ptrue, &d->c18); + + svfloat64_t p910 = svmla_lane (sv_f64 (d->c9), z2, c1012, 0); + svfloat64_t p1112 = svmla_lane (sv_f64 (d->c11), z2, c1012, 1); + svfloat64_t p912 = svmla_x (pg, p910, z4, p1112); + + svfloat64_t p1314 = svmla_lane (sv_f64 (d->c13), z2, c1416, 0); + svfloat64_t p1516 = svmla_lane (sv_f64 (d->c15), z2, c1416, 1); + svfloat64_t p1316 = svmla_x (pg, p1314, z4, p1516); + + svfloat64_t p1718 = svmla_lane (sv_f64 (d->c17), z2, c1820, 0); + svfloat64_t p1920 = svmla_lane (sv_f64 (d->c19), z2, c1820, 1); + svfloat64_t p1720 = svmla_x (pg, p1718, z4, p1920); + + svfloat64_t p916 = svmla_x (pg, p912, z8, p1316); + svfloat64_t p920 = svmla_x (pg, p916, z16, p1720); + + svfloat64_t y = svmla_x (pg, p18, z16, p920); + + y = svmla_x (pg, sv_f64 (d->c0), z2, y); + return svmla_x (pg, shift, z, y); +} diff --git a/sysdeps/aarch64/fpu/atanpif_advsimd.c b/sysdeps/aarch64/fpu/atanpif_advsimd.c new file mode 100644 index 0000000..9295156 --- /dev/null +++ b/sysdeps/aarch64/fpu/atanpif_advsimd.c @@ -0,0 +1,92 @@ +/* Single-Precision vector (Advanced SIMD) inverse tanpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "v_math.h" + +static const struct data +{ + uint32x4_t half; + float32x4_t neg_one; + float32x4_t c0, c1, c3, c5, c7; + float c2, c4, c6, c8; +} data = { + /* Polynomial coefficients generated using Remez algorithm, + see atanpi.sollya for details. */ + .c0 = V4 (0x1.45f306p-2), .c1 = V4 (-0x1.b2975ep-4), + .c2 = 0x1.0490e4p-4, .c3 = V4 (-0x1.70c272p-5), + .c4 = 0x1.0eef52p-5, .c5 = V4 (-0x1.6abbbap-6), + .c6 = 0x1.78157p-7, .c7 = V4 (-0x1.f0b406p-9), + .c8 = 0x1.2ae7fep-11, .half = V4 (0x3f000000), + .neg_one = V4 (-1.0f), +}; + +#define SignMask v_u32 (0x80000000) + +/* Fast implementation of vector atanpif based on + atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] + using z=-1/x and shift = +-1/2. + Maximum observed error is 2.59ulps: + _ZGVnN4v_atanpif (0x1.f2a89cp-1) got 0x1.f76524p-3 + want 0x1.f7651ep-3. */ +float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanpi) (float32x4_t x) +{ + const struct data *d = ptr_barrier (&data); + + uint32x4_t ix = vreinterpretq_u32_f32 (x); + uint32x4_t sign = vandq_u32 (ix, SignMask); + + /* Argument Reduction: + y := arctanpi(x) for |x| < 1 + y := arctanpi(-1/x) + 1/2 for x > 1 + y := arctanpi(-1/x) - 1/2 for x < -1 + Hence, use z=-1/a if |x|>=|-1|, otherwise z=a. */ + uint32x4_t red = vcagtq_f32 (x, d->neg_one); + + float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x); + + /* Shift is calculated as +1/2 or 0, depending on the argument case. */ + float32x4_t shift = vreinterpretq_f32_u32 (vandq_u32 (red, d->half)); + + /* Reinsert sign bit from argument into the shift value. */ + shift = vreinterpretq_f32_u32 ( + veorq_u32 (vreinterpretq_u32_f32 (shift), sign)); + + /* Uses an Estrin scheme for polynomial approximation. 
*/ + float32x4_t z2 = vmulq_f32 (z, z); + float32x4_t z4 = vmulq_f32 (z2, z2); + float32x4_t z8 = vmulq_f32 (z4, z4); + + float32x4_t even_coeffs = vld1q_f32 (&d->c2); + + float32x4_t p12 = vfmaq_laneq_f32 (d->c1, z2, even_coeffs, 0); + float32x4_t p34 = vfmaq_laneq_f32 (d->c3, z2, even_coeffs, 1); + float32x4_t p56 = vfmaq_laneq_f32 (d->c5, z2, even_coeffs, 2); + float32x4_t p78 = vfmaq_laneq_f32 (d->c7, z2, even_coeffs, 3); + + float32x4_t p14 = vfmaq_f32 (p12, z4, p34); + float32x4_t p58 = vfmaq_f32 (p56, z4, p78); + + float32x4_t y = vfmaq_f32 (p14, z8, p58); + y = vfmaq_f32 (d->c0, z2, y); + + /* y = shift + z * P(z^2). */ + return vfmaq_f32 (shift, z, y); +} +libmvec_hidden_def (V_NAME_F1 (atanpi)) +HALF_WIDTH_ALIAS_F1 (atanpi) diff --git a/sysdeps/aarch64/fpu/atanpif_sve.c b/sysdeps/aarch64/fpu/atanpif_sve.c new file mode 100644 index 0000000..2abd788 --- /dev/null +++ b/sysdeps/aarch64/fpu/atanpif_sve.c @@ -0,0 +1,89 @@ +/* Single-Precision vector (SVE) inverse tanpi function + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include "sv_math.h" + +static const struct data +{ + float32_t c2, c4, c6, c8; + float32_t c0, c1, c3, c5, c7; + float32_t shift_val, neg_one; +} data = { + /* Coefficients of polynomial P such that atanpi(x)~x*P(x^2) on + [2**-128, 1.0]. */ + .c0 = 0x1.45f306p-2, .c1 = -0x1.b2975ep-4, .c2 = 0x1.0490e4p-4, + .c3 = -0x1.70c272p-5, .c4 = 0x1.0eef52p-5, .c5 = -0x1.6abbbap-6, + .c6 = 0x1.78157p-7, .c7 = -0x1.f0b406p-9, .c8 = 0x1.2ae7fep-11, + .shift_val = 1.5f, .neg_one = -1.0f, +}; + +#define SignMask (0x80000000) + +/* Fast implementation of SVE atanpif based on + atanpi(x) ~ shift + z * P(z^2) with reduction to [0,1] using + z=-1/x and shift = +-1/2. + Largest observed error is 2.59 ULP, close to +/-1.0: + _ZGVsMxv_atanpif(0x1.f2a89cp-1) got 0x1.f76524p-3 + want 0x1.f7651ep-3. */ +svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg) +{ + const struct data *d = ptr_barrier (&data); + svbool_t ptrue = svptrue_b32 (); + + /* No need to trigger special case. Small cases, infs and nans + are supported by our approximation technique. */ + svuint32_t ix = svreinterpret_u32 (x); + svuint32_t sign = svand_x (pg, ix, SignMask); + + /* Argument reduction: + y := arctanpi(x) for |x| < 1 + y := arctanpi(-1/x) + 1/2 for x > +1 + y := arctanpi(-1/x) - 1/2 for x < -1 + Hence, use z=-1/x if |x|>|-1|, otherwise z=x. */ + svbool_t red = svacgt (pg, x, d->neg_one); + svfloat32_t z = svsel (red, svdiv_x (ptrue, sv_f32 (d->neg_one), x), x); + + /* Reuse of -1.0f to reduce constant loads, + We need a shift value of 1/2, which is created via -1 + (1 + 1/2). */ + svfloat32_t shift + = svadd_z (red, sv_f32 (d->neg_one), sv_f32 (d->shift_val)); + + /* Reinserts the sign bit of the argument to handle the case of x < -1. 
*/ + shift = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (shift), sign)); + + svfloat32_t z2 = svmul_x (ptrue, z, z); + svfloat32_t z4 = svmul_x (ptrue, z2, z2); + svfloat32_t z8 = svmul_x (ptrue, z4, z4); + + svfloat32_t even_coeffs = svld1rq (ptrue, &d->c2); + + svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), z2, even_coeffs, 0); + svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), z2, even_coeffs, 1); + svfloat32_t p56 = svmla_lane (sv_f32 (d->c5), z2, even_coeffs, 2); + svfloat32_t p78 = svmla_lane (sv_f32 (d->c7), z2, even_coeffs, 3); + + svfloat32_t p14 = svmad_x (pg, z4, p34, p12); + svfloat32_t p58 = svmad_x (pg, z4, p78, p56); + + svfloat32_t p18 = svmad_x (pg, z8, p58, p14); + svfloat32_t y = svmad_x (pg, z2, p18, d->c0); + + /* shift + z * P(z^2). */ + return svmad_x (pg, y, z, shift); +} diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h index 5152c0d..77ae10d 100644 --- a/sysdeps/aarch64/fpu/bits/math-vector.h +++ b/sysdeps/aarch64/fpu/bits/math-vector.h @@ -37,6 +37,10 @@ # define __DECL_SIMD_acosh __DECL_SIMD_aarch64 # undef __DECL_SIMD_acoshf # define __DECL_SIMD_acoshf __DECL_SIMD_aarch64 +# undef __DECL_SIMD_acospi +# define __DECL_SIMD_acospi __DECL_SIMD_aarch64 +# undef __DECL_SIMD_acospif +# define __DECL_SIMD_acospif __DECL_SIMD_aarch64 # undef __DECL_SIMD_asin # define __DECL_SIMD_asin __DECL_SIMD_aarch64 # undef __DECL_SIMD_asinf @@ -45,6 +49,10 @@ # define __DECL_SIMD_asinh __DECL_SIMD_aarch64 # undef __DECL_SIMD_asinhf # define __DECL_SIMD_asinhf __DECL_SIMD_aarch64 +# undef __DECL_SIMD_asinpi +# define __DECL_SIMD_asinpi __DECL_SIMD_aarch64 +# undef __DECL_SIMD_asinpif +# define __DECL_SIMD_asinpif __DECL_SIMD_aarch64 # undef __DECL_SIMD_atan # define __DECL_SIMD_atan __DECL_SIMD_aarch64 # undef __DECL_SIMD_atanf @@ -53,10 +61,18 @@ # define __DECL_SIMD_atanh __DECL_SIMD_aarch64 # undef __DECL_SIMD_atanhf # define __DECL_SIMD_atanhf __DECL_SIMD_aarch64 +# undef __DECL_SIMD_atanpi +# define 
__DECL_SIMD_atanpi __DECL_SIMD_aarch64 +# undef __DECL_SIMD_atanpif +# define __DECL_SIMD_atanpif __DECL_SIMD_aarch64 # undef __DECL_SIMD_atan2 # define __DECL_SIMD_atan2 __DECL_SIMD_aarch64 # undef __DECL_SIMD_atan2f # define __DECL_SIMD_atan2f __DECL_SIMD_aarch64 +# undef __DECL_SIMD_atan2pi +# define __DECL_SIMD_atan2pi __DECL_SIMD_aarch64 +# undef __DECL_SIMD_atan2pif +# define __DECL_SIMD_atan2pif __DECL_SIMD_aarch64 # undef __DECL_SIMD_cbrt # define __DECL_SIMD_cbrt __DECL_SIMD_aarch64 # undef __DECL_SIMD_cbrtf @@ -176,12 +192,16 @@ typedef __SVBool_t __sv_bool_t; # define __vpcs __attribute__ ((__aarch64_vector_pcs__)) __vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t); +__vpcs __f32x4_t _ZGVnN4vv_atan2pif (__f32x4_t, __f32x4_t); __vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_acospif (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_asinpif (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_atanpif (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t); @@ -207,12 +227,16 @@ __vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_tanpif (__f32x4_t); __vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t); +__vpcs __f64x2_t _ZGVnN2vv_atan2pi (__f64x2_t, __f64x2_t); __vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_acospi (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_asinpi (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_atanpi (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t); __vpcs 
__f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t); @@ -243,12 +267,16 @@ __vpcs __f64x2_t _ZGVnN2v_tanpi (__f64x2_t); #ifdef __SVE_VEC_MATH_SUPPORTED __sv_f32_t _ZGVsMxvv_atan2f (__sv_f32_t, __sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxvv_atan2pif (__sv_f32_t, __sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_acospif (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_asinpif (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t); +__sv_f32_t _ZGVsMxv_atanpif (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t); @@ -274,12 +302,16 @@ __sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t); __sv_f32_t _ZGVsMxv_tanpif (__sv_f32_t, __sv_bool_t); __sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxvv_atan2pi (__sv_f64_t, __sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_acospi (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_asinpi (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t); +__sv_f64_t _ZGVsMxv_atanpi (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t); __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t); diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c index 77e58e1..f5a163b 100644 --- 
a/sysdeps/aarch64/fpu/cosh_sve.c +++ b/sysdeps/aarch64/fpu/cosh_sve.c @@ -21,71 +21,99 @@ static const struct data { - float64_t poly[3]; - float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres; + double c0, c2; + double c1, c3; + float64_t inv_ln2, ln2_hi, ln2_lo, shift; uint64_t special_bound; } data = { - .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3, - 0x1.5555576a59599p-5, }, - - .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */ - /* -ln2/N. */ - .ln2_hi = -0x1.62e42fefa39efp-9, - .ln2_lo = -0x1.abc9e3b39803f3p-64, - .shift = 0x1.8p+52, - .thres = 704.0, - - /* 0x1.6p9, above which exp overflows. */ - .special_bound = 0x4086000000000000, + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + /* 1/ln2. */ + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ff80p+46, /* 1.5*2^46+1022. */ + + /* asuint(ln(2^(1024 - 1/128))), the value above which exp overflows. */ + .special_bound = 0x40862e37e7d8ba72, }; -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special) -{ - svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); - svfloat64_t y = svadd_x (pg, half_t, half_over_t); - return sv_call_f64 (cosh, x, y, special); -} - -/* Helper for approximating exp(x). Copied from sv_exp_tail, with no - special-case handling or tail. */ +/* Helper for approximating exp(x)/2. + Functionally identical to FEXPA exp(x), but an adjustment in + the shift value which leads to a reduction in the exponent of scale by 1, + thus halving the result at no cost. */ static inline svfloat64_t -exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d) +exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d) { /* Calculate exp(x). 
*/ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi); - r = svmla_x (pg, r, n, d->ln2_lo); + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); - svuint64_t u = svreinterpret_u64 (z); - svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS); - svuint64_t i = svand_x (svptrue_b64 (), u, 0xff); + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); - svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]); - y = svmla_x (pg, sv_f64 (d->poly[0]), r, y); - y = svmla_x (pg, sv_f64 (1.0), r, y); - y = svmul_x (svptrue_b64 (), r, y); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); + svfloat64_t p04 = svmla_x (pg, p01, p23, r2); + svfloat64_t p = svmla_x (pg, r, p04, r2); - /* s = 2^(n/N). */ - u = svld1_gather_index (pg, __v_exp_tail_data, i); - svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e)); + svfloat64_t scale = svexpa (u); - return svmla_x (pg, s, s, y); + return svmla_x (pg, scale, scale, p); +} + +/* Vectorised special case to handle values past where exp_inline overflows. + Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double + the valid range of inputs, and returns inf for anything past that. */ +static svfloat64_t NOINLINE +special_case (svbool_t pg, svbool_t special, svfloat64_t ax, svfloat64_t t, + const struct data *d) +{ + /* Finish fast path to compute values for non-special cases. */ + svfloat64_t inv_twoexp = svdivr_x (pg, t, 0.25); + svfloat64_t y = svadd_x (pg, t, inv_twoexp); + + /* Halves input value, and then check if any cases + are still going to overflow. 
*/ + ax = svmul_x (special, ax, 0.5); + svbool_t is_safe + = svcmplt (special, svreinterpret_u64 (ax), d->special_bound); + + /* Computes exp(x/2), and sets any overflowing lanes to inf. */ + svfloat64_t half_exp = exp_over_two_inline (special, ax, d); + half_exp = svsel (is_safe, half_exp, sv_f64 (INFINITY)); + + /* Construct special case cosh(x) = (exp(x/2)^2)/2. */ + svfloat64_t exp = svmul_x (svptrue_b64 (), half_exp, 2); + svfloat64_t special_y = svmul_x (special, exp, half_exp); + + /* Select correct return values for special and non-special cases. */ + special_y = svsel (special, special_y, y); + + /* Ensure an input of nan is correctly propagated. */ + svbool_t is_nan + = svcmpgt (special, svreinterpret_u64 (ax), sv_u64 (0x7ff0000000000000)); + return svsel (is_nan, ax, svsel (special, special_y, y)); } /* Approximation for SVE double-precision cosh(x) using exp_inline. cosh(x) = (exp(x) + exp(-x)) / 2. - The greatest observed error is in the scalar fall-back region, so is the - same as the scalar routine, 1.93 ULP: - _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021 - want 0x1.fd774e958236fp+1021. - - The greatest observed error in the non-special region is 1.54 ULP: - _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8 - want 0x1.f5e2bb8d5c991p+8. */ + The greatest observed error in special case region is 2.66 + 0.5 ULP: + _ZGVsMxv_cosh (0x1.633b532ffbc1ap+9) got 0x1.f9b2d3d22399ep+1023 + want 0x1.f9b2d3d22399bp+1023 + + The greatest observed error in the non-special region is 1.01 + 0.5 ULP: + _ZGVsMxv_cosh (0x1.998ecbb3c1f81p+1) got 0x1.890b225657f84p+3 + want 0x1.890b225657f82p+3. 
*/ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); @@ -94,14 +122,13 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound); /* Up to the point that exp overflows, we can use it to calculate cosh by - exp(|x|) / 2 + 1 / (2 * exp(|x|)). */ - svfloat64_t t = exp_inline (ax, pg, d); + exp(|x|)/2 + 1 / (2 * exp(|x|)). */ + svfloat64_t half_exp = exp_over_two_inline (pg, ax, d); - /* Fall back to scalar for any special cases. */ + /* Falls back to entirely standalone vectorized special case. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, pg, t, special); + return special_case (pg, special, ax, half_exp, d); - svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); - return svadd_x (pg, half_t, half_over_t); + svfloat64_t inv_twoexp = svdivr_x (pg, half_exp, 0.25); + return svadd_x (pg, half_exp, inv_twoexp); } diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c index 1a74db2..f3e7f8b 100644 --- a/sysdeps/aarch64/fpu/exp10f_sve.c +++ b/sysdeps/aarch64/fpu/exp10f_sve.c @@ -19,26 +19,19 @@ #include "sv_math.h" -/* For x < -Thres, the result is subnormal and not handled correctly by - FEXPA. */ -#define Thres 37.9 +/* For x < -Thres (-log10(2^126)), the result is subnormal and not handled + correctly by FEXPA. */ +#define Thres 0x1.2f702p+5 static const struct data { - float log2_10_lo, c0, c2, c4; - float c1, c3, log10_2; - float shift, log2_10_hi, thres; + float log10_2, log2_10_hi, log2_10_lo, c1; + float c0, shift, thres; } data = { /* Coefficients generated using Remez algorithm with minimisation of relative - error. - rel error: 0x1.89dafa3p-24 - abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2] - maxerr: 0.52 +0.5 ulp.
*/ - .c0 = 0x1.26bb16p+1f, - .c1 = 0x1.5350d2p+1f, - .c2 = 0x1.04744ap+1f, - .c3 = 0x1.2d8176p+0f, - .c4 = 0x1.12b41ap-1f, + error. */ + .c0 = 0x1.26bb62p1, + .c1 = 0x1.53524cp1, /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */ .shift = 0x1.803f8p17f, .log10_2 = 0x1.a934fp+1, @@ -53,28 +46,23 @@ sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)), with poly(r) in [1/sqrt(2), sqrt(2)] and x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */ - - svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo); + svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log10_2); /* n = round(x/(log10(2)/N)). */ svfloat32_t shift = sv_f32 (d->shift); - svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift); - svfloat32_t n = svsub_x (svptrue_b32 (), z, shift); + svfloat32_t z = svmla_lane (shift, x, lane_consts, 0); + svfloat32_t n = svsub_x (pg, z, shift); /* r = x - n*log10(2)/N. */ - svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x); - r = svmls_lane (r, n, lane_consts, 0); + svfloat32_t r = x; + r = svmls_lane (r, n, lane_consts, 1); + r = svmls_lane (r, n, lane_consts, 2); svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */ - svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); - svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); - svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); - svfloat32_t p14 = svmla_x (pg, p12, p34, r2); - svfloat32_t p0 = svmul_lane (r, lane_consts, 1); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); - + svfloat32_t poly = svmla_lane (sv_f32 (d->c0), r, lane_consts, 3); + poly = svmul_x (pg, poly, r); return svmla_x (pg, scale, scale, poly); } @@ -85,11 +73,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d) special); } -/* Single-precision SVE exp10f routine. Implements the same algorithm - as AdvSIMD exp10f. 
- Worst case error is 1.02 ULPs. - _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1 - want 0x1.ba5f9cp-1. */ +/* Single-precision SVE exp10f routine. Based on the FEXPA instruction. + Worst case error is 1.10 ULP. + _ZGVsMxv_exp10f (0x1.cc76dep+3) got 0x1.be0172p+47 + want 0x1.be017p+47. */ svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c index 6db8526..c135852 100644 --- a/sysdeps/aarch64/fpu/exp2_sve.c +++ b/sysdeps/aarch64/fpu/exp2_sve.c @@ -19,23 +19,21 @@ #include "sv_math.h" -#define N (1 << V_EXP_TABLE_BITS) - #define BigBound 1022 #define UOFlowBound 1280 static const struct data { - double c0, c2; - double c1, c3; + double c2, c4; + double c0, c1, c3; double shift, big_bound, uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. */ - .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3, - .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7, - .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound, - .big_bound = BigBound, + .c0 = 0x1.62e42fefa39efp-1, .c1 = 0x1.ebfbdff82a31bp-3, + .c2 = 0x1.c6b08d706c8a5p-5, .c3 = 0x1.3b2ad2ff7d2f3p-7, + .c4 = 0x1.5d8761184beb3p-10, .shift = 0x1.800000000ffc0p+46, + .uoflow_bound = UOFlowBound, .big_bound = BigBound, }; #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ @@ -64,50 +62,52 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. */ - svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound); + svbool_t p_cmp = svacle (pg, n, d->uoflow_bound); svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); - return svsel (p_cmp, r1, r0); + return svsel (p_cmp, r0, r1); } /* Fast vector implementation of exp2. 
- Maximum measured error is 1.65 ulp. - _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1 - want 0x1.f8db0d4df721dp-1. */ + Maximum measured error is 0.52 + 0.5 ulp. + _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0 + want 0x1.8861641b49e07p+0. */ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svbool_t no_big_scale = svacle (pg, x, d->big_bound); - svbool_t special = svnot_z (pg, no_big_scale); - - /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. */ - svfloat64_t shift = sv_f64 (d->shift); - svfloat64_t kd = svadd_x (pg, x, shift); - svuint64_t ki = svreinterpret_u64 (kd); - /* kd = k/N. */ - kd = svsub_x (pg, kd, shift); - svfloat64_t r = svsub_x (pg, x, kd); - - /* scale ~= 2^(k/N). */ - svuint64_t idx = svand_x (pg, ki, N - 1); - svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS); - svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top)); - - svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); - /* Approximate exp2(r) using polynomial. */ - /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */ + svbool_t special = svacge (pg, x, d->big_bound); + + svfloat64_t z = svadd_x (svptrue_b64 (), x, d->shift); + svfloat64_t n = svsub_x (svptrue_b64 (), z, d->shift); + svfloat64_t r = svsub_x (svptrue_b64 (), x, n); + + svfloat64_t scale = svexpa (svreinterpret_u64 (z)); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); - svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); - svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); - svfloat64_t p = svmla_x (pg, p01, p23, r2); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + /* Approximate exp2(r) using polynomial. */ + /* y = exp2(r) - 1 ~= r * (C0 + C1 r + C2 r^2 + C3 r^3 + C4 r^4). 
*/ + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p = svmla_x (pg, p12, p34, r2); + p = svmad_x (pg, p, r, d->c0); svfloat64_t y = svmul_x (svptrue_b64 (), r, p); + /* Assemble exp2(x) = exp2(r) * scale. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (pg, scale, y, kd, d); + { + /* FEXPA zeroes the sign bit, however the sign is meaningful to the + special case function so needs to be copied. + e = sign bit of u << 46. */ + svuint64_t e = svand_x (pg, svlsl_x (pg, svreinterpret_u64 (z), 46), + 0x8000000000000000); + scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale))); + return special_case (pg, scale, y, n, d); + } + return svmla_x (pg, scale, scale, y); } diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c index fcd7830..989cefb 100644 --- a/sysdeps/aarch64/fpu/exp2f_sve.c +++ b/sysdeps/aarch64/fpu/exp2f_sve.c @@ -18,21 +18,17 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f32.h" #define Thres 0x1.5d5e2ap+6f static const struct data { - float c0, c2, c4, c1, c3; - float shift, thres; + float c0, c1, shift, thres; } data = { - /* Coefficients copied from the polynomial in AdvSIMD variant. */ - .c0 = 0x1.62e422p-1f, - .c1 = 0x1.ebf9bcp-3f, - .c2 = 0x1.c6bd32p-5f, - .c3 = 0x1.3ce9e4p-7f, - .c4 = 0x1.59977ap-10f, + /* Coefficients generated using Remez algorithm with minimisation of relative + error. */ + .c0 = 0x1.62e485p-1, + .c1 = 0x1.ebfbe0p-3, /* 1.5*2^17 + 127. */ .shift = 0x1.803f8p17f, /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled @@ -51,16 +47,8 @@ sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d) svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - /* Polynomial evaluation: poly(r) ~ exp2(r)-1. 
- Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for - coefficients 1 to 4, and apply most significant coefficient directly. */ - svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0); - svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); - svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1); - svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2); - svfloat32_t p14 = svmla_x (pg, p12, r2, p34); - svfloat32_t p0 = svmul_lane (r, even_coeffs, 0); - svfloat32_t poly = svmla_x (pg, p0, r2, p14); + svfloat32_t poly = svmla_x (pg, sv_f32 (d->c0), r, sv_f32 (d->c1)); + poly = svmul_x (svptrue_b32 (), poly, r); return svmla_x (pg, scale, scale, poly); } @@ -72,11 +60,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d) special); } -/* Single-precision SVE exp2f routine. Implements the same algorithm - as AdvSIMD exp2f. - Worst case error is 1.04 ULPs. - _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1 - want 0x1.ba6a64p-1. */ +/* Single-precision SVE exp2f routine, based on the FEXPA instruction. + Worst case error is 1.09 ULPs. + _ZGVsMxv_exp2f (0x1.9a2a94p-1) got 0x1.be1054p+0 + want 0x1.be1052p+0. */ svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) { const struct data *d = ptr_barrier (&data); diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c index d4ba8cc..b1d940b 100644 --- a/sysdeps/aarch64/fpu/expm1_sve.c +++ b/sysdeps/aarch64/fpu/expm1_sve.c @@ -18,82 +18,164 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" -#define SpecialBound 0x1.62b7d369a5aa9p+9 -#define ExponentBias 0x3ff0000000000000 +#define FexpaBound 0x1.4cb5ecef28adap-3 /* 15*ln2/64. */ +#define SpecialBound 0x1.628c2855bfaddp+9 /* ln(2^(1023 + 1/128)). */ static const struct data { - double poly[11]; - double shift, inv_ln2, special_bound; - /* To be loaded in one quad-word. 
*/ + double c2, c4; + double inv_ln2; double ln2_hi, ln2_lo; + double c0, c1, c3; + double shift, thres; + uint64_t expm1_data[32]; } data = { - /* Generated using fpminimax. */ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13, - 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .special_bound = SpecialBound, - .inv_ln2 = 0x1.71547652b82fep0, - .ln2_hi = 0x1.62e42fefa39efp-1, - .ln2_lo = 0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, + /* Table emulating FEXPA - 1, for values of FEXPA close to 1. + The table holds values of 2^(i/64) - 1, computed in arbitrary precision. + The first half of the table stores values associated to i from 0 to 15. + The second half of the table stores values associated to i from 0 to -15. */ + .expm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x0000000000000000, 0xbfc331751ec3a814, 0xbfc20224341286e4, 0xbfc0cf85bed0f8b7, + 0xbfbf332113d56b1f, 0xbfbcc0768d4175a6, 0xbfba46f918837cb7, 0xbfb7c695afc3b424, + 0xbfb53f391822dbc7, 0xbfb2b0cfe1266bd4, 0xbfb01b466423250a, 0xbfaafd11874c009e, + 0xbfa5b505d5b6f268, 0xbfa05e4119ea5d89, 0xbf95f134923757f3, 0xbf860f9f985bc9f4, + }, + + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. 
*/ + .thres = SpecialBound, }; -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t pg) +#define SpecialOffset 0x6000000000000000 /* 0x1p513. */ +/* SpecialBias1 - SpecialBias2 = asuint(1.0). */ +#define SpecialBias1 0x7000000000000000 /* 0x1p769. */ +#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ + +static NOINLINE svfloat64_t +special_case (svbool_t pg, svfloat64_t y, svfloat64_t s, svfloat64_t p, + svfloat64_t n) { - return sv_call_f64 (expm1, x, y, pg); + /* s=2^n may overflow, break it up into s=s1*s2, + such that exp = s + s*y can be computed as s1*(s2+s2*y) + and s1*s1 overflows only if n>0. */ + + /* If n<=0 then set b to 0x60...0, 0 otherwise. */ + svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */ + svuint64_t b + = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */ + + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. */ + svfloat64_t s2 = svreinterpret_f64 ( + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); + + /* |n| > 1280 => 2^(n) overflows. */ + svbool_t p_cmp = svacgt (pg, n, 1280.0); + + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); + svfloat64_t r2 = svmla_x (pg, s2, s2, p); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); + + svbool_t is_safe = svacle (pg, n, 1023); /* Only correct special lanes. */ + return svsel (is_safe, y, svsub_x (pg, svsel (p_cmp, r1, r0), 1.0)); } -/* Double-precision vector exp(x) - 1 function. - The maximum error observed error is 2.18 ULP: - _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2 - want 0x1.a8b9ea8d66e2p-2. */ +/* FEXPA based SVE expm1 algorithm. + Maximum measured error is 2.81 + 0.5 ULP: + _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3 + want 0x1.c290e5858bb5p-3.
*/ svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - /* Large, Nan/Inf. */ - svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound)); - - /* Reduce argument to smaller range: - Let i = round(x / ln2) - and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where 2^i is exact because i is an integer. */ - svfloat64_t shift = sv_f64 (d->shift); - svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift); - svint64_t i = svcvt_s64_x (pg, n); - svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); - svfloat64_t f = svmls_lane (x, n, ln2, 0); - f = svmls_lane (f, n, ln2, 1); - - /* Approximate expm1(f) using polynomial. - Taylor expansion for expm1(x) has the form: - x + ax^2 + bx^3 + cx^4 .... - So we calculate the polynomial P(f) = a + bf + cf^2 + ... - and assemble the approximation expm1(f) ~= f + f^2 * P(f). */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t f8 = svmul_x (pg, f4, f4); - svfloat64_t p - = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); - - /* Assemble the result. - expm1(x) ~= 2^i * (p + 1) - 1 - Let t = 2^i. */ - svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias); - svfloat64_t t = svreinterpret_f64 (u); - - /* expm1(x) ~= p * t + (t - 1). */ - svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t); + svbool_t special = svacgt (pg, x, d->thres); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, y, special); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + /* r = x - n * ln2, r is in [-ln2/128, ln2/128]. */ + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. 
*/ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + svfloat64_t scalem1 = svsub_x (pg, scale, sv_f64 (1.0)); + + /* We want to construct expm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. + + This bound is based upon the table size: + Bound = ((TableSize - 1) / 64) * ln2. + The current bound is based upon a table size of 16. */ + svbool_t is_small = svaclt (pg, x, FexpaBound); + + if (svptest_any (pg, is_small)) + { + /* Index via the input of FEXPA, but we only care about the lower 4 bits. + */ + svuint64_t base_idx = svand_x (pg, u, 0xf); + + /* We can use the sign of x as a fifth bit to account for the asymmetry + of e^x around 0. */ + svuint64_t signBit + = svlsl_x (pg, svlsr_x (pg, svreinterpret_u64 (x), 63), 4); + svuint64_t idx = svorr_x (pg, base_idx, signBit); + + /* Lookup values for scale - 1 for small x. */ + svfloat64_t lookup = svreinterpret_f64 ( + svld1_gather_index (is_small, d->expm1_data, idx)); + + /* Select the appropriate scale - 1 value based on x. */ + scalem1 = svsel (is_small, lookup, scalem1); + } + + svfloat64_t y = svmla_x (pg, scalem1, scale, p); + + /* FEXPA returns nan for large inputs so we special case those. */ + if (__glibc_unlikely (svptest_any (pg, special))) + { + /* FEXPA zeroes the sign bit, however the sign is meaningful to the + special case function so needs to be copied. + e = sign bit of u << 46.
*/ + svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000); + /* Copy sign to s. */ + scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale))); + return special_case (pg, y, scale, p, n); + } + + /* return expm1 = (scale - 1) + (scale * poly). */ return y; } diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c index 862c13f..821c078 100644 --- a/sysdeps/aarch64/fpu/log1p_sve.c +++ b/sysdeps/aarch64/fpu/log1p_sve.c @@ -22,19 +22,33 @@ static const struct data { - double poly[19]; + float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16; + float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; double ln2_hi, ln2_lo; uint64_t hfrt2_top, onemhfrt2_top, inf, mone; } data = { /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20 - polynomial, however first 2 coefficients are 0 and 1 so are not stored. */ - .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, - 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, - -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, - 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, - -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, - 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, - -0x1.cfa7385bdb37ep-6, }, + polynomial, however first 2 coefficients are 0 and 1 so are not + stored. 
*/ + .c0 = -0x1.ffffffffffffbp-2, + .c1 = 0x1.55555555551a9p-2, + .c2 = -0x1.00000000008e3p-2, + .c3 = 0x1.9999999a32797p-3, + .c4 = -0x1.555555552fecfp-3, + .c5 = 0x1.249248e071e5ap-3, + .c6 = -0x1.ffffff8bf8482p-4, + .c7 = 0x1.c71c8f07da57ap-4, + .c8 = -0x1.9999ca4ccb617p-4, + .c9 = 0x1.7459ad2e1dfa3p-4, + .c10 = -0x1.554d2680a3ff2p-4, + .c11 = 0x1.3b4c54d487455p-4, + .c12 = -0x1.2548a9ffe80e6p-4, + .c13 = 0x1.0f389a24b2e07p-4, + .c14 = -0x1.eee4db15db335p-5, + .c15 = 0x1.e95b494d4a5ddp-5, + .c16 = -0x1.15fdf07cb7c73p-4, + .c17 = 0x1.0310b70800fcfp-4, + .c18 = -0x1.cfa7385bdb37ep-6, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* top32(asuint64(sqrt(2)/2)) << 32. */ @@ -49,7 +63,7 @@ static const struct data #define BottomMask 0xffffffff static svfloat64_t NOINLINE -special_case (svbool_t special, svfloat64_t x, svfloat64_t y) +special_case (svfloat64_t x, svfloat64_t y, svbool_t special) { return sv_call_f64 (log1p, x, y, special); } @@ -91,8 +105,9 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ svuint64_t utop = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top); - svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask)); - svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1); + svuint64_t u_red + = svorr_x (pg, utop, svand_x (svptrue_b64 (), mi, BottomMask)); + svfloat64_t f = svsub_x (svptrue_b64 (), svreinterpret_f64 (u_red), 1); /* Correction term c/m. */ svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m); @@ -103,18 +118,49 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) Hence approximation has the form f + f^2 * P(f) where P(x) = C0 + C1*x + C2x^2 + ... Assembling this all correctly is dealt with at the final step. 
*/ - svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2), - f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8); - svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly); + svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f), + f4 = svmul_x (svptrue_b64 (), f2, f2), + f8 = svmul_x (svptrue_b64 (), f4, f4), + f16 = svmul_x (svptrue_b64 (), f8, f8); + + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17); + + /* Order-18 Estrin scheme. */ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, f2, p23); + svfloat64_t p47 = svmla_x (pg, p45, f2, p67); + svfloat64_t p07 = svmla_x (pg, p03, f4, p47); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1); + + svfloat64_t p811 = svmla_x (pg, p89, f2, p1011); + svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415); + svfloat64_t p815 = svmla_x (pg, p811, f4, p1215); + + svfloat64_t p015 = svmla_x (pg, p07, f8, p815); + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0); + svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1); + svfloat64_t p = svmla_x (pg, p015, f16, p1618); svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo); svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi); - svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (special, x, y); - - return y; + return special_case ( 
+ x, svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p), + special); + return svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p); } strong_alias (SV_NAME_D1 (log1p), SV_NAME_D1 (logp1)) diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c index 963453f..072ba8f 100644 --- a/sysdeps/aarch64/fpu/sinh_sve.c +++ b/sysdeps/aarch64/fpu/sinh_sve.c @@ -18,90 +18,153 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[11]; - float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift; uint64_t halff; - int64_t onef; - uint64_t large_bound; + double c2, c4; + double inv_ln2; + double ln2_hi, ln2_lo; + double c0, c1, c3; + double shift, special_bound, bound; + uint64_t expm1_data[20]; } data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, - 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, - 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .inv_ln2 = 0x1.71547652b82fep0, - .m_ln2_hi = -0x1.62e42fefa39efp-1, - .m_ln2_lo = -0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, - + /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. */ + .expm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7, + }, + + /* Generated using Remez, in [-log(2)/128, log(2)/128]. 
*/ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .inv_ln2 = 0x1.71547652b82fep+0, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */ .halff = 0x3fe0000000000000, - .onef = 0x3ff0000000000000, - /* 2^9. expm1 helper overflows for large input. */ - .large_bound = 0x4080000000000000, + .special_bound = 0x1.62e37e7d8ba72p+9, /* ln(2^(1024 - 1/128)). */ + .bound = 0x1.a56ef8ec924ccp-3 /* 19*ln2/64. */ }; +/* A specialised FEXPA expm1 that is only valid for positive inputs and + has no special cases. Based off the full FEXPA expm1 implementated for + _ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP. */ static inline svfloat64_t -expm1_inline (svfloat64_t x, svbool_t pg) +expm1_inline (svbool_t pg, svfloat64_t x) { const struct data *d = ptr_barrier (&data); - /* Reduce argument: - exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 - where i = round(x / ln2) - and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */ - svfloat64_t j - = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); - svint64_t i = svcvt_s64_x (pg, j); - svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi); - f = svmla_x (pg, f, j, d->m_ln2_lo); - /* Approximate expm1(f) using polynomial. */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t f8 = svmul_x (pg, f4, f4); - svfloat64_t p - = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly)); - /* t = 2^i. */ - svfloat64_t t = svscale_x (pg, sv_f64 (1), i); - /* expm1(x) ~= p * t + (t - 1). 
*/ - return svmla_x (pg, svsub_x (pg, t, 1.0), p, t); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t r = x; + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + + /* We want to construct expm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. */ + svbool_t is_small = svaclt (pg, x, d->bound); + + /* Index via the input of FEXPA, but we only care about the lower 5 bits. */ + svuint64_t base_idx = svand_x (pg, u, 0x1f); + + /* Compute scale - 1 from FEXPA, and lookup values where this fails. */ + svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0)); + svuint64_t scalem1_lookup + = svld1_gather_index (is_small, d->expm1_data, base_idx); + + /* Select the appropriate scale - 1 value based on x. */ + svfloat64_t scalem1 + = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate); + + /* return expm1 = scale - 1 + (scale * poly). */ + return svmla_x (pg, scalem1, scale, p); } +/* Vectorised special case to handle values past where exp_inline overflows. + Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double + the valid range of inputs, and returns inf for anything past that. 
*/ static svfloat64_t NOINLINE -special_case (svfloat64_t x, svbool_t pg) +special_case (svbool_t pg, svbool_t special, svfloat64_t ax, + svfloat64_t halfsign, const struct data *d) { - return sv_call_f64 (sinh, x, x, pg); + /* Halves input value, and then check if any cases + are still going to overflow. */ + ax = svmul_x (special, ax, 0.5); + svbool_t is_safe = svaclt (special, ax, d->special_bound); + + svfloat64_t t = expm1_inline (pg, ax); + + /* Finish fastpass to compute values for non-special cases. */ + svfloat64_t y = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); + y = svmul_x (pg, y, halfsign); + + /* Computes special lane, and set remaining overflow lanes to inf. */ + svfloat64_t half_special_y = svmul_x (svptrue_b64 (), t, halfsign); + svfloat64_t special_y = svmul_x (svptrue_b64 (), half_special_y, t); + + svuint64_t signed_inf + = svorr_x (svptrue_b64 (), svreinterpret_u64 (halfsign), + sv_u64 (0x7ff0000000000000)); + special_y = svsel (is_safe, special_y, svreinterpret_f64 (signed_inf)); + + /* Join resulting vectors together and return. */ + return svsel (special, special_y, y); } -/* Approximation for SVE double-precision sinh(x) using expm1. - sinh(x) = (exp(x) - exp(-x)) / 2. - The greatest observed error is 2.57 ULP: - _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2 - want 0x1.ab929fc64bd63p-2. */ +/* Approximation for SVE double-precision sinh(x) using FEXPA expm1. + Uses sinh(x) = e^2x - 1 / 2e^x, rewritten for accuracy. + The greatest observed error in the non-special region is 2.63 + 0.5 ULP: + _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2 + want 0x1.c3587faf97b09p-2 + + The greatest observed error in the special region is 2.65 + 0.5 ULP: + _ZGVsMxv_sinh (0x1.633ce847dab1ap+9) got 0x1.fffd30eea0066p+1023 + want 0x1.fffd30eea0063p+1023. 
*/ svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); + svbool_t special = svacge (pg, x, d->special_bound); svfloat64_t ax = svabs_x (pg, x); svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax)); svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff)); - svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound); - /* Fall back to scalar variant for all lanes if any are special. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, pg); + return special_case (pg, special, ax, halfsign, d); /* Up to the point that expm1 overflows, we can use it to calculate sinh using a slight rearrangement of the definition of sinh. This allows us to retain acceptable accuracy for very small inputs. */ - svfloat64_t t = expm1_inline (ax, pg); + svfloat64_t t = expm1_inline (pg, ax); t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0))); return svmul_x (pg, t, halfsign); } diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h index 71f88e0..c2b196f 100644 --- a/sysdeps/aarch64/fpu/sv_log1p_inline.h +++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h @@ -21,11 +21,12 @@ #define AARCH64_FPU_SV_LOG1P_INLINE_H #include "sv_math.h" -#include "poly_sve_f64.h" static const struct sv_log1p_data { - double poly[19], ln2[2]; + double c0, c2, c4, c6, c8, c10, c12, c14, c16; + double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2_lo, ln2_hi; uint64_t hf_rt2_top; uint64_t one_m_hf_rt2_top; uint32_t bottom_mask; @@ -33,15 +34,30 @@ static const struct sv_log1p_data } sv_log1p_data = { /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. 
*/ - .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, - 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, - -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, - 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, - -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, - 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, - -0x1.cfa7385bdb37ep-6 }, - .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, + .c0 = -0x1.ffffffffffffbp-2, + .c1 = 0x1.55555555551a9p-2, + .c2 = -0x1.00000000008e3p-2, + .c3 = 0x1.9999999a32797p-3, + .c4 = -0x1.555555552fecfp-3, + .c5 = 0x1.249248e071e5ap-3, + .c6 = -0x1.ffffff8bf8482p-4, + .c7 = 0x1.c71c8f07da57ap-4, + .c8 = -0x1.9999ca4ccb617p-4, + .c9 = 0x1.7459ad2e1dfa3p-4, + .c10 = -0x1.554d2680a3ff2p-4, + .c11 = 0x1.3b4c54d487455p-4, + .c12 = -0x1.2548a9ffe80e6p-4, + .c13 = 0x1.0f389a24b2e07p-4, + .c14 = -0x1.eee4db15db335p-5, + .c15 = 0x1.e95b494d4a5ddp-5, + .c16 = -0x1.15fdf07cb7c73p-4, + .c17 = 0x1.0310b70800fcfp-4, + .c18 = -0x1.cfa7385bdb37ep-6, + .ln2_lo = 0x1.62e42fefa3800p-1, + .ln2_hi = 0x1.ef35793c76730p-45, + /* top32(asuint64(sqrt(2)/2)) << 32. */ .hf_rt2_top = 0x3fe6a09e00000000, + /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */ .one_m_hf_rt2_top = 0x00095f6200000000, .bottom_mask = 0xffffffff, .one_top = 0x3ff @@ -51,14 +67,14 @@ static inline svfloat64_t sv_log1p_inline (svfloat64_t x, const svbool_t pg) { /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which - differs from v_log1p_2u5.c by: + differs from advsimd/log1p.c by: - No special-case handling - this should be dealt with by the caller. - Pairwise Horner polynomial evaluation for improved accuracy. - Optionally simulate the shortcut for k=0, used in the scalar routine, using svsel, for improved accuracy when the argument to log1p is close to 0. 
This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1 in the source of the caller before including this file. - See sv_log1p_2u1.c for details of the algorithm. */ + See sve/log1p.c for details of the algorithm. */ const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data); svfloat64_t m = svadd_x (pg, x, 1); svuint64_t mi = svreinterpret_u64 (m); @@ -79,7 +95,7 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg) svfloat64_t cm; #ifndef WANT_SV_LOG1P_K0_SHORTCUT -#error \ +#error \ "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0" #elif WANT_SV_LOG1P_K0_SHORTCUT /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is @@ -96,14 +112,46 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg) #endif /* Approximate log1p(f) on the reduced input using a polynomial. */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly); + svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f), + f4 = svmul_x (svptrue_b64 (), f2, f2), + f8 = svmul_x (svptrue_b64 (), f4, f4), + f16 = svmul_x (svptrue_b64 (), f8, f8); + + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17); + + /* Order-18 Estrin scheme. 
*/ + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1); + + svfloat64_t p03 = svmla_x (pg, p01, f2, p23); + svfloat64_t p47 = svmla_x (pg, p45, f2, p67); + svfloat64_t p07 = svmla_x (pg, p03, f4, p47); + + svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1); + svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0); + svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1); + + svfloat64_t p811 = svmla_x (pg, p89, f2, p1011); + svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415); + svfloat64_t p815 = svmla_x (pg, p811, f4, p1215); + + svfloat64_t p015 = svmla_x (pg, p07, f8, p815); + svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0); + svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1); + svfloat64_t p = svmla_x (pg, p015, f16, p1618); /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */ - svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]); - svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]); + svfloat64_t ln2_lo_hi = svld1rq (svptrue_b64 (), &d->ln2_lo); + svfloat64_t ylo = svmla_lane (cm, k, ln2_lo_hi, 0); + svfloat64_t yhi = svmla_lane (f, k, ln2_lo_hi, 1); - return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); + return svmad_x (pg, p, f2, svadd_x (pg, ylo, yhi)); } - #endif diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c index 789cc68..5869419 100644 --- a/sysdeps/aarch64/fpu/tanh_sve.c +++ b/sysdeps/aarch64/fpu/tanh_sve.c @@ -18,83 +18,117 @@ <https://www.gnu.org/licenses/>. 
*/ #include "sv_math.h" -#include "poly_sve_f64.h" static const struct data { - float64_t poly[11]; - float64_t inv_ln2, ln2_hi, ln2_lo, shift; - uint64_t thresh, tiny_bound; + double ln2_hi, ln2_lo; + double c2, c4; + double c0, c1, c3; + double two_over_ln2, shift; + uint64_t tiny_bound; + double large_bound, fexpa_bound; + uint64_t e2xm1_data[20]; } data = { - /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */ - .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5, - 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, - 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16, - 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22, - 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, }, - - .inv_ln2 = 0x1.71547652b82fep0, - .ln2_hi = -0x1.62e42fefa39efp-1, - .ln2_lo = -0x1.abc9e3b39803fp-56, - .shift = 0x1.8p52, - + /* Generated using Remez, in [-log(2)/128, log(2)/128]. */ + .c0 = 0x1p-1, + .c1 = 0x1.55555555548f9p-3, + .c2 = 0x1.5555555554c22p-5, + .c3 = 0x1.111123aaa2fb2p-7, + .c4 = 0x1.6c16d77d98e5bp-10, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .two_over_ln2 = 0x1.71547652b82fep+1, + .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */ .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */ - /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */ - .thresh = 0x01f241bf835f9d5f, + .large_bound = 0x1.30fc1931f09cap+4, /* arctanh(1 - 2^-54). */ + .fexpa_bound = 0x1.a56ef8ec924ccp-4, /* 19/64 * ln2/2. */ + /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. 
*/ + .e2xm1_data = { + 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901, + 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb, + 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2, + 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a, + 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7, + }, }; +/* An expm1 inspired, FEXPA based helper function that returns an + accurate estimate for e^2x - 1. With no special case or support for + negative inputs of x. */ static inline svfloat64_t -expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d) -{ - /* Helper routine for calculating exp(x) - 1. Vector port of the helper from - the scalar variant of tanh. */ - - /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - svfloat64_t j - = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift); - svint64_t i = svcvt_s64_x (pg, j); - svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi); - f = svmla_x (pg, f, j, d->ln2_lo); - - /* Approximate expm1(f) using polynomial. */ - svfloat64_t f2 = svmul_x (pg, f, f); - svfloat64_t f4 = svmul_x (pg, f2, f2); - svfloat64_t p = svmla_x ( - pg, f, f2, - sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly)); - - /* t = 2 ^ i. */ - svfloat64_t t = svscale_x (pg, sv_f64 (1), i); - /* expm1(x) = p * t + (t - 1). */ - return svmla_x (pg, svsub_x (pg, t, 1), p, t); -} - -static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d) { - return sv_call_f64 (tanh, x, y, special); + svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2); + svuint64_t u = svreinterpret_u64 (z); + svfloat64_t n = svsub_x (pg, z, d->shift); + + /* r = x - n * ln2/2, r is in [-ln2/(2N), ln2/(2N)]. 
*/ + svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); + svfloat64_t r = svadd_x (pg, x, x); + r = svmls_lane (r, n, ln2, 0); + r = svmls_lane (r, n, ln2, 1); + + /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); + + svfloat64_t p; + svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + p = svmad_x (pg, c34, r2, c12); + p = svmad_x (pg, p, r, sv_f64 (d->c0)); + p = svmad_x (pg, p, r2, r); + + svfloat64_t scale = svexpa (u); + + /* We want to construct e2xm1(x) = (scale - 1) + scale * poly. + However, for values of scale close to 1, scale-1 causes large ULP errors + due to cancellation. + + This can be circumvented by using a small lookup for scale-1 + when our input is below a certain bound, otherwise we can use FEXPA. */ + svbool_t is_small = svaclt (pg, x, d->fexpa_bound); + + /* Index via the input of FEXPA, but we only care about the lower 5 bits. */ + svuint64_t base_idx = svand_x (pg, u, 0x1f); + + /* Compute scale - 1 from FEXPA, and lookup values where this fails. */ + svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0)); + svuint64_t scalem1_lookup + = svld1_gather_index (is_small, d->e2xm1_data, base_idx); + + /* Select the appropriate scale - 1 value based on x. */ + svfloat64_t scalem1 + = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate); + return svmla_x (pg, scalem1, scale, p); } -/* SVE approximation for double-precision tanh(x), using a simplified - version of expm1. The greatest observed error is 2.77 ULP: - _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3 - want -0x1.bd6a21a163624p-3. */ +/* SVE approximation for double-precision tanh(x), using a modified version of + FEXPA expm1 to calculate e^2x - 1. 
+ The greatest observed error is 2.79 + 0.5 ULP: + _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9 + want 0x1.fff7be486cae9p-9. */ svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg) { const struct data *d = ptr_barrier (&data); - svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x)); + svbool_t large = svacge (pg, x, d->large_bound); - /* Trigger special-cases for tiny, boring and infinity/NaN. */ - svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh); + /* We can use tanh(x) = (e^2x - 1) / (e^2x + 1) to approximate tanh. + As an additional optimisation, we can ensure more accurate values of e^x + by only using positive inputs. So we calculate tanh(|x|), and restore the + sign of the input before returning. */ + svfloat64_t ax = svabs_x (pg, x); + svuint64_t sign_bit + = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax)); - svfloat64_t u = svadd_x (pg, x, x); + svfloat64_t p = e2xm1_inline (pg, ax, d); + svfloat64_t q = svadd_x (pg, p, 2); - /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ - svfloat64_t q = expm1_inline (u, pg, d); - svfloat64_t qp2 = svadd_x (pg, q, 2); + /* For sufficiently high inputs, the result of tanh(|x|) is 1 when correctly + rounded, at this point we can return 1 directly, with sign correction. + This will also act as a guard against our approximation overflowing. 
*/ + svfloat64_t y = svsel (large, sv_f64 (1.0), svdiv_x (pg, p, q)); - if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svdiv_x (pg, q, qp2), special); - return svdiv_x (pg, q, qp2); + return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y))); } diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c index 07133eb..a3fef22 100644 --- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c @@ -25,11 +25,15 @@ VPCS_VECTOR_WRAPPER (acos_advsimd, _ZGVnN2v_acos) VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh) +VPCS_VECTOR_WRAPPER (acospi_advsimd, _ZGVnN2v_acospi) VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin) VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh) +VPCS_VECTOR_WRAPPER (asinpi_advsimd, _ZGVnN2v_asinpi) VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan) VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh) +VPCS_VECTOR_WRAPPER (atanpi_advsimd, _ZGVnN2v_atanpi) VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2) +VPCS_VECTOR_WRAPPER_ff (atan2pi_advsimd, _ZGVnN2vv_atan2pi) VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt) VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos) VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh) diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c index 02953cb..f4a5ae8 100644 --- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c +++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c @@ -44,11 +44,15 @@ SVE_VECTOR_WRAPPER (acos_sve, _ZGVsMxv_acos) SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh) +SVE_VECTOR_WRAPPER (acospi_sve, _ZGVsMxv_acospi) SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin) SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh) +SVE_VECTOR_WRAPPER (asinpi_sve, _ZGVsMxv_asinpi) SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan) SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh) +SVE_VECTOR_WRAPPER (atanpi_sve, _ZGVsMxv_atanpi) 
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2) +SVE_VECTOR_WRAPPER_ff (atan2pi_sve, _ZGVsMxvv_atan2pi) SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt) SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos) SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh) diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c index 118bbb0..bc22956 100644 --- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c @@ -25,11 +25,15 @@ VPCS_VECTOR_WRAPPER (acosf_advsimd, _ZGVnN4v_acosf) VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf) +VPCS_VECTOR_WRAPPER (acospif_advsimd, _ZGVnN4v_acospif) VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf) VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf) +VPCS_VECTOR_WRAPPER (asinpif_advsimd, _ZGVnN4v_asinpif) VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf) VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf) +VPCS_VECTOR_WRAPPER (atanpif_advsimd, _ZGVnN4v_atanpif) VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f) +VPCS_VECTOR_WRAPPER_ff (atan2pif_advsimd, _ZGVnN4vv_atan2pif) VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf) VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf) VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf) diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c index f5e7c8c..ad0d6ad 100644 --- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c +++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c @@ -44,11 +44,15 @@ SVE_VECTOR_WRAPPER (acosf_sve, _ZGVsMxv_acosf) SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf) +SVE_VECTOR_WRAPPER (acospif_sve, _ZGVsMxv_acospif) SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf) SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf) +SVE_VECTOR_WRAPPER (asinpif_sve, _ZGVsMxv_asinpif) SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf) SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf) +SVE_VECTOR_WRAPPER (atanpif_sve, _ZGVsMxv_atanpif) 
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f) +SVE_VECTOR_WRAPPER_ff (atan2pif_sve, _ZGVsMxvv_atan2pif) SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf) SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf) SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf) diff --git a/sysdeps/aarch64/machine-gmon.h b/sysdeps/aarch64/machine-gmon.h index eba7c24..05323c9 100644 --- a/sysdeps/aarch64/machine-gmon.h +++ b/sysdeps/aarch64/machine-gmon.h @@ -27,9 +27,8 @@ static void mcount_internal (u_long frompc, u_long selfpc); #define _MCOUNT_DECL(frompc, selfpc) \ static inline void mcount_internal (u_long frompc, u_long selfpc) -/* Note: strip_pac is needed for frompc because of gcc PR target/94791. */ #define MCOUNT \ void __mcount (void *frompc) \ { \ - mcount_internal ((u_long) strip_pac (frompc), (u_long) RETURN_ADDRESS (0)); \ + mcount_internal ((u_long) frompc, (u_long) RETURN_ADDRESS (0)); \ } diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index 8dc314b..0e26171 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -36,18 +36,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/aarch64/multiarch/memcpy.c, memmove.c and memset.c. 
*/ IFUNC_IMPL (i, name, memcpy, IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_oryon1) -#if HAVE_AARCH64_SVE_ASM - IFUNC_IMPL_ADD (array, i, memcpy, sve && !bti, __memcpy_a64fx) + IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx) IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve) -#endif IFUNC_IMPL_ADD (array, i, memcpy, mops, __memcpy_mops) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) IFUNC_IMPL (i, name, memmove, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_oryon1) -#if HAVE_AARCH64_SVE_ASM - IFUNC_IMPL_ADD (array, i, memmove, sve && !bti, __memmove_a64fx) + IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx) IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve) -#endif IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) IFUNC_IMPL (i, name, memset, @@ -55,10 +51,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_oryon1) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) -#if HAVE_AARCH64_SVE_ASM - IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx) + IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx) IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64) -#endif IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) IFUNC_IMPL (i, name, memchr, diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h index 63c24e7..75b3e08 100644 --- a/sysdeps/aarch64/multiarch/init-arch.h +++ b/sysdeps/aarch64/multiarch/init-arch.h @@ -31,7 +31,7 @@ unsigned __attribute__((unused)) zva_size = \ GLRO(dl_aarch64_cpu_features).zva_size; \ bool __attribute__((unused)) bti = \ - HAVE_AARCH64_BTI && GLRO(dl_aarch64_cpu_features).bti; \ + GLRO(dl_aarch64_cpu_features).bti; \ bool 
__attribute__((unused)) mte = \ MTE_ENABLED (); \ bool __attribute__((unused)) sve = \ diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index 0e33d19..894dabe 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -43,7 +43,7 @@ select_memcpy_ifunc (void) if (mops) return __memcpy_mops; - if (sve && HAVE_AARCH64_SVE_ASM) + if (sve) { if (IS_A64FX (midr)) return __memcpy_a64fx; diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S index ed18682..acad6e8 100644 --- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S +++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S @@ -19,9 +19,6 @@ #include <sysdep.h> -#undef BTI_C -#define BTI_C - /* Assumptions: * * ARMv8.2-a, AArch64, unaligned accesses, sve @@ -38,8 +35,6 @@ #define vlen x7 #define vlen8 x8 -#if HAVE_AARCH64_SVE_ASM - .arch armv8.2-a+sve .macro ld1b_unroll8 @@ -91,9 +86,6 @@ st1b z7.b, p0, [dst, 7, mul vl] .endm -#undef BTI_C -#define BTI_C - ENTRY (__memcpy_a64fx) cntb vlen @@ -296,4 +288,3 @@ L(full_overlap): b L(last_bytes) END (__memmove_a64fx) -#endif /* HAVE_AARCH64_SVE_ASM */ diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S index 26d4890..0ba6358 100644 --- a/sysdeps/aarch64/multiarch/memcpy_sve.S +++ b/sysdeps/aarch64/multiarch/memcpy_sve.S @@ -56,8 +56,6 @@ The loop tail is handled by always copying 64 bytes from the end. 
*/ -#if HAVE_AARCH64_SVE_ASM - .arch armv8.2-a+sve ENTRY (__memcpy_sve) @@ -199,4 +197,3 @@ L(return): ret END (__memmove_sve) -#endif diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c index 47b7268..6b0d0ce 100644 --- a/sysdeps/aarch64/multiarch/memmove.c +++ b/sysdeps/aarch64/multiarch/memmove.c @@ -41,7 +41,7 @@ select_memmove_ifunc (void) if (mops) return __memmove_mops; - if (sve && HAVE_AARCH64_SVE_ASM) + if (sve) { if (IS_A64FX (midr)) return __memmove_a64fx; diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index 872f39f..2b0a58b 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -46,7 +46,7 @@ select_memset_ifunc (void) if (mops) return __memset_mops; - if (sve && HAVE_AARCH64_SVE_ASM) + if (sve) { if (IS_A64FX (midr) && zva_size == 256) return __memset_a64fx; diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S index ea60b78..e921240 100644 --- a/sysdeps/aarch64/multiarch/memset_a64fx.S +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S @@ -31,8 +31,6 @@ #define PF_DIST_L1 (CACHE_LINE_SIZE * 16) // Prefetch distance L1 #define vector_length x9 -#if HAVE_AARCH64_SVE_ASM - .arch armv8.2-a+sve #define dstin x0 @@ -50,10 +48,6 @@ .endif .endm - -#undef BTI_C -#define BTI_C - ENTRY (__memset_a64fx) cntb vector_length @@ -170,5 +164,3 @@ L(L2): b L(last) END (__memset_a64fx) - -#endif /* HAVE_AARCH64_SVE_ASM */ diff --git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S index 7fb40fd..c385e1a 100644 --- a/sysdeps/aarch64/multiarch/memset_sve_zva64.S +++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S @@ -25,8 +25,6 @@ * ZVA size is 64. 
*/ -#if HAVE_AARCH64_SVE_ASM - .arch armv8.2-a+sve #define dstin x0 @@ -120,4 +118,3 @@ L(no_zva_loop): ret END (__memset_sve_zva64) -#endif diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure index 19657b6..e1b772c 100644 --- a/sysdeps/aarch64/preconfigure +++ b/sysdeps/aarch64/preconfigure @@ -3,5 +3,6 @@ aarch64*) base_machine=aarch64 machine=aarch64 mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S index d82d62c..53c5e7d 100644 --- a/sysdeps/aarch64/setjmp.S +++ b/sysdeps/aarch64/setjmp.S @@ -35,6 +35,20 @@ libc_hidden_def (_setjmp) ENTRY_ALIGN (__sigsetjmp, 2) 1: + +#if IS_IN(libc) + /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. + The calling convention of __libc_arm_za_disable allows the return + address to be kept in x13 across the call, so it does not have to + be saved to and reloaded from the stack. As a result we also don't + need to sign the return address and check it after returning, + because it is never stored to the stack. */ + mov x13, x30 + cfi_register (x30, x13) + bl __libc_arm_za_disable + mov x30, x13 + cfi_register (x13, x30) +#endif + stp x19, x20, [x0, #JB_X19<<3] stp x21, x22, [x0, #JB_X21<<3] stp x23, x24, [x0, #JB_X23<<3] @@ -73,7 +87,7 @@ L(gcs_done): #if IS_IN (rtld) /* In ld.so we never save the signal mask */ mov w0, #0 - RET + ret #else b C_SYMBOL_NAME(__sigjmp_save) #endif diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S index 544e397..694c338 100644 --- a/sysdeps/aarch64/start.S +++ b/sysdeps/aarch64/start.S @@ -108,7 +108,7 @@ ENTRY(_start) because crt1.o and rcrt1.o share code and the later must avoid the use of GOT relocations before __libc_start_main is called. 
*/ __wrap_main: - BTI_C + bti c b main #endif END(_start) diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h index 7781b37..a3322a9 100644 --- a/sysdeps/aarch64/sys/ifunc.h +++ b/sysdeps/aarch64/sys/ifunc.h @@ -19,24 +19,77 @@ #ifndef _SYS_IFUNC_H #define _SYS_IFUNC_H +#include <sys/cdefs.h> + /* A second argument is passed to the ifunc resolver. */ #define _IFUNC_ARG_HWCAP (1ULL << 62) -/* The prototype of a gnu indirect function resolver on AArch64 is +/* Maximum number of HWCAP elements that are currently supported. */ +#define _IFUNC_HWCAP_MAX 4 + +/* The prototype of a GNU indirect function resolver on AArch64 is + + ElfW(Addr) ifunc_resolver (uint64_t, const uint64_t *); + + The following prototype is also compatible: ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *); - the first argument should have the _IFUNC_ARG_HWCAP bit set and - the remaining bits should match the AT_HWCAP settings. */ + The first argument might have the _IFUNC_ARG_HWCAP bit set and + the remaining bits should match the AT_HWCAP settings. + + If the _IFUNC_ARG_HWCAP bit is set in the first argument, then + the second argument is passed to the resolver function. In + this case, the second argument is a const pointer to a buffer + that gives access to all available HWCAP elements. + + This buffer has its size in bytes at offset 0. The HWCAP elements + are available at offsets 8, 16, 24, 32... respectively for AT_HWCAP, + AT_HWCAP2, AT_HWCAP3, AT_HWCAP4... (these offsets are multiples of + sizeof (unsigned long)). + + Indirect function resolvers must use the size of the buffer to + check at runtime that a HWCAP element is available before + accessing it. */ -/* Second argument to an ifunc resolver. */ struct __ifunc_arg_t { - unsigned long _size; /* Size of the struct, so it can grow. */ + unsigned long _size; /* Size of the struct, so it can grow. */ unsigned long _hwcap; - unsigned long _hwcap2; + unsigned long _hwcap2; /* End of 1st published struct. 
*/ + unsigned long _hwcap3; + unsigned long _hwcap4; /* End of 2nd published struct. */ }; typedef struct __ifunc_arg_t __ifunc_arg_t; +/* Constants for IDs of HWCAP elements to be used with the + __ifunc_hwcap function below. */ +enum +{ + _IFUNC_ARG_AT_HWCAP = 1, + _IFUNC_ARG_AT_HWCAP2 = 2, + _IFUNC_ARG_AT_HWCAP3 = 3, + _IFUNC_ARG_AT_HWCAP4 = 4, +}; + +/* A helper function to obtain HWCAP element by its ID from the + parameters ARG0 and ARG1 passed to the ifunc resolver. Note that + ID 1 corresponds to AT_HWCAP, ID 2 corresponds to AT_HWCAP2, etc. + If there is no element available for the requested ID then 0 is + returned. If ID doesn't match any supported AT_HWCAP{,2,...} value, + then 0 is also returned. */ +static __inline unsigned long __attribute__ ((unused, always_inline)) +__ifunc_hwcap (unsigned long __id, + unsigned long __arg0, const unsigned long *__arg1) +{ + if (__glibc_likely (__arg0 & _IFUNC_ARG_HWCAP)) + { + const unsigned long size = __arg1[0]; + const unsigned long offset = __id * sizeof (unsigned long); + return offset < size && __id > 0 ? __arg1[__id] : 0; + } + return __id == 1 ? __arg0 : 0; +} + #endif diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h index 9424115..f5e28cb 100644 --- a/sysdeps/aarch64/sysdep.h +++ b/sysdeps/aarch64/sysdep.h @@ -21,43 +21,15 @@ #include <sysdeps/generic/sysdep.h> -#ifndef __ASSEMBLER__ -/* Strip pointer authentication code from pointer p. */ -static inline void * -strip_pac (void *p) -{ - register void *ra asm ("x30") = (p); - asm ("hint 7 // xpaclri" : "+r"(ra)); - return ra; -} - -/* This is needed when glibc is built with -mbranch-protection=pac-ret - with a gcc that is affected by PR target/94891. */ -# if HAVE_AARCH64_PAC_RET -# undef RETURN_ADDRESS -# define RETURN_ADDRESS(n) strip_pac (__builtin_return_address (n)) -# endif -#endif - #ifdef __ASSEMBLER__ +/* CFI directive for return address. */ +#define cfi_negate_ra_state .cfi_negate_ra_state + /* Syntactic details of assembler. 
*/ #define ASM_SIZE_DIRECTIVE(name) .size name,.-name -/* Branch Target Identitication support. */ -#if HAVE_AARCH64_BTI -# define BTI_C hint 34 -# define BTI_J hint 36 -#else -# define BTI_C nop -# define BTI_J nop -#endif - -/* Return address signing support (pac-ret). */ -#define PACIASP hint 25 -#define AUTIASP hint 29 - /* Guarded Control Stack support. */ #define CHKFEAT_X16 hint 40 #define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1 @@ -87,11 +59,7 @@ strip_pac (void *p) /* Add GNU property note with the supported features to all asm code where sysdep.h is included. */ -#if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS) -#elif HAVE_AARCH64_BTI -GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS) -#endif /* Define an entry point visible from C. */ #define ENTRY(name) \ @@ -100,7 +68,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS) .p2align 6; \ C_LABEL(name) \ cfi_startproc; \ - BTI_C; \ + bti c; \ CALL_MCOUNT /* Define an entry point visible from C. 
*/ @@ -110,7 +78,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS) .p2align align; \ C_LABEL(name) \ cfi_startproc; \ - BTI_C; \ + bti c; \ CALL_MCOUNT /* Define an entry point visible from C with a specified alignment and @@ -127,7 +95,7 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS) .endr; \ C_LABEL(name) \ cfi_startproc; \ - BTI_C; \ + bti c; \ CALL_MCOUNT #undef END diff --git a/sysdeps/aarch64/tst-ifunc-arg-1.c b/sysdeps/aarch64/tst-ifunc-arg-1.c index b90c836..292c5ae 100644 --- a/sysdeps/aarch64/tst-ifunc-arg-1.c +++ b/sysdeps/aarch64/tst-ifunc-arg-1.c @@ -57,6 +57,21 @@ do_test (void) TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t)); TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP)); TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2)); + TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3)); + TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4)); + + const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2; + + TEST_COMPARE (__ifunc_hwcap (1, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP)); + TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP2)); + TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP3)); + TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP4)); + + return 0; } diff --git a/sysdeps/aarch64/tst-ifunc-arg-2.c b/sysdeps/aarch64/tst-ifunc-arg-2.c index dac144d..c05129a 100644 --- a/sysdeps/aarch64/tst-ifunc-arg-2.c +++ b/sysdeps/aarch64/tst-ifunc-arg-2.c @@ -60,6 +60,20 @@ do_test (void) TEST_COMPARE (saved_arg2._size, sizeof (__ifunc_arg_t)); TEST_COMPARE (saved_arg2._hwcap, getauxval (AT_HWCAP)); TEST_COMPARE (saved_arg2._hwcap2, getauxval (AT_HWCAP2)); + TEST_COMPARE (saved_arg2._hwcap3, getauxval (AT_HWCAP3)); + TEST_COMPARE (saved_arg2._hwcap4, getauxval (AT_HWCAP4)); + + const unsigned long *saved_arg2_ptr = (const unsigned long *)&saved_arg2; + + TEST_COMPARE (__ifunc_hwcap 
(1, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP)); + TEST_COMPARE (__ifunc_hwcap (2, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP2)); + TEST_COMPARE (__ifunc_hwcap (3, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP3)); + TEST_COMPARE (__ifunc_hwcap (4, saved_arg1, saved_arg2_ptr), + getauxval (AT_HWCAP4)); + return 0; } diff --git a/sysdeps/aarch64/tst-ifunc-arg-3.c b/sysdeps/aarch64/tst-ifunc-arg-3.c new file mode 100644 index 0000000..49d8866 --- /dev/null +++ b/sysdeps/aarch64/tst-ifunc-arg-3.c @@ -0,0 +1,97 @@ +/* Tests for __ifunc_hwcap helper function. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <stdint.h> +#include <sys/ifunc.h> +#include <support/check.h> + +#define CHECK_VALUES_WITH_ARG(p1, p2, p3, p4) \ + ({ \ + TEST_COMPARE (__ifunc_hwcap (0, _IFUNC_ARG_HWCAP, arg), 0); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, _IFUNC_ARG_HWCAP, arg), p1); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, _IFUNC_ARG_HWCAP, arg), p2); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, _IFUNC_ARG_HWCAP, arg), p3); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, _IFUNC_ARG_HWCAP, arg), p4); \ + TEST_COMPARE (__ifunc_hwcap (5, _IFUNC_ARG_HWCAP, arg), 0); \ + }) + +#define CHECK_VALUES_WITHOUT_ARG(p1) \ + ({ \ + TEST_COMPARE (__ifunc_hwcap (0, p1, arg), 0); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP, p1, arg), p1); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, p1, arg), 0); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP3, p1, arg), 0); \ + TEST_COMPARE (__ifunc_hwcap (_IFUNC_ARG_AT_HWCAP4, p1, arg), 0); \ + TEST_COMPARE (__ifunc_hwcap (5, p1, arg), 0); \ + }) + +static void +test_one (const unsigned long *arg) +{ + uint64_t size = arg[0] / sizeof (uint64_t); + + switch (size) + { + case 1: + CHECK_VALUES_WITH_ARG (0, 0, 0, 0); + CHECK_VALUES_WITHOUT_ARG (0); + break; + case 2: + CHECK_VALUES_WITH_ARG (1, 0, 0, 0); + CHECK_VALUES_WITHOUT_ARG (1); + break; + case 3: + CHECK_VALUES_WITH_ARG (1, 2, 0, 0); + CHECK_VALUES_WITHOUT_ARG (1); + break; + case 4: + CHECK_VALUES_WITH_ARG (1, 2, 3, 0); + CHECK_VALUES_WITHOUT_ARG (1); + break; + case 5: + CHECK_VALUES_WITH_ARG (1, 2, 3, 4); + CHECK_VALUES_WITHOUT_ARG (1); + break; + default: + TEST_VERIFY (0); // unexpected size + break; + } +} + +static int +do_test (void) +{ + uint64_t arg[_IFUNC_HWCAP_MAX + 1] = { + 0, /* Placeholder for size */ + _IFUNC_ARG_AT_HWCAP, /* AT_HWCAP */ + _IFUNC_ARG_AT_HWCAP2, /* AT_HWCAP2 */ + _IFUNC_ARG_AT_HWCAP3, /* AT_HWCAP3 */ + _IFUNC_ARG_AT_HWCAP4, /* AT_HWCAP4 */ + }; + + for (int k = 0; k <= _IFUNC_HWCAP_MAX; k++) + { + /* Update 
size */ + arg[0] = (k + 1) * sizeof (uint64_t); + test_one (arg); + } + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/aarch64/tst-ifunc-arg-4.c b/sysdeps/aarch64/tst-ifunc-arg-4.c new file mode 100644 index 0000000..c95ef9e --- /dev/null +++ b/sysdeps/aarch64/tst-ifunc-arg-4.c @@ -0,0 +1,67 @@ +/* Test for ifunc resolver that uses __ifunc_hwcap helper function. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdint.h> +#include <sys/auxv.h> +#include <sys/ifunc.h> +#include <support/check.h> + +static int +one (void) +{ + return 1; +} + +static int +two (void) +{ + return 2; +} + +/* Resolver function. */ +static void * +resolver (uint64_t arg0, const uint64_t arg1[]) +{ + uint64_t hwcap2 = __ifunc_hwcap (_IFUNC_ARG_AT_HWCAP2, arg0, arg1); + if (hwcap2 & HWCAP2_POE) + return (void *)one; + else + return (void *)two; +} + +/* An extern visible ifunc symbol. 
*/ +int fun (void) __attribute__((ifunc ("resolver"))); + +static int +do_test (void) +{ + if (getauxval (AT_HWCAP2) & HWCAP2_POE) + { + printf ("using 1st implementation\n"); + TEST_VERIFY (fun () == 1); + } + else + { + printf ("using 2nd implementation\n"); + TEST_VERIFY (fun () == 2); + } + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h new file mode 100644 index 0000000..f049416 --- /dev/null +++ b/sysdeps/aarch64/tst-sme-helper.h @@ -0,0 +1,97 @@ +/* Utility functions for SME tests. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* Streaming SVE vector register size. */ +static unsigned long svl; + +struct blk { + void *za_save_buffer; + uint16_t num_za_save_slices; + char __reserved[6]; +}; + +/* Read SVCR to get SM (bit0) and ZA (bit1) state. */ +static unsigned long +get_svcr (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0xd53b4240 /* mrs x0, svcr */\n" + : "=r" (x0)); + return x0; +} + +/* Returns tpidr2. 
*/ +static void * +get_tpidr2 (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" + : "=r"(x0) :: "memory"); + return (void *) x0; +} + +/* Obtains current streaming SVE vector register size. */ +static unsigned long +get_svl (void) +{ + register unsigned long x0 asm ("x0"); + asm volatile ( + ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" + : "=r" (x0)); + return x0; +} + +/* PSTATE.ZA = 1, set ZA state to active. */ +static void +start_za (void) +{ + asm volatile ( + ".inst 0xd503457f /* smstart za */"); +} + +/* Load data into ZA byte by byte from p. */ +static void __attribute__ ((noinline)) +load_za (const void *p) +{ + register unsigned long x15 asm ("x15") = 0; + register unsigned long x16 asm ("x16") = (unsigned long)p; + register unsigned long x17 asm ("x17") = svl; + + asm volatile ( + ".inst 0xd503437f /* smstart sm */\n" + ".L_ldr_loop:\n" + ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" + "add w15, w15, 1\n" + ".inst 0x04305030 /* addvl x16, x16, 1 */\n" + "cmp w15, w17\n" + "bne .L_ldr_loop\n" + ".inst 0xd503427f /* smstop sm */\n" + : "+r"(x15), "+r"(x16), "+r"(x17)); +} + +/* Set tpidr2 to BLK. 
*/ +static void +set_tpidr2 (struct blk *blk) +{ + register unsigned long x0 asm ("x0") = (unsigned long)blk; + asm volatile ( + ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" + :: "r"(x0) : "memory"); +} diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c index 62c419f..103897a 100644 --- a/sysdeps/aarch64/tst-sme-jmp.c +++ b/sysdeps/aarch64/tst-sme-jmp.c @@ -27,87 +27,12 @@ #include <support/support.h> #include <support/test-driver.h> -struct blk { - void *za_save_buffer; - uint16_t num_za_save_slices; - char __reserved[6]; -}; +#include "tst-sme-helper.h" -static unsigned long svl; static uint8_t *za_orig; static uint8_t *za_dump; static uint8_t *za_save; -static unsigned long -get_svl (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0x04bf5820 /* rdsvl x0, 1 */\n" - : "=r" (x0)); - return x0; -} - -/* PSTATE.ZA = 1, set ZA state to active. */ -static void -start_za (void) -{ - asm volatile ( - ".inst 0xd503457f /* smstart za */"); -} - -/* Read SVCR to get SM (bit0) and ZA (bit1) state. */ -static unsigned long -get_svcr (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0xd53b4240 /* mrs x0, svcr */\n" - : "=r" (x0)); - return x0; -} - -/* Load data into ZA byte by byte from p. */ -static void __attribute__ ((noinline)) -load_za (const void *p) -{ - register unsigned long x15 asm ("x15") = 0; - register unsigned long x16 asm ("x16") = (unsigned long)p; - register unsigned long x17 asm ("x17") = svl; - - asm volatile ( - ".inst 0xd503437f /* smstart sm */\n" - ".L_ldr_loop:\n" - ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n" - "add w15, w15, 1\n" - ".inst 0x04305030 /* addvl x16, x16, 1 */\n" - "cmp w15, w17\n" - "bne .L_ldr_loop\n" - ".inst 0xd503427f /* smstop sm */\n" - : "+r"(x15), "+r"(x16), "+r"(x17)); -} - -/* Set tpidr2 to BLK. 
*/ -static void -set_tpidr2 (struct blk *blk) -{ - register unsigned long x0 asm ("x0") = (unsigned long)blk; - asm volatile ( - ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n" - :: "r"(x0) : "memory"); -} - -/* Returns tpidr2. */ -static void * -get_tpidr2 (void) -{ - register unsigned long x0 asm ("x0"); - asm volatile ( - ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n" - : "=r"(x0) :: "memory"); - return (void *) x0; -} - static void print_data(const char *msg, void *p) { @@ -168,8 +93,8 @@ longjmp_test (void) { p = get_tpidr2 (); printf ("before longjmp: tp2 = %p\n", p); - if (p != &blk) - FAIL_EXIT1 ("tpidr2 is clobbered"); + if (p != NULL) + FAIL_EXIT1 ("tpidr2 has not been reset to null"); do_longjmp (env); FAIL_EXIT1 ("longjmp returned"); } diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c new file mode 100644 index 0000000..63f6eeb --- /dev/null +++ b/sysdeps/aarch64/tst-sme-za-state.c @@ -0,0 +1,119 @@ +/* Test for SME ZA state being cleared on setjmp and longjmp. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <stdio.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <sys/auxv.h> + +#include <support/check.h> +#include <support/support.h> +#include <support/test-driver.h> + +#include "tst-sme-helper.h" + +static uint8_t *state; + +static void +enable_sme_za_state (struct blk *ptr) +{ + set_tpidr2 (ptr); + start_za (); + load_za (state); +} + +static void +check_sme_za_state (const char msg[], bool clear) +{ + unsigned long svcr = get_svcr (); + void *tpidr2 = get_tpidr2 (); + printf ("[%s]\n", msg); + printf ("svcr = %016lx\n", svcr); + printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2); + if (clear) + { + TEST_VERIFY (svcr == 0); + TEST_VERIFY (tpidr2 == NULL); + } + else + { + TEST_VERIFY (svcr != 0); + TEST_VERIFY (tpidr2 != NULL); + } +} + +static void +run (struct blk *ptr) +{ + jmp_buf buf; + int ret; + + check_sme_za_state ("initial state", /* Clear. */ true); + + /* Enable ZA state so that the effect of disabling it is observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before setjmp", /* Clear. */ false); + + if ((ret = setjmp (buf)) == 0) + { + check_sme_za_state ("after setjmp", /* Clear. */ true); + + /* Enable ZA state so that the effect of disabling it is observable. */ + enable_sme_za_state (ptr); + check_sme_za_state ("before longjmp", /* Clear. */ false); + + longjmp (buf, 42); + + /* Unreachable. */ + TEST_VERIFY (false); + __builtin_unreachable (); + } + + TEST_COMPARE (ret, 42); + check_sme_za_state ("after longjmp", /* Clear. */ true); +} + +static int +do_test (void) +{ + unsigned long hwcap2 = getauxval (AT_HWCAP2); + if ((hwcap2 & HWCAP2_SME) == 0) + return EXIT_UNSUPPORTED; + + /* Get current streaming SVE vector register size. */ + svl = get_svl (); + printf ("svl: %lu\n", svl); + TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16))); + + /* Initialise buffer for ZA state of SME. 
*/ + state = xmalloc (svl * svl); + memset (state, 1, svl * svl); + struct blk blk = { + .za_save_buffer = state, + .num_za_save_slices = svl, + .__reserved = {0}, + }; + + run (&blk); + + free (state); + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile index 3ed75dd..1be63b7 100644 --- a/sysdeps/generic/Makefile +++ b/sysdeps/generic/Makefile @@ -21,6 +21,9 @@ CFLAGS-wordcopy.c += -Wno-uninitialized endif ifeq ($(subdir),elf) +ifeq ($(enable-gsframe),yes) +sysdep_routines += sframe-read sframe +endif ifeq (yes:yes,$(build-shared):$(unwind-find-fde)) # This is needed to support g++ v2 and v3. sysdep_routines += framestate unwind-pe diff --git a/sysdeps/generic/getrandom-internal.h b/sysdeps/generic/getrandom-internal.h index 7c54194..4872598 100644 --- a/sysdeps/generic/getrandom-internal.h +++ b/sysdeps/generic/getrandom-internal.h @@ -19,7 +19,7 @@ #ifndef _GETRANDOM_INTERNAL_H #define _GETRANDOM_INTERNAL_H -static inline void __getrandom_early_init (_Bool) +static inline void __getrandom_early_init (_Bool initial) { } diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index fc4a3de..74025f1 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -368,8 +368,6 @@ struct rtld_global size_t n_elements; void (*free) (void *); } _ns_unique_sym_table; - /* Keep track of changes to each namespace' list. */ - struct r_debug_extended _ns_debug; } _dl_ns[DL_NNS]; /* One higher than index of last used namespace. */ EXTERN size_t _dl_nns; @@ -1089,15 +1087,29 @@ extern void _dl_debug_state (void); rtld_hidden_proto (_dl_debug_state) /* Initialize `struct r_debug_extended' for the namespace NS. LDBASE - is the run-time load address of the dynamic linker, to be put in the - `r_ldbase' member. Return the address of the structure. */ + is the run-time load address of the dynamic linker, to be put in + the `r_ldbase' member. 
+ + Return the address of the r_debug structure for the namespace. + This is not merely a convenience or optimization, but it is + necessary for the LIBC_PROBE Systemtap/debugger probes to work + reliably: direct variable access can create probes that tools + cannot consume. */ extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) attribute_hidden; +/* This is called after relocation processing to handle a potential + copy relocation for _r_debug. */ +void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden; + /* Update the `r_map' member and return the address of `struct r_debug' of the namespace NS. */ extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden; +/* Update R->r_state to STATE and notify the debugger by calling + _dl_debug_state. */ +void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden; + /* Initialize the basic data structure for the search paths. SOURCE is either "LD_LIBRARY_PATH" or "--library-path". GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h deleted file mode 100644 index b95e409..0000000 --- a/sysdeps/generic/libc-tsd.h +++ /dev/null @@ -1,60 +0,0 @@ -/* libc-internal interface for thread-specific data. Stub or TLS version. - Copyright (C) 1998-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _GENERIC_LIBC_TSD_H -#define _GENERIC_LIBC_TSD_H 1 - -/* This file defines the following macros for accessing a small fixed - set of thread-specific `void *' data used only internally by libc. - - __libc_tsd_define(CLASS, TYPE, KEY) -- Define or declare a datum with TYPE - for KEY. CLASS can be `static' for - keys used in only one source file, - empty for global definitions, or - `extern' for global declarations. - __libc_tsd_address(TYPE, KEY) -- Return the `TYPE *' pointing to - the current thread's datum for KEY. - __libc_tsd_get(TYPE, KEY) -- Return the `TYPE' datum for KEY. - __libc_tsd_set(TYPE, KEY, VALUE) -- Set the datum for KEY to VALUE. - - The set of available KEY's will usually be provided as an enum, - and contains (at least): - _LIBC_TSD_KEY_MALLOC - _LIBC_TSD_KEY_DL_ERROR - _LIBC_TSD_KEY_RPC_VARS - All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros. - Some implementations may not provide any enum at all and instead - using string pasting in the macros. */ - -#include <tls.h> - -/* When full support for __thread variables is available, this interface is - just a trivial wrapper for it. Without TLS, this is the generic/stub - implementation for wholly single-threaded systems. - - We don't define an enum for the possible key values, because the KEYs - translate directly into variables by macro magic. 
*/ - -#define __libc_tsd_define(CLASS, TYPE, KEY) \ - CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie; - -#define __libc_tsd_address(TYPE, KEY) (&__libc_tsd_##KEY) -#define __libc_tsd_get(TYPE, KEY) (__libc_tsd_##KEY) -#define __libc_tsd_set(TYPE, KEY, VALUE) (__libc_tsd_##KEY = (VALUE)) - -#endif /* libc-tsd.h */ diff --git a/sysdeps/generic/sframe-read.c b/sysdeps/generic/sframe-read.c new file mode 100644 index 0000000..a6ebc42 --- /dev/null +++ b/sysdeps/generic/sframe-read.c @@ -0,0 +1,636 @@ +/* Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <assert.h> +#include <sframe-read.h> + +/* Get the SFrame header size. */ + +static inline uint32_t +sframe_get_hdr_size (sframe_header *sfh) +{ + return SFRAME_V1_HDR_SIZE (*sfh); +} + +/* Access functions for frame row entry data. 
*/ + +static inline uint8_t +sframe_fre_get_offset_count (uint8_t fre_info) +{ + return SFRAME_V1_FRE_OFFSET_COUNT (fre_info); +} + +static inline uint8_t +sframe_fre_get_offset_size (uint8_t fre_info) +{ + return SFRAME_V1_FRE_OFFSET_SIZE (fre_info); +} + +static inline bool +sframe_get_fre_ra_mangled_p (uint8_t fre_info) +{ + return SFRAME_V1_FRE_MANGLED_RA_P (fre_info); +} + +/* Access functions for info from function descriptor entry. */ + +static uint32_t +sframe_get_fre_type (sframe_func_desc_entry *fdep) +{ + uint32_t fre_type = 0; + if (fdep != NULL) + fre_type = SFRAME_V1_FUNC_FRE_TYPE (fdep->sfde_func_info); + return fre_type; +} + +static uint32_t +sframe_get_fde_type (sframe_func_desc_entry *fdep) +{ + uint32_t fde_type = 0; + if (fdep != NULL) + fde_type = SFRAME_V1_FUNC_FDE_TYPE (fdep->sfde_func_info); + return fde_type; +} + +/* Check if SFrame header has valid data. Only consider SFrame type + 2. */ + +static bool +sframe_header_sanity_check_p (sframe_header *hp) +{ + /* Check preamble is valid. */ + if ((hp->sfh_preamble.sfp_magic != SFRAME_MAGIC) + || (hp->sfh_preamble.sfp_version != SFRAME_VERSION_2) + || (hp->sfh_preamble.sfp_flags & ~SFRAME_V2_F_ALL_FLAGS)) + return false; + + /* Check offsets are valid. */ + if (hp->sfh_fdeoff > hp->sfh_freoff) + return false; + + return true; +} + +/* Get the FRE start address size. */ + +static size_t +sframe_fre_start_addr_size (uint32_t fre_type) +{ + size_t addr_size = 0; + switch (fre_type) + { + case SFRAME_FRE_TYPE_ADDR1: + addr_size = 1; + break; + case SFRAME_FRE_TYPE_ADDR2: + addr_size = 2; + break; + case SFRAME_FRE_TYPE_ADDR4: + addr_size = 4; + break; + default: + break; + } + return addr_size; +} + +/* Check if the FREP has valid data. 
*/ + +static bool +sframe_fre_sanity_check_p (sframe_frame_row_entry *frep) +{ + uint8_t offset_size, offset_cnt; + uint8_t fre_info; + + if (frep == NULL) + return false; + + fre_info = frep->fre_info; + offset_size = sframe_fre_get_offset_size (fre_info); + + if (offset_size != SFRAME_FRE_OFFSET_1B + && offset_size != SFRAME_FRE_OFFSET_2B + && offset_size != SFRAME_FRE_OFFSET_4B) + return false; + + offset_cnt = sframe_fre_get_offset_count (fre_info); + if (offset_cnt > MAX_NUM_STACK_OFFSETS) + return false; + + return true; +} + +/* Get FRE_INFO's offset size in bytes. */ + +static size_t +sframe_fre_offset_bytes_size (uint8_t fre_info) +{ + uint8_t offset_size, offset_cnt; + + offset_size = sframe_fre_get_offset_size (fre_info); + + offset_cnt = sframe_fre_get_offset_count (fre_info); + + if (offset_size == SFRAME_FRE_OFFSET_2B + || offset_size == SFRAME_FRE_OFFSET_4B) /* 2 or 4 bytes. */ + return (offset_cnt * (offset_size * 2)); + + return (offset_cnt); +} + +/* Get total size in bytes to represent FREP in the binary format. This + includes the starting address, FRE info, and all the offsets. */ + +static size_t +sframe_fre_entry_size (sframe_frame_row_entry *frep, size_t addr_size) +{ + if (frep == NULL) + return 0; + + uint8_t fre_info = frep->fre_info; + + return (addr_size + sizeof (frep->fre_info) + + sframe_fre_offset_bytes_size (fre_info)); +} + +/* Get SFrame header from the given decoder context DCTX. */ + +static inline sframe_header * +sframe_decoder_get_header (sframe_decoder_ctx *dctx) +{ + sframe_header *hp = NULL; + if (dctx != NULL) + hp = &dctx->sfd_header; + return hp; +} + +/* Get the offset of the sfde_func_start_address field (from the start of the + on-disk layout of the SFrame section) of the FDE at FUNC_IDX in the decoder + context DCTX. 
*/ + +static uint32_t +sframe_decoder_get_offsetof_fde_start_addr (sframe_decoder_ctx *dctx, + uint32_t func_idx, + _Unwind_Reason_Code *errp) +{ + sframe_header *dhp; + + dhp = sframe_decoder_get_header (dctx); + if (dhp == NULL) + { + if (errp != NULL) + *errp = _URC_END_OF_STACK; + return 0; + } + + if (func_idx >= dhp->sfh_num_fdes) + { + if (errp != NULL) + *errp = _URC_END_OF_STACK; + return 0; + } + else if (errp != NULL) + *errp = _URC_NO_REASON; + + return (sframe_get_hdr_size (dhp) + + func_idx * sizeof (sframe_func_desc_entry) + + offsetof (sframe_func_desc_entry, sfde_func_start_address)); +} + + +/* Get the offset of the start PC of the SFrame FDE at FUNC_IDX from + the start of the SFrame section. If the flag + SFRAME_F_FDE_FUNC_START_PCREL is set, sfde_func_start_address is + the offset of the start PC of the function from the field itself. + + If FUNC_IDX is not a valid index in the given decoder object, returns 0. */ + +static int32_t +sframe_decoder_get_secrel_func_start_addr (sframe_decoder_ctx *dctx, + uint32_t func_idx) +{ + int32_t func_start_addr; + _Unwind_Reason_Code err = 0; + int32_t offsetof_fde_in_sec = 0; + + /* Check if we have SFRAME_F_FDE_FUNC_START_PCREL. */ + sframe_header *sh = &dctx->sfd_header; + if ((sh->sfh_preamble.sfp_flags & SFRAME_F_FDE_FUNC_START_PCREL)) + { + offsetof_fde_in_sec = + sframe_decoder_get_offsetof_fde_start_addr (dctx, func_idx, &err); + /* If func_idx is not a valid index, return 0. */ + if (err == _URC_END_OF_STACK) + return 0; + } + + func_start_addr = dctx->sfd_funcdesc[func_idx].sfde_func_start_address; + + return func_start_addr + offsetof_fde_in_sec; +} + +/* Check if the SFrame Frame Row Entry identified via the + START_IP_OFFSET and the END_IP_OFFSET (for SFrame FDE at + FUNC_IDX). 
*/

/* Returns true when PC, taken relative to the start address of the
   function described by the FDE at FUNC_IDX, lies within
   [START_IP_OFFSET, END_IP_OFFSET].  Returns false when DCTX has no
   FDE table, when PC precedes the function start, or when a
   SFRAME_FDE_TYPE_PCMASK FDE has a zero repetition block size.  */

static bool
sframe_fre_check_range_p (sframe_decoder_ctx *dctx, uint32_t func_idx,
                          uint32_t start_ip_offset, uint32_t end_ip_offset,
                          int32_t pc)
{
  sframe_func_desc_entry *fdep;
  int32_t func_start_addr;
  uint32_t pc_offset;

  /* Taking the address of a table element never yields NULL, so check
     the table itself.  */
  if (dctx == NULL || dctx->sfd_funcdesc == NULL)
    return false;

  fdep = &dctx->sfd_funcdesc[func_idx];
  func_start_addr = sframe_decoder_get_secrel_func_start_addr (dctx, func_idx);

  if (func_start_addr > pc)
    return false;

  /* Given func_start_addr <= pc, the difference is non-negative.
     Compute it in unsigned arithmetic so that it cannot overflow when
     the two values have opposite signs.  */
  pc_offset = (uint32_t) pc - (uint32_t) func_start_addr;

  /* For SFrame FDEs encoding information for repetitive pattern of insns,
     masking with the rep_block_size is necessary to find the matching FRE.  */
  if (sframe_get_fde_type (fdep) == SFRAME_FDE_TYPE_PCMASK)
    {
      uint8_t rep_block_size = fdep->sfde_func_rep_size;

      /* A zero block size cannot describe a repeating pattern.  Reject
         it rather than divide by zero.  */
      if (rep_block_size == 0)
        return false;

      pc_offset %= rep_block_size;
    }

  return start_ip_offset <= pc_offset && pc_offset <= end_ip_offset;
}

/* Get IDX'th offset from FRE.  Set ERRP as applicable. 
*/ + +static int32_t +sframe_get_fre_offset (sframe_frame_row_entry *fre, + int idx, + _Unwind_Reason_Code *errp) +{ + uint8_t offset_cnt, offset_size; + + if (!sframe_fre_sanity_check_p (fre)) + { + *errp = _URC_END_OF_STACK; + return 0; + } + + offset_cnt = sframe_fre_get_offset_count (fre->fre_info); + offset_size = sframe_fre_get_offset_size (fre->fre_info); + + if (offset_cnt < (idx + 1)) + { + *errp = _URC_END_OF_STACK; + return 0; + } + *errp = _URC_NO_REASON; + + if (offset_size == SFRAME_FRE_OFFSET_1B) + { + int8_t *sp = (int8_t *)fre->fre_offsets; + return sp[idx]; + } + else if (offset_size == SFRAME_FRE_OFFSET_2B) + { + int16_t *sp = (int16_t *)fre->fre_offsets; + return sp[idx]; + } + else + { + int32_t *ip = (int32_t *)fre->fre_offsets; + return ip[idx]; + } +} + +/* Decode the SFrame FRE start address offset value from FRE_BUF in on-disk + binary format, given the FRE_TYPE. Updates the FRE_START_ADDR. */ + +static void +sframe_decode_fre_start_address (const char *fre_buf, + uint32_t *fre_start_addr, + uint32_t fre_type) +{ + uint32_t saddr = 0; + + if (fre_type == SFRAME_FRE_TYPE_ADDR1) + { + uint8_t *uc = (uint8_t *)fre_buf; + saddr = (uint32_t)*uc; + } + else if (fre_type == SFRAME_FRE_TYPE_ADDR2) + { + uint16_t *ust = (uint16_t *)fre_buf; + saddr = (uint32_t)*ust; + } + else if (fre_type == SFRAME_FRE_TYPE_ADDR4) + { + uint32_t *uit = (uint32_t *)fre_buf; + saddr = (uint32_t)*uit; + } + else + return; + + *fre_start_addr = saddr; +} + +/* Find the function descriptor entry starting which contains the specified + address ADDR. 
*/ + +static sframe_func_desc_entry * +sframe_get_funcdesc_with_addr_internal (sframe_decoder_ctx *ctx, int32_t addr, + int *errp, uint32_t *func_idx) +{ + sframe_header *dhp; + sframe_func_desc_entry *fdp; + int low, high; + + if (ctx == NULL) + return NULL; + + dhp = sframe_decoder_get_header (ctx); + + if (dhp == NULL || dhp->sfh_num_fdes == 0 || ctx->sfd_funcdesc == NULL) + return NULL; + /* If the FDE sub-section is not sorted on PCs, skip the lookup because + binary search cannot be used. */ + if ((dhp->sfh_preamble.sfp_flags & SFRAME_F_FDE_SORTED) == 0) + return NULL; + + /* Do the binary search. */ + fdp = (sframe_func_desc_entry *) ctx->sfd_funcdesc; + low = 0; + high = dhp->sfh_num_fdes - 1; + while (low <= high) + { + int mid = low + (high - low) / 2; + + /* Given sfde_func_start_address <= addr, + addr - sfde_func_start_address must be positive. */ + if (sframe_decoder_get_secrel_func_start_addr (ctx, mid) <= addr + && ((uint32_t)(addr - sframe_decoder_get_secrel_func_start_addr (ctx, + mid)) + < fdp[mid].sfde_func_size)) + { + *func_idx = mid; + return fdp + mid; + } + + if (sframe_decoder_get_secrel_func_start_addr (ctx, mid) < addr) + low = mid + 1; + else + high = mid - 1; + } + + return NULL; +} + +/* Get the end IP offset for the FRE at index i in the FDEP. The buffer FRES + is the starting location for the FRE. */ + +static uint32_t +sframe_fre_get_end_ip_offset (sframe_func_desc_entry *fdep, unsigned int i, + const char *fres) +{ + uint32_t end_ip_offset = 0; + uint32_t fre_type; + + fre_type = sframe_get_fre_type (fdep); + + /* Get the start address of the next FRE in sequence. */ + if (i < fdep->sfde_func_num_fres - 1) + { + sframe_decode_fre_start_address (fres, &end_ip_offset, fre_type); + end_ip_offset -= 1; + } + else + /* The end IP offset for the FRE needs to be deduced from the function + size. 
*/ + end_ip_offset = fdep->sfde_func_size - 1; + + return end_ip_offset; +} + +/* Get the SFrame's fixed FP offset given the decoder context CTX. */ + +static int8_t +sframe_decoder_get_fixed_fp_offset (sframe_decoder_ctx *ctx) +{ + sframe_header *dhp; + dhp = sframe_decoder_get_header (ctx); + return dhp->sfh_cfa_fixed_fp_offset; +} + +/* Get the SFrame's fixed RA offset given the decoder context CTX. */ + +static int8_t +sframe_decoder_get_fixed_ra_offset (sframe_decoder_ctx *ctx) +{ + sframe_header *dhp; + dhp = sframe_decoder_get_header (ctx); + return dhp->sfh_cfa_fixed_ra_offset; +} + +/* Get the base reg id from the FRE info. Set errp if failure. */ + +uint8_t +__sframe_fre_get_base_reg_id (sframe_frame_row_entry *fre) +{ + uint8_t fre_info = fre->fre_info; + return SFRAME_V1_FRE_CFA_BASE_REG_ID (fre_info); +} + +/* Get the CFA offset from the FRE. If the offset is unavailable, + sets errp. */ + +int32_t +__sframe_fre_get_cfa_offset (sframe_decoder_ctx *dctx __attribute__ ((__unused__)), + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp) +{ + return sframe_get_fre_offset (fre, SFRAME_FRE_CFA_OFFSET_IDX, errp); +} + +/* Get the FP offset from the FRE. If the offset is unavailable, sets + errp. */ + +int32_t +__sframe_fre_get_fp_offset (sframe_decoder_ctx *dctx, + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp) +{ + uint32_t fp_offset_idx = 0; + int8_t fp_offset = sframe_decoder_get_fixed_fp_offset (dctx); + + *errp = _URC_NO_REASON; + /* If the FP offset is not being tracked, return the fixed FP offset + from the SFrame header. */ + if (fp_offset != SFRAME_CFA_FIXED_FP_INVALID) + return fp_offset; + + /* In some ABIs, the stack offset to recover RA (using the CFA) from is + fixed (like AMD64). In such cases, the stack offset to recover FP will + appear at the second index. */ + fp_offset_idx = ((sframe_decoder_get_fixed_ra_offset (dctx) + != SFRAME_CFA_FIXED_RA_INVALID) + ? 
SFRAME_FRE_RA_OFFSET_IDX + : SFRAME_FRE_FP_OFFSET_IDX); + return sframe_get_fre_offset (fre, fp_offset_idx, errp); +} + +/* Get the RA offset from the FRE. If the offset is unavailable, sets + errp. */ + +int32_t +__sframe_fre_get_ra_offset (sframe_decoder_ctx *dctx, + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp) +{ + int8_t ra_offset = sframe_decoder_get_fixed_ra_offset (dctx); + *errp = _URC_NO_REASON; + + /* If the RA offset was not being tracked, return the fixed RA offset + from the SFrame header. */ + if (ra_offset != SFRAME_CFA_FIXED_RA_INVALID) + return ra_offset; + + /* Otherwise, get the RA offset from the FRE. */ + return sframe_get_fre_offset (fre, SFRAME_FRE_RA_OFFSET_IDX, errp); +} + +/* Decode the specified SFrame buffer SF_BUF and return the new SFrame + decoder context. */ + +_Unwind_Reason_Code +__sframe_decode (sframe_decoder_ctx *dctx, const char *sf_buf) +{ + const sframe_preamble *sfp; + size_t hdrsz; + sframe_header *sfheaderp; + char *frame_buf; + + int fidx_size; + uint32_t fre_bytes; + + if (sf_buf == NULL) + return _URC_END_OF_STACK; + + sfp = (const sframe_preamble *) sf_buf; + + /* Check for foreign endianness. */ + if (sfp->sfp_magic != SFRAME_MAGIC) + return _URC_END_OF_STACK; + + frame_buf = (char *)sf_buf; + + /* Handle the SFrame header. */ + dctx->sfd_header = *(sframe_header *) frame_buf; + + /* Validate the contents of SFrame header. */ + sfheaderp = &dctx->sfd_header; + if (!sframe_header_sanity_check_p (sfheaderp)) + return _URC_END_OF_STACK; + + hdrsz = sframe_get_hdr_size (sfheaderp); + frame_buf += hdrsz; + + /* Handle the SFrame Function Descriptor Entry section. 
*/ + fidx_size + = sfheaderp->sfh_num_fdes * sizeof (sframe_func_desc_entry); + dctx->sfd_funcdesc = (sframe_func_desc_entry *)frame_buf; + frame_buf += (fidx_size); + + dctx->sfd_fres = frame_buf; + fre_bytes = sfheaderp->sfh_fre_len; + dctx->sfd_fre_nbytes = fre_bytes; + + return _URC_NO_REASON; +} + +/* Find the SFrame Row Entry which contains the PC. Returns + _URC_END_OF_STACK if failure. */ + +_Unwind_Reason_Code +__sframe_find_fre (sframe_decoder_ctx *ctx, int32_t pc, + sframe_frame_row_entry *frep) +{ + sframe_func_desc_entry *fdep; + uint32_t func_idx; + uint32_t fre_type, i; + uint32_t start_ip_offset; + int32_t func_start_addr; + uint32_t end_ip_offset; + const char *fres; + size_t size = 0; + int err = 0; + + if ((ctx == NULL) || (frep == NULL)) + return _URC_END_OF_STACK; + + /* Find the FDE which contains the PC, then scan its fre entries. */ + fdep = sframe_get_funcdesc_with_addr_internal (ctx, pc, &err, &func_idx); + if (fdep == NULL || ctx->sfd_fres == NULL) + return _URC_END_OF_STACK; + + fre_type = sframe_get_fre_type (fdep); + + fres = ctx->sfd_fres + fdep->sfde_func_start_fre_off; + func_start_addr = sframe_decoder_get_secrel_func_start_addr (ctx, func_idx); + + for (i = 0; i < fdep->sfde_func_num_fres; i++) + { + size_t addr_size; + + /* Partially decode the FRE. */ + sframe_decode_fre_start_address (fres, &frep->fre_start_addr, fre_type); + + addr_size = sframe_fre_start_addr_size (fre_type); + if (addr_size == 0) + return _URC_END_OF_STACK; + + frep->fre_info = *(uint8_t *)(fres + addr_size); + size = sframe_fre_entry_size (frep, addr_size); + + start_ip_offset = frep->fre_start_addr; + end_ip_offset = sframe_fre_get_end_ip_offset (fdep, i, fres + size); + + /* Stop search if FRE's start_ip is greater than pc. Given + func_start_addr <= pc, pc - func_start_addr must be positive. 
*/ + if (start_ip_offset > (uint32_t) (pc - func_start_addr)) + return _URC_END_OF_STACK; + + if (sframe_fre_check_range_p (ctx, func_idx, start_ip_offset, + end_ip_offset, pc)) + { + /* Decode last FRE bits: offsets size. */ + frep->fre_offsets = fres + addr_size + sizeof (frep->fre_info); + return _URC_NO_REASON; + } + + fres += size; + } + return _URC_END_OF_STACK; +} diff --git a/sysdeps/generic/sframe-read.h b/sysdeps/generic/sframe-read.h new file mode 100644 index 0000000..1461421 --- /dev/null +++ b/sysdeps/generic/sframe-read.h @@ -0,0 +1,112 @@ +/* Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SFRAME_API_H +#define _SFRAME_API_H + +#include <sframe.h> +#include <stdbool.h> +#include <unwind.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +typedef struct sframe_decoder_ctx +{ + + sframe_header sfd_header; + + sframe_func_desc_entry *sfd_funcdesc; + /* SFrame FRE table. */ + char *sfd_fres; + /* Number of bytes needed for SFrame FREs. */ + int sfd_fre_nbytes; +} sframe_decoder_ctx; + +#define MAX_NUM_STACK_OFFSETS 3 + +/* User interfacing SFrame Row Entry. + An abstraction provided by libsframe so the consumer is decoupled from + the binary format representation of the same. 
+ + The members are best ordered such that they are aligned at their natural + boundaries. This helps avoid usage of undesirable misaligned memory + accesses. See PR libsframe/29856. */ + +typedef struct sframe_frame_row_entry +{ + uint32_t fre_start_addr; + const char *fre_offsets; + unsigned char fre_info; +} sframe_frame_row_entry; + +/* The SFrame Decoder. */ + +/* Decode the specified SFrame buffer CF_BUF and return the new SFrame + decoder context. */ + +extern _Unwind_Reason_Code +__sframe_decode (sframe_decoder_ctx *dctx, const char *cf_buf); + +/* Find the SFrame Frame Row Entry which contains the PC. Returns + _URC_END_OF_STACK if failure. */ + +extern _Unwind_Reason_Code +__sframe_find_fre (sframe_decoder_ctx *ctx, int32_t pc, + sframe_frame_row_entry *frep); + +/* Get the base reg id from the FRE info. */ + +extern uint8_t +__sframe_fre_get_base_reg_id (sframe_frame_row_entry *fre); + +/* Get the CFA offset from the FRE. Sets ERRP if an error is + detected. */ + +extern int32_t +__sframe_fre_get_cfa_offset (sframe_decoder_ctx *dtcx, + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp); + +/* Get the FP offset from the FRE. If the offset is unavailable, sets + ERRP. */ + +extern int32_t +__sframe_fre_get_fp_offset (sframe_decoder_ctx *dctx, + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp); + +/* Get the RA offset from the FRE. Sets ERRP if ra offset is + unavailable. */ + +extern int32_t +__sframe_fre_get_ra_offset (sframe_decoder_ctx *dctx, + sframe_frame_row_entry *fre, + _Unwind_Reason_Code *errp); + +/* Get the offset of the sfde_func_start_address field. 
*/ + +extern uint32_t +__sframe_decoder_get_offsetof_fde_start_addr (sframe_decoder_ctx *dctx, + uint32_t func_idx, + _Unwind_Reason_Code *errp); +#ifdef __cplusplus +} +#endif + +#endif /* _SFRAME_API_H */ diff --git a/sysdeps/generic/sframe.c b/sysdeps/generic/sframe.c new file mode 100644 index 0000000..ba0830d --- /dev/null +++ b/sysdeps/generic/sframe.c @@ -0,0 +1,187 @@ +/* Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <sframe-read.h> +#include <stdlib.h> +#include <dlfcn.h> +#include <unwind.h> +#include <uw-sigframe.h> +#include <ldsodefs.h> + +/* Some arches like s390x needs an offset to correct the value where + SP is located in relation to CFA. */ +#ifndef SFRAME_SP_VAL_OFFSET +#define SFRAME_SP_VAL_OFFSET 0 +#endif + +static inline _Unwind_Ptr +read_stack_value (_Unwind_Ptr loc) +{ + _Unwind_Ptr value = *((_Unwind_Ptr *) loc); + return value; +} + +/* Helper to avoid PLT call in libc. Fixes elf/check-localplt + errors. */ + +static int +_dl_find_object_helper (void *address, struct dl_find_object *result) +{ + return GLRO (dl_find_object) (address, result); +} + +/* Backtrace the stack and collect the stacktrace given SFrame info. + If successful, store the return addresses in RA_LST. 
The SIZE + argument specifies the maximum number of return addresses that can + be stored in RA_LST and contains the number of the addresses + collected. */ + +int +__stacktrace_sframe (void **ra_lst, int count, frame *frame) +{ + _Unwind_Ptr sframe_vma, cfa, return_addr, ra_stack_loc, fp_stack_loc, pc, + frame_ptr; + int cfa_offset, fp_offset, ra_offset, i; + sframe_frame_row_entry fred, *frep = &fred; + + if (!ra_lst || !count) + return 0; + + for (i = 0; i < count; i++) + { + _Unwind_Reason_Code err; + struct dl_find_object data; + sframe_decoder_ctx decoder_context, *dctx = &decoder_context; + + /* Clean decoder context. */ + memset (dctx, 0, sizeof (sframe_decoder_ctx)); + + /* Load and set up the SFrame stack trace info for pc. */ + if (_dl_find_object_helper ((void *) frame->pc, &data) < 0) + /* Force fallback to DWARF stacktracer. */ + return 0; + + sframe_vma = (_Unwind_Ptr) data.dlfo_sframe; + if (!sframe_vma || !(data.dlfo_flags & DLFO_FLAG_SFRAME)) + { +#ifdef MD_DECODE_SIGNAL_FRAME + /* If there is no valid SFrame section or SFrame section is + corrupted then check if it is a signal frame. */ + if (MD_DECODE_SIGNAL_FRAME (frame) == _URC_NO_REASON) + { + ra_lst[i] = (void *) frame->pc; + continue; + } +#endif + /* Force fallback to DWARF stacktracer. */ + return 0; + } + + /* Decode the specified SFrame buffer populate sframe's decoder + context. */ + if (__sframe_decode (dctx, (char *) data.dlfo_sframe) != _URC_NO_REASON) + /* Force fallback to DWARF stacktracer. */ + return 0; + + pc = frame->pc - sframe_vma; + /* Find the SFrame Row Entry which contains the PC. */ + if (__sframe_find_fre (dctx, pc, frep) == _URC_END_OF_STACK) + { +#ifdef MD_DECODE_SIGNAL_FRAME + /* If there are no valid FREs, check if it is a signal + frame, and if so decode it. 
*/ + if (MD_DECODE_SIGNAL_FRAME (frame) == _URC_NO_REASON) + { + ra_lst[i] = (void *) frame->pc; + continue; + } +#endif +#ifdef MD_DETECT_OUTERMOST_FRAME + if (MD_DETECT_OUTERMOST_FRAME (frame) == _URC_END_OF_STACK) + return i; +#endif + /* Force fallback to DWARF stacktracer. */ + return 0; + } + + /* Get the CFA offset from the FRE. If offset is unavailable, + sets err. */ + cfa_offset = __sframe_fre_get_cfa_offset (dctx, frep, &err); + if (err != _URC_NO_REASON) + /* Force fallback to DWARF stacktracer. */ + return 0; + + /* Get CFA using base reg id from the FRE info. */ + cfa = ((__sframe_fre_get_base_reg_id (frep) + == SFRAME_BASE_REG_SP) ? frame->sp : frame->fp) + cfa_offset; + + /* Get the RA offset from the FRE. If the offset is + unavailable, sets err. */ + ra_offset = __sframe_fre_get_ra_offset (dctx, frep, &err); + if (err != _URC_NO_REASON) + /* Force fallback to DWARF stacktracer. */ + return 0; + + /* RA offset is available, get the value stored in the stack + location. */ + ra_stack_loc = cfa + ra_offset; + return_addr = read_stack_value (ra_stack_loc); + + ra_lst[i] = (void *) return_addr; + + /* Get the FP offset from the FRE. If the offset is + unavailable, sets err. */ + fp_offset = __sframe_fre_get_fp_offset (dctx, frep, &err); + frame_ptr = frame->fp; + if (err == _URC_NO_REASON) + { + /* FP offset is available, get the value stored in the stack + location. */ + fp_stack_loc = cfa + fp_offset; + frame_ptr = read_stack_value (fp_stack_loc); + } + + /* Set up for the next frame. */ + frame->fp = frame_ptr; + frame->sp = cfa + SFRAME_SP_VAL_OFFSET; + frame->pc = return_addr; + } + return i; +} + +libc_hidden_def (__stacktrace_sframe); + +/* A noinline helper used to obtain the caller's current PC. */ + +_Unwind_Ptr __attribute__ ((noinline)) +__getPC (void) +{ + return (_Unwind_Ptr) + __builtin_extract_return_addr (__builtin_return_address (0)); +} + +libc_hidden_def (__getPC); + +/* A noinline helper used to obtain the caller's current SP. 
It + mimics gcc14's __builtin_stack_address() functionality. */ + +_Unwind_Ptr __attribute__ ((noinline)) +__getSP (void) +{ + return (_Unwind_Ptr) __builtin_dwarf_cfa() + SFRAME_SP_VAL_OFFSET; +} + +libc_hidden_def (__getSP); diff --git a/sysdeps/generic/sframe.h b/sysdeps/generic/sframe.h new file mode 100644 index 0000000..e38adcf --- /dev/null +++ b/sysdeps/generic/sframe.h @@ -0,0 +1,378 @@ +/* SFrame format description. + Copyright (C) 2022-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SFRAME_H +#define _SFRAME_H + +#include <sys/types.h> +#include <limits.h> +#include <stdint.h> +#include <unwind.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* SFrame format. + + SFrame format is a simple format to represent the information needed + for generating vanilla backtraces. 
SFrame format keeps track of the + minimal necessary information needed for stack tracing: + - Canonical Frame Address (CFA) + - Frame Pointer (FP) + - Return Address (RA) + + The SFrame section itself has the following structure: + + +--------+------------+---------+ + | file | function | frame | + | header | descriptor | row | + | | entries | entries | + +--------+------------+---------+ + + The file header stores a magic number and version information, flags, and + the byte offset of each of the sections relative to the end of the header + itself. The file header also specifies the total number of Function + Descriptor Entries, Frame Row Entries and length of the FRE sub-section. + + Following the header is a list of Function Descriptor Entries (FDEs). + This list may be sorted if the flags in the file header indicate it to be + so. The sort order, if applicable, is the order of functions in the + .text.* sections in the resulting binary artifact. Each Function + Descriptor Entry specifies the start PC of a function, the size in bytes + of the function and an offset to its first Frame Row Entry (FRE). Each FDE + additionally specifies the type of FRE it uses to encode the stack + trace information. + + Next, the SFrame Frame Row Entry sub-section is a list of variable size + records. Each entry represents stack trace information for a set of PCs + of the function. A singular Frame Row Entry is a self-sufficient record + which contains information on how to generate stack trace from the + applicable set of PCs. + + */ + + +/* SFrame format versions. */ +#define SFRAME_VERSION_1 1 +#define SFRAME_VERSION_2 2 +/* SFrame magic number. */ +#define SFRAME_MAGIC 0xdee2 +/* Current version of SFrame format. */ +#define SFRAME_VERSION SFRAME_VERSION_2 + +/* Various flags for SFrame. */ + +/* Function Descriptor Entries are sorted on PC. */ +#define SFRAME_F_FDE_SORTED 0x1 +/* Functions preserve frame pointer. 
*/ +#define SFRAME_F_FRAME_POINTER 0x2 +/* Function start address in SFrame FDE is encoded as the distance from the + location of the sfde_func_start_address to the start PC of the function. + If absent, the function start address in SFrame FDE is encoded as the + distance from the start of the SFrame FDE section to the start PC of the + function. */ +#define SFRAME_F_FDE_FUNC_START_PCREL 0x4 + +/* Set of all defined flags in SFrame V2. */ +#define SFRAME_V2_F_ALL_FLAGS \ + (SFRAME_F_FDE_SORTED | SFRAME_F_FRAME_POINTER \ + | SFRAME_F_FDE_FUNC_START_PCREL) + +#define SFRAME_CFA_FIXED_FP_INVALID 0 +#define SFRAME_CFA_FIXED_RA_INVALID 0 + +/* Supported ABIs/Arch. */ +#define SFRAME_ABI_AARCH64_ENDIAN_BIG 1 /* AARCH64 big endian. */ +#define SFRAME_ABI_AARCH64_ENDIAN_LITTLE 2 /* AARCH64 little endian. */ +#define SFRAME_ABI_AMD64_ENDIAN_LITTLE 3 /* AMD64 little endian. */ + +/* SFrame FRE types. */ +#define SFRAME_FRE_TYPE_ADDR1 0 +#define SFRAME_FRE_TYPE_ADDR2 1 +#define SFRAME_FRE_TYPE_ADDR4 2 + +/* SFrame Function Descriptor Entry types. + + The SFrame format has two possible representations for functions. The + choice of which type to use is made according to the instruction patterns + in the relevant program stub. + + An SFrame FDE of type SFRAME_FDE_TYPE_PCINC is an indication + that the PCs in the FREs should be treated as increments in bytes. This is + used for a bulk of the executable code of a program, which contains + instructions with no specific pattern. + + An SFrame FDE of type SFRAME_FDE_TYPE_PCMASK is an indication + that the PCs in the FREs should be treated as masks. This type is useful + for the cases when a small pattern of instructions in a program stub is + repeated to cover a specific functionality. Typical use cases are pltN + entries, trampolines etc. */ + +/* Unwinders perform a (PC >= FRE_START_ADDR) to look up a matching FRE. 
*/ +#define SFRAME_FDE_TYPE_PCINC 0 +/* Unwinders perform a (PC % REP_BLOCK_SIZE >= FRE_START_ADDR) to look up a + matching FRE. */ +#define SFRAME_FDE_TYPE_PCMASK 1 + +typedef struct sframe_preamble +{ + uint16_t sfp_magic; /* Magic number (SFRAME_MAGIC). */ + uint8_t sfp_version; /* Data format version number (SFRAME_VERSION). */ + uint8_t sfp_flags; /* Flags. */ +} __attribute__ ((packed)) sframe_preamble; + +typedef struct sframe_header +{ + sframe_preamble sfh_preamble; + /* Information about the arch (endianness) and ABI. */ + uint8_t sfh_abi_arch; + /* Offset for the Frame Pointer (FP) from CFA may be fixed for some + ABIs (e.g., in AMD64 when -fno-omit-frame-pointer is used). When fixed, + this field specifies the fixed stack frame offset and the individual + FREs do not need to track it. When not fixed, it is set to + SFRAME_CFA_FIXED_FP_INVALID, and the individual FREs may provide + the applicable stack frame offset, if any. */ + int8_t sfh_cfa_fixed_fp_offset; + /* Offset for the Return Address from CFA is fixed for some ABIs + (e.g., AMD64 has it as CFA-8). When fixed, the header specifies the + fixed stack frame offset and the individual FREs do not track it. When + not fixed, it is set to SFRAME_CFA_FIXED_RA_INVALID, and individual + FREs provide the applicable stack frame offset, if any. */ + int8_t sfh_cfa_fixed_ra_offset; + /* Number of bytes making up the auxiliary header, if any. + Some ABI/arch, in the future, may use this space for extending the + information in SFrame header. Auxiliary header is contained in + bytes sequentially following the sframe_header. */ + uint8_t sfh_auxhdr_len; + /* Number of SFrame FDEs in this SFrame section. */ + uint32_t sfh_num_fdes; + /* Number of SFrame Frame Row Entries. */ + uint32_t sfh_num_fres; + /* Number of bytes in the SFrame Frame Row Entry section. */ + uint32_t sfh_fre_len; + /* Offset of SFrame Function Descriptor Entry section. */ + uint32_t sfh_fdeoff; + /* Offset of SFrame Frame Row Entry section. 
*/ + uint32_t sfh_freoff; +} __attribute__ ((packed)) sframe_header; + +#define SFRAME_V1_HDR_SIZE(sframe_hdr) \ + ((sizeof (sframe_header) + (sframe_hdr).sfh_auxhdr_len)) + +/* Two possible keys for executable (instruction) pointers signing. */ +#define SFRAME_AARCH64_PAUTH_KEY_A 0 /* Key A. */ +#define SFRAME_AARCH64_PAUTH_KEY_B 1 /* Key B. */ + +typedef struct sframe_func_desc_entry +{ + /* Function start address. Encoded as a signed offset, relative to the + beginning of the current FDE. */ + int32_t sfde_func_start_address; + /* Size of the function in bytes. */ + uint32_t sfde_func_size; + /* Offset of the first SFrame Frame Row Entry of the function, relative to the + beginning of the SFrame Frame Row Entry sub-section. */ + uint32_t sfde_func_start_fre_off; + /* Number of frame row entries for the function. */ + uint32_t sfde_func_num_fres; + /* Additional information for stack tracing from the function: + - 4-bits: Identify the FRE type used for the function. + - 1-bit: Identify the FDE type of the function - mask or inc. + - 1-bit: PAC authorization A/B key (aarch64). + - 2-bits: Unused. + ------------------------------------------------------------------------ + | Unused | PAC auth A/B key (aarch64) | FDE type | FRE type | + | | Unused (amd64) | | | + ------------------------------------------------------------------------ + 8 6 5 4 0 */ + uint8_t sfde_func_info; + /* Size of the block of repeating insns. Used for SFrame FDEs of type + SFRAME_FDE_TYPE_PCMASK. */ + uint8_t sfde_func_rep_size; + uint16_t sfde_func_padding2; +} __attribute__ ((packed)) sframe_func_desc_entry; + +/* Macros to compose and decompose function info in FDE. */ + +/* Note: Set PAC auth key to SFRAME_AARCH64_PAUTH_KEY_A by default. 
*/ +#define SFRAME_V1_FUNC_INFO(fde_type, fre_enc_type) \ + (((SFRAME_AARCH64_PAUTH_KEY_A & 0x1) << 5) | \ + (((fde_type) & 0x1) << 4) | ((fre_enc_type) & 0xf)) + +#define SFRAME_V1_FUNC_FRE_TYPE(data) ((data) & 0xf) +#define SFRAME_V1_FUNC_FDE_TYPE(data) (((data) >> 4) & 0x1) +#define SFRAME_V1_FUNC_PAUTH_KEY(data) (((data) >> 5) & 0x1) + +/* Set the pauth key as indicated. */ +#define SFRAME_V1_FUNC_INFO_UPDATE_PAUTH_KEY(pauth_key, fde_info) \ + ((((pauth_key) & 0x1) << 5) | ((fde_info) & 0xdf)) + +/* Size of stack frame offsets in an SFrame Frame Row Entry. A single + SFrame FRE has all offsets of the same size. Offset size may vary + across frame row entries. */ +#define SFRAME_FRE_OFFSET_1B 0 +#define SFRAME_FRE_OFFSET_2B 1 +#define SFRAME_FRE_OFFSET_4B 2 + +/* An SFrame Frame Row Entry can be SP or FP based. */ +#define SFRAME_BASE_REG_FP 0 +#define SFRAME_BASE_REG_SP 1 + +/* The index at which a specific offset is presented in the variable length + bytes of an FRE. */ +#define SFRAME_FRE_CFA_OFFSET_IDX 0 +/* The RA stack offset, if present, will always be at index 1 in the variable + length bytes of the FRE. */ +#define SFRAME_FRE_RA_OFFSET_IDX 1 +/* The FP stack offset may appear at offset 1 or 2, depending on the ABI as RA + may or may not be tracked. */ +#define SFRAME_FRE_FP_OFFSET_IDX 2 + +typedef struct sframe_fre_info +{ + /* Information about + - 1 bit: base reg for CFA + - 4 bits: Number of offsets (N). A value of up to 3 is allowed to track + all three of CFA, FP and RA (fixed implicit order). + - 2 bits: information about size of the offsets (S) in bytes. + Valid values are SFRAME_FRE_OFFSET_1B, SFRAME_FRE_OFFSET_2B, + SFRAME_FRE_OFFSET_4B + - 1 bit: Mangled RA state bit (aarch64 only). 
+ ---------------------------------------------------------------------------------- + | Mangled-RA (aarch64) | Size of offsets | Number of offsets | base_reg | + | Unused (amd64) | | | | + ---------------------------------------------------------------------------------- + 8 7 5 1 0 + + */ + uint8_t fre_info; +} sframe_fre_info; + +/* Macros to compose and decompose FRE info. */ + +/* Note: Set mangled_ra_p to zero by default. */ +#define SFRAME_V1_FRE_INFO(base_reg_id, offset_num, offset_size) \ + (((0 & 0x1) << 7) | (((offset_size) & 0x3) << 5) | \ + (((offset_num) & 0xf) << 1) | ((base_reg_id) & 0x1)) + +/* Set the mangled_ra_p bit as indicated. */ +#define SFRAME_V1_FRE_INFO_UPDATE_MANGLED_RA_P(mangled_ra_p, fre_info) \ + ((((mangled_ra_p) & 0x1) << 7) | ((fre_info) & 0x7f)) + +#define SFRAME_V1_FRE_CFA_BASE_REG_ID(data) ((data) & 0x1) +#define SFRAME_V1_FRE_OFFSET_COUNT(data) (((data) >> 1) & 0xf) +#define SFRAME_V1_FRE_OFFSET_SIZE(data) (((data) >> 5) & 0x3) +#define SFRAME_V1_FRE_MANGLED_RA_P(data) (((data) >> 7) & 0x1) + +/* SFrame Frame Row Entry definitions. + + Used for both AMD64 and AARCH64. + + An SFrame Frame Row Entry is a self-sufficient record which contains + information on how to generate the stack trace for the specified range of + PCs. Each SFrame Frame Row Entry is followed by S*N bytes, where: + S is the size of the stack frame offset for the FRE, and + N is the number of stack frame offsets in the FRE + + The interpretation of FRE stack offsets is ABI-specific: + + AMD64: + offset1 (interpreted as CFA = BASE_REG + offset1) + if FP is being tracked + offset2 (interpreted as FP = CFA + offset2) + fi + + AARCH64: + offset1 (interpreted as CFA = BASE_REG + offset1) + if FP is being tracked (in other words, if frame record created) + offset2 (interpreted as RA = CFA + offset2) + offset3 (interpreted as FP = CFA + offset3) + fi + Note that in AAPCS64, a frame record, if created, will save both FP and + LR on stack. 
+*/ + +/* Used when SFRAME_FRE_TYPE_ADDR1 is specified as FRE type. */ +typedef struct sframe_frame_row_entry_addr1 +{ + /* Start address of the frame row entry. Encoded as a 1-byte unsigned + offset, relative to the start address of the function. */ + uint8_t sfre_start_address; + sframe_fre_info sfre_info; +} __attribute__ ((packed)) sframe_frame_row_entry_addr1; + +/* Upper limit of start address in sframe_frame_row_entry_addr1 + is 0x100 (not inclusive). */ +#define SFRAME_FRE_TYPE_ADDR1_LIMIT \ + (1ULL << ((SFRAME_FRE_TYPE_ADDR1 + 1) * 8)) + +/* Used when SFRAME_FRE_TYPE_ADDR2 is specified as FRE type. */ +typedef struct sframe_frame_row_entry_addr2 +{ + /* Start address of the frame row entry. Encoded as a 2-byte unsigned + offset, relative to the start address of the function. */ + uint16_t sfre_start_address; + sframe_fre_info sfre_info; +} __attribute__ ((packed)) sframe_frame_row_entry_addr2; + +/* Upper limit of start address in sframe_frame_row_entry_addr2 + is 0x10000 (not inclusive). */ +#define SFRAME_FRE_TYPE_ADDR2_LIMIT \ + (1ULL << ((SFRAME_FRE_TYPE_ADDR2 * 2) * 8)) + +/* Used when SFRAME_FRE_TYPE_ADDR4 is specified as FRE type. */ +typedef struct sframe_frame_row_entry_addr4 +{ + /* Start address of the frame row entry. Encoded as a 4-byte unsigned + offset, relative to the start address of the function. */ + uint32_t sfre_start_address; + sframe_fre_info sfre_info; +} __attribute__ ((packed)) sframe_frame_row_entry_addr4; + +/* Upper limit of start address in sframe_frame_row_entry_addr4 + is 0x100000000 (not inclusive). */ +#define SFRAME_FRE_TYPE_ADDR4_LIMIT \ + (1ULL << ((SFRAME_FRE_TYPE_ADDR4 * 2) * 8)) + +/* Used to pass frame information to stack trace routine. */ +typedef struct cframe +{ + _Unwind_Ptr pc; + _Unwind_Ptr sp; + _Unwind_Ptr fp; +} frame; + +/* SFrame stack tracing support. */ +int __stacktrace_sframe (void **, int, frame *); +libc_hidden_proto (__stacktrace_sframe); + +/* Helper used by SFrame tracing algorithm. 
*/ +_Unwind_Ptr __getPC (void); +libc_hidden_proto (__getPC); + +/* Helper used by SFrame tracing algorithm. */ +_Unwind_Ptr __getSP (void); +libc_hidden_proto (__getSP); + +#ifdef __cplusplus +} +#endif + +#endif /* _SFRAME_H */ diff --git a/sysdeps/generic/sysdep.h b/sysdeps/generic/sysdep.h index 4c0dda4..ef5eba2 100644 --- a/sysdeps/generic/sysdep.h +++ b/sysdeps/generic/sysdep.h @@ -45,6 +45,7 @@ # define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off # define cfi_offset(reg, off) .cfi_offset reg, off # define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off +# define cfi_val_offset(reg, off) .cfi_val_offset reg, off # define cfi_register(r1, r2) .cfi_register r1, r2 # define cfi_return_column(reg) .cfi_return_column reg # define cfi_restore(reg) .cfi_restore reg @@ -74,6 +75,8 @@ ".cfi_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) # define CFI_REL_OFFSET(reg, off) \ ".cfi_rel_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) +# define CFI_VAL_OFFSET(reg, off) \ + ".cfi_val_offset " CFI_STRINGIFY(reg) "," CFI_STRINGIFY(off) # define CFI_REGISTER(r1, r2) \ ".cfi_register " CFI_STRINGIFY(r1) "," CFI_STRINGIFY(r2) # define CFI_RETURN_COLUMN(reg) \ diff --git a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h b/sysdeps/generic/uw-sigframe.h index 401079c..b357f8a 100644 --- a/sysdeps/unix/sysv/linux/sparc/kernel_termios.h +++ b/sysdeps/generic/uw-sigframe.h @@ -1,4 +1,5 @@ -/* Copyright (C) 1997-2025 Free Software Foundation, Inc. +/* Internal header file for handling signal frames. Generic version. + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -15,26 +16,16 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef _KERNEL_TERMIOS_H -#define _KERNEL_TERMIOS_H 1 -/* The following corresponds to the values from the Linux 2.1.20 kernel. 
*/ +/* Each architecture that supports SFrame may need to define several + macros to handle exceptional cases during stack backtracing. -/* We need the definition of tcflag_t, cc_t, and speed_t. */ -#include <termios.h> + MD_DECODE_SIGNAL_FRAME(frame) should recover frame information when + a signal-related exception occurs. The input frame must contain a + valid program counter (PC) field. On success, the macro should + return _URC_NO_REASON. -#define __KERNEL_NCCS 17 + MD_DETECT_OUTERMOST_FRAME(frame) is used to detect the outermost + stack frame. It returns _URC_NO_REASON upon successful + detection. -struct __kernel_termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[__KERNEL_NCCS]; /* control characters */ - }; - -#define _HAVE_C_ISPEED 0 -#define _HAVE_C_OSPEED 0 - -#endif /* kernel_termios.h */ + The FRAME structure is defined in sysdeps/generic/sframe.h */ diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h index e841644..d7d907a 100644 --- a/sysdeps/gnu/errlist.h +++ b/sysdeps/gnu/errlist.h @@ -797,3 +797,12 @@ _S(ED, N_("?")) #ifdef EPROGUNAVAIL _S(EPROGUNAVAIL, N_("RPC program not available")) #endif +#ifdef EINIT +_S(EINIT, N_("Initialization error")) +#endif +#ifdef EREMDEV +_S(EREMDEV, N_("Device is remote")) +#endif +#ifdef ERREMOTE +_S(ERREMOTE, N_("Too many levels of remote in path")) +#endif diff --git a/sysdeps/gnu/netinet/tcp.h b/sysdeps/gnu/netinet/tcp.h index b2acbb4..7a3500b 100644 --- a/sysdeps/gnu/netinet/tcp.h +++ b/sysdeps/gnu/netinet/tcp.h @@ -212,6 +212,9 @@ enum # define TCPI_OPT_ECN 8 /* ECN was negotiated at TCP session init */ # define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ # define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ +# define TCPI_OPT_USEC_TS 64 /* usec timestamps */ +# define 
TCPI_OPT_TFO_CHILD 128 /* child from a Fast Open option on SYN */ + /* Values for tcpi_state. */ enum tcp_ca_state diff --git a/sysdeps/htl/include/bits/cancelation.h b/sysdeps/htl/include/bits/cancelation.h new file mode 100644 index 0000000..ef2cd70 --- /dev/null +++ b/sysdeps/htl/include/bits/cancelation.h @@ -0,0 +1,5 @@ +#include_next <bits/cancelation.h> + +#ifndef _ISOMAC +#include <pthreadP.h> +#endif diff --git a/sysdeps/htl/libc-lock.h b/sysdeps/htl/libc-lock.h index 66779b9..8e764a7 100644 --- a/sysdeps/htl/libc-lock.h +++ b/sysdeps/htl/libc-lock.h @@ -37,13 +37,10 @@ { \ __handler.__handler = FCT; \ __handler.__arg = ARG; \ - if (__pthread_get_cleanup_stack != NULL) \ - { \ - __handlers = __pthread_get_cleanup_stack (); \ - __handler.__next = *__handlers; \ - *__handlers = &__handler; \ - __registered = 1; \ - } \ + __handlers = __pthread_get_cleanup_stack (); \ + __handler.__next = *__handlers; \ + *__handlers = &__handler; \ + __registered = 1; \ } \ #define __libc_cleanup_end(DOIT) \ @@ -59,12 +56,4 @@ #define __libc_cleanup_push(fct, arg) __libc_cleanup_region_start (1, fct, arg) #define __libc_cleanup_pop(execute) __libc_cleanup_region_end (execute) -#if !IS_IN (libpthread) -# ifdef weak_extern -weak_extern (__pthread_get_cleanup_stack) -# else -# pragma weak __pthread_get_cleanup_stack -# endif -#endif - #endif diff --git a/sysdeps/htl/libc-lockP.h b/sysdeps/htl/libc-lockP.h index 092eb35..e9977e4 100644 --- a/sysdeps/htl/libc-lockP.h +++ b/sysdeps/htl/libc-lockP.h @@ -126,15 +126,9 @@ libc_hidden_proto (__pthread_setcancelstate) single-threaded processes. 
*/ #if !defined(__NO_WEAK_PTHREAD_ALIASES) && !IS_IN (libpthread) # ifdef weak_extern -weak_extern (__pthread_key_create) -weak_extern (__pthread_setspecific) -weak_extern (__pthread_getspecific) weak_extern (__pthread_initialize) weak_extern (__pthread_atfork) # else -# pragma weak __pthread_key_create -# pragma weak __pthread_setspecific -# pragma weak __pthread_getspecific # pragma weak __pthread_initialize # pragma weak __pthread_atfork # endif diff --git a/sysdeps/htl/pt-destroy-specific.c b/sysdeps/htl/pt-destroy-specific.c index e63b807..b5eb0ba 100644 --- a/sysdeps/htl/pt-destroy-specific.c +++ b/sysdeps/htl/pt-destroy-specific.c @@ -20,6 +20,7 @@ #include <stdlib.h> #include <pt-internal.h> +#include <string.h> void __pthread_destroy_specific (struct __pthread *thread) @@ -100,3 +101,4 @@ __pthread_destroy_specific (struct __pthread *thread) memset (&thread->static_thread_specifics, 0, sizeof (thread->static_thread_specifics)); } +libc_hidden_def (__pthread_destroy_specific) diff --git a/sysdeps/htl/pt-getspecific.c b/sysdeps/htl/pt-getspecific.c index 0052ce8..d3ebb31 100644 --- a/sysdeps/htl/pt-getspecific.c +++ b/sysdeps/htl/pt-getspecific.c @@ -19,6 +19,7 @@ #include <pthread.h> #include <pt-internal.h> +#include <shlib-compat.h> void * __pthread_getspecific (pthread_key_t key) @@ -42,5 +43,9 @@ __pthread_getspecific (pthread_key_t key) return self->thread_specifics[key]; } -weak_alias (__pthread_getspecific, pthread_getspecific); -hidden_def (__pthread_getspecific) +libc_hidden_def (__pthread_getspecific) +versioned_symbol (libc, __pthread_getspecific, pthread_getspecific, GLIBC_2_42); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42) +compat_symbol (libpthread, __pthread_getspecific, pthread_getspecific, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-key-create.c b/sysdeps/htl/pt-key-create.c index cf8a8d1..92a9db8 100644 --- a/sysdeps/htl/pt-key-create.c +++ b/sysdeps/htl/pt-key-create.c @@ -22,6 +22,9 @@ #include <pt-internal.h> 
#include <pthreadP.h> +#include <shlib-compat.h> +#include <ldsodefs.h> + pthread_mutex_t __pthread_key_lock; pthread_once_t __pthread_key_once = PTHREAD_ONCE_INIT; @@ -116,5 +119,9 @@ do_search: __pthread_mutex_unlock (&__pthread_key_lock); return 0; } -weak_alias (__pthread_key_create, pthread_key_create) -hidden_def (__pthread_key_create) +libc_hidden_def (__pthread_key_create) +versioned_symbol (libc, __pthread_key_create, pthread_key_create, GLIBC_2_42); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42) +compat_symbol (libpthread, __pthread_key_create, pthread_key_create, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-key-delete.c b/sysdeps/htl/pt-key-delete.c index 79879e9..666314f 100644 --- a/sysdeps/htl/pt-key-delete.c +++ b/sysdeps/htl/pt-key-delete.c @@ -19,6 +19,8 @@ #include <pthread.h> #include <pt-internal.h> +#include <shlib-compat.h> +#include <ldsodefs.h> int __pthread_key_delete (pthread_key_t key) @@ -69,4 +71,9 @@ __pthread_key_delete (pthread_key_t key) return err; } -weak_alias (__pthread_key_delete, pthread_key_delete) +libc_hidden_def (__pthread_key_delete) +versioned_symbol (libc, __pthread_key_delete, pthread_key_delete, GLIBC_2_42); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42) +compat_symbol (libpthread, __pthread_key_delete, pthread_key_delete, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pt-setspecific.c b/sysdeps/htl/pt-setspecific.c index dfd55b6..0535225 100644 --- a/sysdeps/htl/pt-setspecific.c +++ b/sysdeps/htl/pt-setspecific.c @@ -19,6 +19,8 @@ #include <pthread.h> #include <pt-internal.h> +#include <shlib-compat.h> +#include <string.h> int __pthread_setspecific (pthread_key_t key, const void *value) @@ -68,5 +70,9 @@ __pthread_setspecific (pthread_key_t key, const void *value) self->thread_specifics[key] = (void *) value; return 0; } -weak_alias (__pthread_setspecific, pthread_setspecific); -hidden_def (__pthread_setspecific) +libc_hidden_def (__pthread_setspecific) +versioned_symbol (libc, 
__pthread_setspecific, pthread_setspecific, GLIBC_2_42); + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_12, GLIBC_2_42) +compat_symbol (libpthread, __pthread_setspecific, pthread_setspecific, GLIBC_2_12); +#endif diff --git a/sysdeps/htl/pthread-functions.h b/sysdeps/htl/pthread-functions.h index 467d031..aec13a6 100644 --- a/sysdeps/htl/pthread-functions.h +++ b/sysdeps/htl/pthread-functions.h @@ -22,11 +22,6 @@ #include <pthread.h> void __pthread_exit (void *) __attribute__ ((__noreturn__)); -struct __pthread_cancelation_handler **__pthread_get_cleanup_stack (void); -int __pthread_once (pthread_once_t *, void (*) (void)); -int __pthread_key_create (pthread_key_t *, void (*) (void *)); -void *__pthread_getspecific (pthread_key_t); -int __pthread_setspecific (pthread_key_t, const void *); void _cthreads_flockfile (FILE *); void _cthreads_funlockfile (FILE *); @@ -38,11 +33,6 @@ int _cthreads_ftrylockfile (FILE *); struct pthread_functions { void (*ptr___pthread_exit) (void *) __attribute__ ((__noreturn__)); - struct __pthread_cancelation_handler **(*ptr___pthread_get_cleanup_stack) (void); - int (*ptr_pthread_once) (pthread_once_t *, void (*) (void)); - int (*ptr___pthread_key_create) (pthread_key_t *, void (*) (void *)); - void *(*ptr___pthread_getspecific) (pthread_key_t); - int (*ptr___pthread_setspecific) (pthread_key_t, const void *); void (*ptr__IO_flockfile) (FILE *); void (*ptr__IO_funlockfile) (FILE *); int (*ptr__IO_ftrylockfile) (FILE *); diff --git a/sysdeps/htl/pthreadP.h b/sysdeps/htl/pthreadP.h index 535deeb..535740f 100644 --- a/sysdeps/htl/pthreadP.h +++ b/sysdeps/htl/pthreadP.h @@ -182,9 +182,13 @@ int __cthread_keycreate (__cthread_key_t *); int __cthread_getspecific (__cthread_key_t, void **); int __cthread_setspecific (__cthread_key_t, void *); int __pthread_key_create (pthread_key_t *key, void (*destr) (void *)); +libc_hidden_proto (__pthread_key_create) void *__pthread_getspecific (pthread_key_t key); +libc_hidden_proto 
(__pthread_getspecific) int __pthread_setspecific (pthread_key_t key, const void *value); +libc_hidden_proto (__pthread_setspecific) int __pthread_key_delete (pthread_key_t key); +libc_hidden_proto (__pthread_key_delete) int __pthread_once (pthread_once_t *once_control, void (*init_routine) (void)); int __pthread_getattr_np (pthread_t, pthread_attr_t *); @@ -212,14 +216,11 @@ int __pthread_condattr_init (pthread_condattr_t *attr); libc_hidden_proto (__pthread_self) libc_hidden_proto (__pthread_attr_init) libc_hidden_proto (__pthread_condattr_init) +libc_hidden_proto (__pthread_get_cleanup_stack) #if IS_IN (libpthread) hidden_proto (__pthread_create) hidden_proto (__pthread_detach) -hidden_proto (__pthread_key_create) -hidden_proto (__pthread_getspecific) -hidden_proto (__pthread_setspecific) -hidden_proto (__pthread_get_cleanup_stack) #endif #if !defined(__NO_WEAK_PTHREAD_ALIASES) && !IS_IN (libpthread) diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile index a2e8c0b..74068ea 100644 --- a/sysdeps/i386/Makefile +++ b/sysdeps/i386/Makefile @@ -17,20 +17,25 @@ ifeq ($(subdir),gmon) sysdep_routines += i386-mcount endif -ifeq ($(subdir),elf) -CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused -CFLAGS-dl-load.c += -Wno-unused -CFLAGS-dl-reloc.c += -Wno-unused -endif - ifeq ($(subdir),csu) gen-as-const-headers += link-defines.sym +gen-as-const-headers += tlsdesc.sym else stack-align-test-flags += -malign-double endif +# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since +# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters +# which must be preserved. +# With SSE disabled, ensure -fpmath is not set to use sse either. 
+rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387 ifeq ($(subdir),elf) -sysdep-dl-routines += tlsdesc dl-tlsdesc +CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused +CFLAGS-dl-load.c += -Wno-unused +CFLAGS-dl-reloc.c += -Wno-unused +sysdep-dl-routines += \ + dl-tls-get-addr \ +# sysdep-dl-routines tests += tst-audit3 modules-names += tst-auditmod3a tst-auditmod3b @@ -38,18 +43,6 @@ modules-names += tst-auditmod3a tst-auditmod3b $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so -endif - -ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym -endif - -# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since -# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters -# which must be preserved. -# With SSE disabled, ensure -fpmath is not set to use sse either. -rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387 -ifeq ($(subdir),elf) CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ $(rtld-CFLAGS)) diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c new file mode 100644 index 0000000..c97e5c5 --- /dev/null +++ b/sysdeps/i386/dl-tls-get-addr.c @@ -0,0 +1,68 @@ +/* Ifunc selector for ___tls_get_addr. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifdef SHARED +# define ___tls_get_addr __redirect____tls_get_addr +# include <dl-tls.h> +# undef ___tls_get_addr +# undef __tls_get_addr + +# define SYMBOL_NAME ___tls_get_addr +# include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (cpu_features->xsave_state_size != 0) + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + return OPTIMIZE (xsavec); + else + return OPTIMIZE (xsave); + } + else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + return OPTIMIZE (fxsave); + return OPTIMIZE (fnsave); +} + +libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, + IFUNC_SELECTOR ()); + +/* The special thing about the x86 TLS ABI is that we have two + variants of the __tls_get_addr function with different calling + conventions. The GNU version, which we are mostly concerned here, + takes the parameter in a register. The name is changed by adding + an additional underscore at the beginning. The Sun version uses + the normal calling convention. */ + +rtld_hidden_proto (___tls_get_addr) +rtld_hidden_def (___tls_get_addr) + +void * +__tls_get_addr (tls_index *ti) +{ + return ___tls_get_addr (ti); +} +#endif diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h index f453931..ef605c5 100644 --- a/sysdeps/i386/dl-tls.h +++ b/sysdeps/i386/dl-tls.h @@ -37,34 +37,14 @@ typedef struct dl_tls_index /* This is the prototype for the GNU version. 
*/ extern void *___tls_get_addr (tls_index *ti) __attribute__ ((__regparm__ (1))); -extern void *___tls_get_addr_internal (tls_index *ti) - __attribute__ ((__regparm__ (1))) attribute_hidden; - # if IS_IN (rtld) -/* The special thing about the x86 TLS ABI is that we have two - variants of the __tls_get_addr function with different calling - conventions. The GNU version, which we are mostly concerned here, - takes the parameter in a register. The name is changed by adding - an additional underscore at the beginning. The Sun version uses - the normal calling convention. */ -void * -__tls_get_addr (tls_index *ti) -{ - return ___tls_get_addr_internal (ti); -} - - /* Prepare using the definition of __tls_get_addr in the generic version of this file. */ -# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr -strong_alias (___tls_get_addr, ___tls_get_addr_internal) -rtld_hidden_proto (___tls_get_addr) -rtld_hidden_def (___tls_get_addr) -#else - +# define __tls_get_addr \ + __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal +# else /* Users should get the better interface. */ -# define __tls_get_addr ___tls_get_addr - +# define __tls_get_addr ___tls_get_addr # endif #endif diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h index 6aec06d..be9ecd6 100644 --- a/sysdeps/i386/dl-tlsdesc-dynamic.h +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h @@ -16,34 +16,6 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#undef REGISTER_SAVE_AREA - -#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 -# error STATE_SAVE_ALIGNMENT must be multiple of 16 -#endif - -#if DL_RUNTIME_RESOLVE_REALIGN_STACK -# ifdef USE_FNSAVE -# error USE_FNSAVE shouldn't be defined -# endif -# ifdef USE_FXSAVE -/* Use fxsave to save all registers. */ -# define REGISTER_SAVE_AREA 512 -# endif -#else -# ifdef USE_FNSAVE -/* Use fnsave to save x87 FPU stack registers. 
*/ -# define REGISTER_SAVE_AREA 108 -# else -# ifndef USE_FXSAVE -# error USE_FXSAVE must be defined -# endif -/* Use fxsave to save all registers. Add 12 bytes to align the stack - to 16 bytes. */ -# define REGISTER_SAVE_AREA (512 + 12) -# endif -#endif - .hidden _dl_tlsdesc_dynamic .global _dl_tlsdesc_dynamic .type _dl_tlsdesc_dynamic,@function @@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: ret .p2align 4,,7 2: - cfi_adjust_cfa_offset (32) -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - movl %ebx, -28(%esp) - movl %esp, %ebx - cfi_def_cfa_register(%ebx) - and $-STATE_SAVE_ALIGNMENT, %esp -#endif -#ifdef REGISTER_SAVE_AREA - subl $REGISTER_SAVE_AREA, %esp -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK - cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) -# endif -#else -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK -# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true -# endif - /* Allocate stack space of the required size to save the state. */ - LOAD_PIC_REG (cx) - subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp -#endif -#ifdef USE_FNSAVE - fnsave (%esp) -#elif defined USE_FXSAVE - fxsave (%esp) -#else - /* Save the argument for ___tls_get_addr in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - /* Clear the XSAVE Header. */ -# ifdef USE_XSAVE - movl %edx, (512)(%esp) - movl %edx, (512 + 4 * 1)(%esp) - movl %edx, (512 + 4 * 2)(%esp) - movl %edx, (512 + 4 * 3)(%esp) -# endif - movl %edx, (512 + 4 * 4)(%esp) - movl %edx, (512 + 4 * 5)(%esp) - movl %edx, (512 + 4 * 6)(%esp) - movl %edx, (512 + 4 * 7)(%esp) - movl %edx, (512 + 4 * 8)(%esp) - movl %edx, (512 + 4 * 9)(%esp) - movl %edx, (512 + 4 * 10)(%esp) - movl %edx, (512 + 4 * 11)(%esp) - movl %edx, (512 + 4 * 12)(%esp) - movl %edx, (512 + 4 * 13)(%esp) - movl %edx, (512 + 4 * 14)(%esp) - movl %edx, (512 + 4 * 15)(%esp) -# ifdef USE_XSAVE - xsave (%esp) -# else - xsavec (%esp) -# endif - /* Restore the argument for ___tls_get_addr in EAX. 
*/ - movl %ecx, %eax -#endif - call HIDDEN_JUMPTARGET (___tls_get_addr) - /* Get register content back. */ -#ifdef USE_FNSAVE - frstor (%esp) -#elif defined USE_FXSAVE - fxrstor (%esp) -#else - /* Save and retore ___tls_get_addr return value stored in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - xrstor (%esp) - movl %ecx, %eax -#endif -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - mov %ebx, %esp - cfi_def_cfa_register(%esp) - movl -28(%esp), %ebx - cfi_restore(%ebx) -#else - addl $REGISTER_SAVE_AREA, %esp - cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) -#endif +#include "tls-get-addr-wrapper.h" jmp 1b cfi_endproc .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index c080993..c914ca4 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S @@ -22,23 +22,6 @@ #include <features-offsets.h> #include "tlsdesc.h" -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: - - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 - - __tls_get_addr may be called with 4-byte stack alignment. Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. */ -# define DL_STACK_ALIGNMENT 4 -#endif - -/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align - stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) - .text /* This function is used to compute the TP offset for symbols in diff --git a/sysdeps/i386/fpu/e_ilogb.S b/sysdeps/i386/fpu/e_ilogb.S deleted file mode 100644 index f4b792c..0000000 --- a/sysdeps/i386/fpu/e_ilogb.S +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Public domain. 
- */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $") - -ENTRY(__ieee754_ilogb) - fldl 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogb) diff --git a/sysdeps/i386/fpu/e_ilogbf.S b/sysdeps/i386/fpu/e_ilogbf.S deleted file mode 100644 index 37298b9..0000000 --- a/sysdeps/i386/fpu/e_ilogbf.S +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $") - -ENTRY(__ieee754_ilogbf) - flds 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogbf) diff --git a/sysdeps/i386/fpu/math_err.c b/sysdeps/i386/fpu/math_err.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/i386/fpu/math_err.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. 
*/ diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h new file mode 100644 index 0000000..0708e5a --- /dev/null +++ b/sysdeps/i386/tls-get-addr-wrapper.h @@ -0,0 +1,127 @@ +/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#undef REGISTER_SAVE_AREA + +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# ifdef USE_FNSAVE +# error USE_FNSAVE shouldn't be defined +# endif +# ifdef USE_FXSAVE +/* Use fxsave to save all registers. */ +# define REGISTER_SAVE_AREA 512 +# endif +#else +# ifdef USE_FNSAVE +/* Use fnsave to save x87 FPU stack registers. */ +# define REGISTER_SAVE_AREA 108 +# else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save all registers. Add 12 bytes to align the stack + to 16 bytes. 
*/ +# define REGISTER_SAVE_AREA (512 + 12) +# endif +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movl %ebx, 28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + and $-STATE_SAVE_ALIGNMENT, %esp +#endif +#ifdef REGISTER_SAVE_AREA + subl $REGISTER_SAVE_AREA, %esp +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +# endif + /* Allocate stack space of the required size to save the state. */ + LOAD_PIC_REG (cx) + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ + +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +#endif +#ifdef USE_FNSAVE + fnsave (%esp) +#elif defined USE_FXSAVE + fxsave (%esp) +#else + /* Save the argument for ___tls_get_addr in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. */ +# ifdef USE_XSAVE + movl %edx, (512)(%esp) + movl %edx, (512 + 4 * 1)(%esp) + movl %edx, (512 + 4 * 2)(%esp) + movl %edx, (512 + 4 * 3)(%esp) +# endif + movl %edx, (512 + 4 * 4)(%esp) + movl %edx, (512 + 4 * 5)(%esp) + movl %edx, (512 + 4 * 6)(%esp) + movl %edx, (512 + 4 * 7)(%esp) + movl %edx, (512 + 4 * 8)(%esp) + movl %edx, (512 + 4 * 9)(%esp) + movl %edx, (512 + 4 * 10)(%esp) + movl %edx, (512 + 4 * 11)(%esp) + movl %edx, (512 + 4 * 12)(%esp) + movl %edx, (512 + 4 * 13)(%esp) + movl %edx, (512 + 4 * 14)(%esp) + movl %edx, (512 + 4 * 15)(%esp) +# ifdef USE_XSAVE + xsave (%esp) +# else + xsavec (%esp) +# endif + /* Restore the argument for ___tls_get_addr in EAX. */ + movl %ecx, %eax +#endif + call ___tls_get_addr_internal + /* Get register content back. */ +#ifdef USE_FNSAVE + frstor (%esp) +#elif defined USE_FXSAVE + fxrstor (%esp) +#else + /* Save and retore ___tls_get_addr return value stored in EAX. 
*/ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor (%esp) + movl %ecx, %eax +#endif +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl 28(%esp), %ebx + cfi_restore(%ebx) +#else + addl $REGISTER_SAVE_AREA, %esp + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S new file mode 100644 index 0000000..7d143d8 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.S @@ -0,0 +1,57 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <tls.h> +#include <cpu-features-offsets.h> +#include <features-offsets.h> + + .text +#ifdef SHARED +# define USE_FNSAVE +# define MINIMUM_ALIGNMENT 4 +# define STATE_SAVE_ALIGNMENT 4 +# define ___tls_get_addr _____tls_get_addr_fnsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef MINIMUM_ALIGNMENT +# undef USE_FNSAVE + +# define MINIMUM_ALIGNMENT 16 + +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define ___tls_get_addr _____tls_get_addr_fxsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsavec +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVEC +#endif /* SHARED */ diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h new file mode 100644 index 0000000..1825798 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.h @@ -0,0 +1,42 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + .hidden ___tls_get_addr + .global ___tls_get_addr + .type ___tls_get_addr,@function + + /* This function is a wrapper of ___tls_get_addr_internal to + preserve caller-saved vector registers. */ + + cfi_startproc + .align 16 +___tls_get_addr: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + movl %ecx, 20(%esp) + movl %edx, 24(%esp) +#include "tls-get-addr-wrapper.h" + movl 20(%esp), %ecx + movl 24(%esp), %edx + addl $32, %esp + cfi_adjust_cfa_offset (-32) + ret + cfi_endproc + .size ___tls_get_addr, .-___tls_get_addr diff --git a/sysdeps/ieee754/dbl-64/e_ilogb.c b/sysdeps/ieee754/dbl-64/e_ilogb.c index 1e338a5..1ea2f23 100644 --- a/sysdeps/ieee754/dbl-64/e_ilogb.c +++ b/sysdeps/ieee754/dbl-64/e_ilogb.c @@ -1,63 +1 @@ -/* @(#)s_ilogb.c 5.1 93/09/24 */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_ilogb.c,v 1.9 1995/05/10 20:47:28 jtc Exp $"; -#endif - -/* ilogb(double x) - * return the binary exponent of non-zero x - * ilogb(0) = FP_ILOGB0 - * ilogb(NaN) = FP_ILOGBNAN (no signal is raised) - * ilogb(+-Inf) = INT_MAX (no signal is raised) - */ - -#include <limits.h> -#include <math.h> -#include <math_private.h> - -int -__ieee754_ilogb (double x) -{ - int32_t hx, lx, ix; - - GET_HIGH_WORD (hx, x); - hx &= 0x7fffffff; - if (hx < 0x00100000) - { - GET_LOW_WORD (lx, x); - if ((hx | lx) == 0) - return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */ - else /* subnormal x */ - if (hx == 0) - { - for (ix = -1043; lx > 0; lx <<= 1) - ix -= 1; - } - else - { - for (ix = -1022, hx <<= 11; hx > 0; hx <<= 1) - ix -= 1; - } - return ix; - } - else if (hx < 0x7ff00000) - return (hx >> 20) - 1023; - else if (FP_ILOGBNAN != INT_MAX) - { - /* ISO C99 requires ilogb(+-Inf) == INT_MAX. 
*/ - GET_LOW_WORD (lx, x); - if (((hx ^ 0x7ff00000) | lx) == 0) - return INT_MAX; - } - return FP_ILOGBNAN; -} +/* ilogb is implemented at w_ilogb.c */ diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h index 3382e38..d9288c4 100644 --- a/sysdeps/ieee754/dbl-64/math_config.h +++ b/sysdeps/ieee754/dbl-64/math_config.h @@ -109,6 +109,7 @@ issignaling_inline (double x) #define BIT_WIDTH 64 #define MANTISSA_WIDTH 52 #define EXPONENT_WIDTH 11 +#define EXPONENT_BIAS 1023 #define MANTISSA_MASK UINT64_C(0x000fffffffffffff) #define EXPONENT_MASK UINT64_C(0x7ff0000000000000) #define EXP_MANT_MASK UINT64_C(0x7fffffffffffffff) @@ -121,12 +122,24 @@ is_nan (uint64_t x) return (x & EXP_MANT_MASK) > EXPONENT_MASK; } +static inline bool +is_inf (uint64_t x) +{ + return (x << 1) == (EXPONENT_MASK << 1); +} + static inline uint64_t get_mantissa (uint64_t x) { return x & MANTISSA_MASK; } +static inline int +get_exponent (uint64_t x) +{ + return (int)((x >> MANTISSA_WIDTH & 0x7ff) - EXPONENT_BIAS); +} + /* Convert integer number X, unbiased exponent EP, and sign S to double: result = X * 2^(EP+1 - exponent_bias) @@ -164,6 +177,8 @@ attribute_hidden double __math_divzero (uint32_t); /* Invalid input unless it is a quiet NaN. */ attribute_hidden double __math_invalid (double); +attribute_hidden int __math_invalid_i (int); +attribute_hidden long int __math_invalid_li (long int); /* Error handling using output checking, only for errno setting. 
*/ diff --git a/sysdeps/ieee754/dbl-64/math_err.c b/sysdeps/ieee754/dbl-64/math_err.c index 4a07fd5..b8c645a 100644 --- a/sysdeps/ieee754/dbl-64/math_err.c +++ b/sysdeps/ieee754/dbl-64/math_err.c @@ -29,8 +29,24 @@ with_errno (double y, int e) errno = e; return y; } + +NOINLINE static int +with_errno_i (int y, int e) +{ + errno = e; + return y; +} + +NOINLINE static long int +with_errno_li (long int y, int e) +{ + errno = e; + return y; +} #else #define with_errno(x, e) (x) +#define with_errno_i(x, e) (x) +#define with_errno_li(x, e) (x) #endif attribute_hidden double @@ -83,6 +99,22 @@ __math_invalid (double x) return isnan (x) ? y : with_errno (y, EDOM); } +attribute_hidden int +__math_invalid_i (int r) +{ + double y = 0.0 / 0.0; + math_force_eval (y); + return with_errno_i (r, EDOM); +} + +attribute_hidden long int +__math_invalid_li (long int r) +{ + double y = 0.0 / 0.0; + math_force_eval (y); + return with_errno_li (r, EDOM); +} + /* Check result and set errno if necessary. */ attribute_hidden double diff --git a/sysdeps/ieee754/dbl-64/s_modf.c b/sysdeps/ieee754/dbl-64/s_modf.c index 0de2084..90cd8e8 100644 --- a/sysdeps/ieee754/dbl-64/s_modf.c +++ b/sysdeps/ieee754/dbl-64/s_modf.c @@ -1,63 +1,68 @@ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +/* Extract signed integral and fractional values. + Copyright (C) 1993-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. -/* - * modf(double x, double *iptr) - * return fraction part of x, and return x's integral part in *iptr. - * Method: - * Bit twiddling. - * - * Exception: - * No exception. 
- */ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ #include <math.h> -#include <math_private.h> #include <libm-alias-double.h> -#include <stdint.h> - -static const double one = 1.0; +#include "math_config.h" +#include <math-use-builtins-trunc.h> double -__modf(double x, double *iptr) +__modf (double x, double *iptr) { - int64_t i0; - int32_t j0; - EXTRACT_WORDS64(i0,x); - j0 = ((i0>>52)&0x7ff)-0x3ff; /* exponent of x */ - if(j0<52) { /* integer part in x */ - if(j0<0) { /* |x|<1 */ - /* *iptr = +-0 */ - INSERT_WORDS64(*iptr,i0&UINT64_C(0x8000000000000000)); - return x; - } else { - uint64_t i = UINT64_C(0x000fffffffffffff)>>j0; - if((i0&i)==0) { /* x is integral */ - *iptr = x; - /* return +-0 */ - INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000)); - return x; - } else { - INSERT_WORDS64(*iptr,i0&(~i)); - return x - *iptr; - } - } - } else { /* no fraction part */ - *iptr = x*one; - /* We must handle NaNs separately. */ - if (j0 == 0x400 && (i0 & UINT64_C(0xfffffffffffff))) - return x*one; - INSERT_WORDS64(x,i0&UINT64_C(0x8000000000000000)); /* return +-0 */ - return x; + uint64_t t = asuint64 (x); +#if USE_TRUNC_BUILTIN + if (is_inf (t)) + { + *iptr = x; + return copysign (0.0, x); + } + *iptr = trunc (x); + return copysign (x - *iptr, x); +#else + int e = get_exponent (t); + /* No fraction part. 
*/ + if (e < MANTISSA_WIDTH) + { + if (e < 0) + { + /* |x|<1 -> *iptr = +-0 */ + *iptr = asdouble (t & SIGN_MASK); + return x; + } + + uint64_t i = MANTISSA_MASK >> e; + if ((t & i) == 0) + { + /* x in integral, return +-0 */ + *iptr = x; + return asdouble (t & SIGN_MASK); } + + *iptr = asdouble (t & ~i); + return x - *iptr; + } + + /* Set invalid operation for sNaN. */ + *iptr = x * 1.0; + if ((e == 0x400) && (t & MANTISSA_MASK)) + return *iptr; + return asdouble (t & SIGN_MASK); +#endif } #ifndef __modf libm_alias_double (__modf, modf) diff --git a/sysdeps/ieee754/dbl-64/w_ilogb-impl.h b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h new file mode 100644 index 0000000..c919735 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/w_ilogb-impl.h @@ -0,0 +1,37 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +static inline RET_TYPE +IMPL_NAME (double x) +{ + uint64_t ux = asuint64 (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (__glibc_unlikely (ex == 0)) /* zero or subnormal */ + { + /* Clear sign and exponent */ + ux <<= 12; + if (ux == 0) + return RET_INVALID (RET_LOGB0); + /* subnormal */ + return (RET_TYPE)-1023 - stdc_leading_zeros (ux); + } + if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH)) + /* NaN or Inf */ + return RET_INVALID (ux << 12 ? RET_LOGBNAN : RET_LOGMAX); + return ex - 1023; +} diff --git a/sysdeps/ieee754/dbl-64/w_ilogb.c b/sysdeps/ieee754/dbl-64/w_ilogb.c new file mode 100644 index 0000000..e460f14 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/w_ilogb.c @@ -0,0 +1,52 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include <libm-alias-double.h> +#include "math_config.h" + +#ifdef DEF_AS_LLOGB +# define DECL_NAME __llogb +# define FUNC_NAME llogb +# define RET_TYPE long int +# define RET_LOGB0 FP_LLOGB0 +# define RET_LOGBNAN FP_LLOGBNAN +# define RET_LOGMAX LONG_MAX +# define RET_INVALID __math_invalid_li +#else +# define DECL_NAME __ilogb +# define FUNC_NAME ilogb +# define RET_TYPE int +# define RET_LOGB0 FP_ILOGB0 +# define RET_LOGBNAN FP_ILOGBNAN +# define RET_LOGMAX INT_MAX +# define RET_INVALID __math_invalid_i +#endif +#define __IMPL_NAME(x,y) x ## _ ## y +#define _IMPL_NAME(x,y) __IMPL_NAME(x,y) +#define IMPL_NAME _IMPL_NAME(FUNC_NAME, impl) +#include <w_ilogb-impl.h> + +RET_TYPE +DECL_NAME (double x) +{ + return IMPL_NAME (x); +} +libm_alias_double (DECL_NAME, FUNC_NAME) diff --git a/sysdeps/ieee754/dbl-64/w_llogb.c b/sysdeps/ieee754/dbl-64/w_llogb.c new file mode 100644 index 0000000..c984cd15 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/w_llogb.c @@ -0,0 +1,2 @@ +#define DEF_AS_LLOGB +#include "w_ilogb.c" diff --git a/sysdeps/ieee754/flt-32/e_ilogbf.c b/sysdeps/ieee754/flt-32/e_ilogbf.c index db24012..a27fb94 100644 --- a/sysdeps/ieee754/flt-32/e_ilogbf.c +++ b/sysdeps/ieee754/flt-32/e_ilogbf.c @@ -1,43 +1 @@ -/* s_ilogbf.c -- float version of s_ilogb.c. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ - -#if defined(LIBM_SCCS) && !defined(lint) -static char rcsid[] = "$NetBSD: s_ilogbf.c,v 1.4 1995/05/10 20:47:31 jtc Exp $"; -#endif - -#include <limits.h> -#include <math.h> -#include <math_private.h> - -int __ieee754_ilogbf(float x) -{ - int32_t hx,ix; - - GET_FLOAT_WORD(hx,x); - hx &= 0x7fffffff; - if(hx<0x00800000) { - if(hx==0) - return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */ - else /* subnormal x */ - for (ix = -126,hx<<=8; hx>0; hx<<=1) ix -=1; - return ix; - } - else if (hx<0x7f800000) return (hx>>23)-127; - else if (FP_ILOGBNAN != INT_MAX) { - /* ISO C99 requires ilogbf(+-Inf) == INT_MAX. */ - if (hx==0x7f800000) - return INT_MAX; - } - return FP_ILOGBNAN; -} +/* ilogbf is implemented at w_ilogbf.c */ diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 8d9c8ee..33ea631 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -165,6 +165,7 @@ issignalingf_inline (float x) #define BIT_WIDTH 32 #define MANTISSA_WIDTH 23 #define EXPONENT_WIDTH 8 +#define EXPONENT_BIAS 127 #define MANTISSA_MASK 0x007fffff #define EXPONENT_MASK 0x7f800000 #define EXP_MANT_MASK 0x7fffffff @@ -177,12 +178,24 @@ is_nan (uint32_t x) return (x & EXP_MANT_MASK) > EXPONENT_MASK; } +static inline bool +is_inf (uint32_t x) +{ + return (x << 1) == (EXPONENT_MASK << 1); +} + static inline uint32_t get_mantissa (uint32_t x) { return x & MANTISSA_MASK; } +static inline int +get_exponent (uint32_t x) +{ + return (int)((x >> MANTISSA_WIDTH & 0xff) - EXPONENT_BIAS); +} + /* Convert integer number X, unbiased exponent EP, and sign S to double: result = X * 2^(EP+1 - exponent_bias) @@ -208,6 +221,8 @@ attribute_hidden float __math_uflowf (uint32_t); attribute_hidden float __math_may_uflowf (uint32_t); attribute_hidden float __math_divzerof (uint32_t); attribute_hidden float __math_invalidf (float); +attribute_hidden int __math_invalidf_i (int); 
+attribute_hidden long int __math_invalidf_li (long int); attribute_hidden float __math_edomf (float x); /* Shared between expf, exp2f, exp10f, and powf. */ diff --git a/sysdeps/ieee754/flt-32/math_errf.c b/sysdeps/ieee754/flt-32/math_errf.c index edcc4c0..244e38a 100644 --- a/sysdeps/ieee754/flt-32/math_errf.c +++ b/sysdeps/ieee754/flt-32/math_errf.c @@ -16,6 +16,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ +#include <math-barriers.h> #include "math_config.h" #if WANT_ERRNO @@ -27,8 +28,24 @@ with_errnof (float y, int e) errno = e; return y; } + +NOINLINE static int +with_errnof_i (int y, int e) +{ + errno = e; + return y; +} + +NOINLINE static long int +with_errnof_li (long int y, int e) +{ + errno = e; + return y; +} #else # define with_errnof(x, e) (x) +# define with_errnof_i(x, x) (x) +# define with_errnof_li(x, x) (x) #endif attribute_hidden float @@ -80,3 +97,19 @@ __math_invalidf (float x) float y = (x - x) / (x - x); return isnan (x) ? y : with_errnof (y, EDOM); } + +attribute_hidden int +__math_invalidf_i (int x) +{ + float y = 0.0f / 0.0f; + math_force_eval (y); + return with_errnof_i (x, EDOM); +} + +attribute_hidden long int +__math_invalidf_li (long int x) +{ + float y = 0.0f / 0.0f; + math_force_eval (y); + return with_errnof_li (x, EDOM); +} diff --git a/sysdeps/ieee754/flt-32/s_modff.c b/sysdeps/ieee754/flt-32/s_modff.c index ad2e91d..965136b 100644 --- a/sysdeps/ieee754/flt-32/s_modff.c +++ b/sysdeps/ieee754/flt-32/s_modff.c @@ -1,54 +1,69 @@ -/* s_modff.c -- float version of s_modf.c. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ +/* Extract signed integral and fractional values. + Copyright (C) 1993-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ #include <math.h> -#include <math_private.h> #include <libm-alias-float.h> - -static const float one = 1.0; +#include "math_config.h" +#include <math-use-builtins-trunc.h> float -__modff(float x, float *iptr) +__modff (float x, float *iptr) { - int32_t i0,j0; - uint32_t i; - GET_FLOAT_WORD(i0,x); - j0 = ((i0>>23)&0xff)-0x7f; /* exponent of x */ - if(__builtin_expect(j0<23, 1)) { /* integer part in x */ - if(j0<0) { /* |x|<1 */ - SET_FLOAT_WORD(*iptr,i0&0x80000000); /* *iptr = +-0 */ - return x; - } else { - i = (0x007fffff)>>j0; - if((i0&i)==0) { /* x is integral */ - uint32_t ix; - *iptr = x; - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(x,ix&0x80000000); /* return +-0 */ - return x; - } else { - SET_FLOAT_WORD(*iptr,i0&(~i)); - return x - *iptr; - } - } - } else { /* no fraction part */ - *iptr = x*one; - /* We must handle NaNs separately. 
*/ - if (j0 == 0x80 && (i0 & 0x7fffff)) - return x*one; - SET_FLOAT_WORD(x,i0&0x80000000); /* return +-0 */ - return x; + uint32_t t = asuint (x); +#if USE_TRUNCF_BUILTIN + if (is_inf (t)) + { + *iptr = x; + return copysignf (0.0, x); + } + *iptr = truncf (x); + return copysignf (x - *iptr, x); +#else + int e = get_exponent (t); + /* No fraction part. */ + if (e < MANTISSA_WIDTH) + { + if (e < 0) + { + /* |x|<1 -> *iptr = +-0 */ + *iptr = asfloat (t & SIGN_MASK); + return x; } + + uint32_t i = MANTISSA_MASK >> e; + if ((t & i) == 0) + { + /* x in integral, return +-0 */ + *iptr = x; + return asfloat (t & SIGN_MASK); + } + + *iptr = asfloat (t & ~i); + return x - *iptr; + } + + /* Set invalid operation for sNaN. */ + *iptr = x * 1.0f; + if ((e == 0x80) && (t & MANTISSA_MASK)) + return *iptr; + return asfloat (t & SIGN_MASK); +#endif } +#ifndef __modff libm_alias_float (__modf, modf) +#endif diff --git a/sysdeps/ieee754/flt-32/w_ilogbf-impl.h b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h new file mode 100644 index 0000000..5aa8bf0 --- /dev/null +++ b/sysdeps/ieee754/flt-32/w_ilogbf-impl.h @@ -0,0 +1,38 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +static inline RET_TYPE +IMPL_NAME (float x) +{ + uint32_t ux = asuint (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (__glibc_unlikely (ex == 0)) + { + /* Zero or subnormal. + Clear sign and exponent. */ + ux <<= 1 + EXPONENT_WIDTH; + if (ux == 0) + return RET_INVALID (RET_LOGB0); + /* subnormal */ + return (RET_TYPE)-127 - stdc_leading_zeros (ux); + } + if (__glibc_unlikely (ex == EXPONENT_MASK >> MANTISSA_WIDTH)) + /* NaN or Inf */ + return RET_INVALID (ux << (1 + EXPONENT_WIDTH) ? RET_LOGBNAN : RET_LOGMAX); + return ex - 127; +} diff --git a/sysdeps/ieee754/flt-32/w_ilogbf.c b/sysdeps/ieee754/flt-32/w_ilogbf.c new file mode 100644 index 0000000..4e2a707 --- /dev/null +++ b/sysdeps/ieee754/flt-32/w_ilogbf.c @@ -0,0 +1,53 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include <libm-alias-float.h> +#include <math-type-macros-float.h> +#include "math_config.h" + +#ifdef DEF_AS_LLOGBF +# define DECL_NAME __llogb +# define FUNC_NAME llogb +# define RET_TYPE long int +# define RET_LOGB0 FP_LLOGB0 +# define RET_LOGBNAN FP_LLOGBNAN +# define RET_LOGMAX LONG_MAX +# define RET_INVALID __math_invalidf_li +#else +# define DECL_NAME __ilogb +# define FUNC_NAME ilogb +# define RET_TYPE int +# define RET_LOGB0 FP_ILOGB0 +# define RET_LOGBNAN FP_ILOGBNAN +# define RET_LOGMAX INT_MAX +# define RET_INVALID __math_invalidf_i +#endif +#define __IMPL_NAME(x,y) x ## _ ## y +#define _IMPL_NAME(x,y) __IMPL_NAME(x,y) +#define IMPL_NAME _IMPL_NAME(FUNC_NAME, impl) +#include <w_ilogbf-impl.h> + +RET_TYPE +M_DECL_FUNC (DECL_NAME) (float x) +{ + return IMPL_NAME (x); +} +libm_alias_float (DECL_NAME, FUNC_NAME); diff --git a/sysdeps/ieee754/flt-32/w_llogbf.c b/sysdeps/ieee754/flt-32/w_llogbf.c new file mode 100644 index 0000000..8676434 --- /dev/null +++ b/sysdeps/ieee754/flt-32/w_llogbf.c @@ -0,0 +1,2 @@ +#define DEF_AS_LLOGBF +#include "w_ilogbf.c" diff --git a/sysdeps/ieee754/ldbl-128/Makefile b/sysdeps/ieee754/ldbl-128/Makefile index 5476a55..e666bdc 100644 --- a/sysdeps/ieee754/ldbl-128/Makefile +++ b/sysdeps/ieee754/ldbl-128/Makefile @@ -83,7 +83,7 @@ CFLAGS-w_j1l.c += -fno-builtin-j1f64x -fno-builtin-j1f128 CFLAGS-w_jnl.c += -fno-builtin-jnf64x -fno-builtin-jnf128 CFLAGS-s_ldexpl.c += -fno-builtin-ldexpf64x -fno-builtin-ldexpf128 CFLAGS-w_lgammal.c += -fno-builtin-lgammaf64x -fno-builtin-lgammaf128 -CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r +CFLAGS-w_lgammal_r.c += -fno-builtin-lgammaf64x_r -fno-builtin-lgammaf128_r CFLAGS-w_llogbl.c += -fno-builtin-llogbf64x -fno-builtin-llogbf128 CFLAGS-s_llrintl.c += -fno-builtin-llrintf64x -fno-builtin-llrintf128 CFLAGS-s_llroundl.c += -fno-builtin-llroundf64x -fno-builtin-llroundf128 diff --git 
a/sysdeps/ieee754/ldbl-128ibm-compat/Versions b/sysdeps/ieee754/ldbl-128ibm-compat/Versions index 29a3869..ae4bd5b 100644 --- a/sysdeps/ieee754/ldbl-128ibm-compat/Versions +++ b/sysdeps/ieee754/ldbl-128ibm-compat/Versions @@ -157,6 +157,7 @@ libm { __compoundnieee128; __pownieee128; __powrieee128; + __rootnieee128; __rsqrtieee128; } } diff --git a/sysdeps/ieee754/ldbl-opt/Makefile b/sysdeps/ieee754/ldbl-opt/Makefile index 72369eb..ef7da1f 100644 --- a/sysdeps/ieee754/ldbl-opt/Makefile +++ b/sysdeps/ieee754/ldbl-opt/Makefile @@ -181,6 +181,7 @@ libnldbl-calls = \ remainder \ remquo \ rint \ + rootn \ round \ roundeven \ rsqrt \ @@ -265,7 +266,7 @@ extra-objs += $(addsuffix .oS, $(libnldbl-routines)) CFLAGS-nldbl-acos.c = -fno-builtin-acosl CFLAGS-nldbl-acosh.c = -fno-builtin-acoshl -CFLAGS-nldbl-acospi.c = -fno-builtin-acospi +CFLAGS-nldbl-acospi.c = -fno-builtin-acospil CFLAGS-nldbl-asin.c = -fno-builtin-asinl CFLAGS-nldbl-asinh.c = -fno-builtin-asinhl CFLAGS-nldbl-asinpi.c = -fno-builtin-asinpil @@ -296,7 +297,7 @@ CFLAGS-nldbl-conj.c = -fno-builtin-conjl CFLAGS-nldbl-copysign.c = -fno-builtin-copysignl CFLAGS-nldbl-cos.c = -fno-builtin-cosl CFLAGS-nldbl-cosh.c = -fno-builtin-coshl -CFLAGS-nldbl-cospi.c = -fno-builtin-cospi +CFLAGS-nldbl-cospi.c = -fno-builtin-cospil CFLAGS-nldbl-cpow.c = -fno-builtin-cpowl CFLAGS-nldbl-cproj.c = -fno-builtin-cprojl CFLAGS-nldbl-creal.c = -fno-builtin-creall @@ -384,6 +385,7 @@ CFLAGS-nldbl-powr.c = -fno-builtin-powrl CFLAGS-nldbl-remainder.c = -fno-builtin-remainderl -fno-builtin-dreml CFLAGS-nldbl-remquo.c = -fno-builtin-remquol CFLAGS-nldbl-rint.c = -fno-builtin-rintl +CFLAGS-nldbl-rootn.c = -fno-builtin-rootnl CFLAGS-nldbl-round.c = -fno-builtin-roundl CFLAGS-nldbl-roundeven.c = -fno-builtin-roundevenl CFLAGS-nldbl-rsqrt.c = -fno-builtin-rsqrtl @@ -396,11 +398,11 @@ CFLAGS-nldbl-significand.c = -fno-builtin-significandl CFLAGS-nldbl-sin.c = -fno-builtin-sinl CFLAGS-nldbl-sincos.c = -fno-builtin-sincosl CFLAGS-nldbl-sinh.c = 
-fno-builtin-sinhl -CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpi +CFLAGS-nldbl-sinpi.c = -fno-builtin-sinpil CFLAGS-nldbl-sqrt.c = -fno-builtin-sqrtl CFLAGS-nldbl-tan.c = -fno-builtin-tanl CFLAGS-nldbl-tanh.c = -fno-builtin-tanhl -CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpi +CFLAGS-nldbl-tanpi.c = -fno-builtin-tanpil CFLAGS-nldbl-tgamma.c = -fno-builtin-tgammal CFLAGS-nldbl-totalorder.c = -fno-builtin-totalorderl CFLAGS-nldbl-totalordermag.c = -fno-builtin-totalordermagl diff --git a/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c new file mode 100644 index 0000000..fb0d860 --- /dev/null +++ b/sysdeps/ieee754/ldbl-opt/nldbl-rootn.c @@ -0,0 +1,8 @@ +#include "nldbl-compat.h" + +double +attribute_hidden +rootnl (double x, long long int y) +{ + return rootn (x, y); +} diff --git a/sysdeps/loongarch/fpu/e_ilogbf.c b/sysdeps/loongarch/fpu/e_ilogbf.c index adced63..a27fb94 100644 --- a/sysdeps/loongarch/fpu/e_ilogbf.c +++ b/sysdeps/loongarch/fpu/e_ilogbf.c @@ -1,39 +1 @@ -/* __ieee754_ilogbf(). LoongArch version. - Copyright (C) 2022-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - -#define NO_MATH_REDIRECT -#include <math.h> -#include <fpu_control.h> - -int -__ieee754_ilogbf (float x) -{ - int x_cond; - asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x)); - - if (__glibc_unlikely (x_cond & _FCLASS_ZERO)) - return FP_ILOGB0; - else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF))) - return FP_ILOGBNAN; - else - { - asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x)); - asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x)); - return x; - } -} +/* ilogbf is implemented at w_ilogbf.c */ diff --git a/sysdeps/loongarch/fpu/e_ilogb.c b/sysdeps/loongarch/fpu/w_ilogb-impl.h index f21fa5c..1905373 100644 --- a/sysdeps/loongarch/fpu/e_ilogb.c +++ b/sysdeps/loongarch/fpu/w_ilogb-impl.h @@ -1,4 +1,4 @@ -/* __ieee754_ilogb(). LoongArch version. +/* Get integer exponent of a floating-point value. LoongArch version. Copyright (C) 2022-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,20 +16,18 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define NO_MATH_REDIRECT -#include <math.h> #include <fpu_control.h> -int -__ieee754_ilogb (double x) +static inline RET_TYPE +IMPL_NAME (double x) { int x_cond; asm volatile ("fclass.d \t%0, %1" : "=f" (x_cond) : "f" (x)); if (__glibc_unlikely (x_cond & _FCLASS_ZERO)) - return FP_ILOGB0; + return RET_INVALID (RET_LOGB0); else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF))) - return FP_ILOGBNAN; + return RET_INVALID (RET_LOGBNAN); else { asm volatile ("fabs.d \t%0, %1" : "=f" (x) : "f" (x)); diff --git a/sysdeps/loongarch/fpu/w_ilogbf-impl.h b/sysdeps/loongarch/fpu/w_ilogbf-impl.h new file mode 100644 index 0000000..9cb4172 --- /dev/null +++ b/sysdeps/loongarch/fpu/w_ilogbf-impl.h @@ -0,0 +1,37 @@ +/* Get integer exponent of a floating-point value. LoongArch version. + Copyright (C) 2022-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <fpu_control.h> + +static inline RET_TYPE +IMPL_NAME (float x) +{ + int x_cond; + asm volatile ("fclass.s \t%0, %1" : "=f" (x_cond) : "f" (x)); + + if (__glibc_unlikely (x_cond & _FCLASS_ZERO)) + return RET_INVALID (RET_LOGB0); + else if (__glibc_unlikely (x_cond & ( _FCLASS_NAN | _FCLASS_INF))) + return RET_INVALID (RET_LOGBNAN); + else + { + asm volatile ("fabs.s \t%0, %1" : "=f" (x) : "f" (x)); + asm volatile ("flogb.s \t%0, %1" : "=f" (x) : "f" (x)); + return x; + } +} diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure index 0d1e9ed..6726ab8 100644 --- a/sysdeps/loongarch/preconfigure +++ b/sysdeps/loongarch/preconfigure @@ -44,6 +44,7 @@ loongarch*) base_machine=loongarch mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac index df07dbf..5640226 100644 --- a/sysdeps/loongarch/preconfigure.ac +++ b/sysdeps/loongarch/preconfigure.ac @@ -42,6 +42,7 @@ loongarch*) base_machine=loongarch mtls_descriptor=desc + mtls_traditional=trad ;; esac diff --git a/sysdeps/m68k/m680x0/fpu/math_err.c b/sysdeps/m68k/m680x0/fpu/math_err.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/m68k/m680x0/fpu/math_err.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. 
*/ diff --git a/sysdeps/m68k/m680x0/w_ilogb.c b/sysdeps/m68k/m680x0/w_ilogb.c new file mode 100644 index 0000000..9c26217 --- /dev/null +++ b/sysdeps/m68k/m680x0/w_ilogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/m68k/m680x0/w_ilogbf.c b/sysdeps/m68k/m680x0/w_ilogbf.c new file mode 100644 index 0000000..047ad4b --- /dev/null +++ b/sysdeps/m68k/m680x0/w_ilogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/m68k/m680x0/w_llogb.c b/sysdeps/m68k/m680x0/w_llogb.c new file mode 100644 index 0000000..5e8891a --- /dev/null +++ b/sysdeps/m68k/m680x0/w_llogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_llogb_template.c> diff --git a/sysdeps/m68k/m680x0/w_llogbf.c b/sysdeps/m68k/m680x0/w_llogbf.c new file mode 100644 index 0000000..edb7e9a --- /dev/null +++ b/sysdeps/m68k/m680x0/w_llogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_llogb_template.c> diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile index 994de00..32bba61 100644 --- a/sysdeps/mach/hurd/Makefile +++ b/sysdeps/mach/hurd/Makefile @@ -311,9 +311,6 @@ endif ifeq ($(subdir),htl) tests-unsupported += tst-basic7 endif -ifeq ($(subdir),io) -tests-unsupported += test-lfs -endif ifeq ($(subdir),libio) tests-unsupported += tst-asprintf-null endif @@ -323,6 +320,7 @@ tests-unsupported += tst-malloc-thread-fail-malloc-check tests-unsupported += tst-malloc-thread-fail-mcheck tests-unsupported += tst-malloc-thread-fail-malloc-hugetlb1 tests-unsupported += tst-malloc-thread-fail-malloc-hugetlb2 +tests-unsupported += tst-malloc-thread-fail-malloc-largetcache tests-unsupported += tst-dynarray-fail endif ifeq ($(subdir),misc) @@ -339,7 +337,8 @@ ifeq ($(subdir),stdlib) tests-unsupported += test-bz22786 tst-strtod-overflow # pthread_cleanup_combined_push/pthread_cleanup_combined_pop requires cleanup # support (BZ 32058). 
-test-xfail-tst-qsortx7 = yes +test-xfail-tst-qsort7-mem = yes +test-xfail-tst-qsortx7-mem = yes endif ifeq ($(subdir),timezone) tests-unsupported += tst-tzset diff --git a/sysdeps/mach/hurd/bits/ioctls.h b/sysdeps/mach/hurd/bits/ioctls.h index f01316d..faf1373 100644 --- a/sysdeps/mach/hurd/bits/ioctls.h +++ b/sysdeps/mach/hurd/bits/ioctls.h @@ -324,15 +324,8 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 }; From 4.4 <sys/ioctl_compat.h>. */ #ifdef __USE_MISC -#ifdef USE_OLD_TTY -# undef TIOCGETD -# define TIOCGETD _IOR('t', 0, int) /* get line discipline */ -# undef TIOCSETD -# define TIOCSETD _IOW('t', 1, int) /* set line discipline */ -#else -# define OTIOCGETD _IOR('t', 0, int) /* get line discipline */ -# define OTIOCSETD _IOW('t', 1, int) /* set line discipline */ -#endif +#define OTIOCGETD _IOR('t', 0, int) /* get line discipline */ +#define OTIOCSETD _IOW('t', 1, int) /* set line discipline */ #define TIOCHPCL _IO('t', 2) /* hang up on last close */ #define TIOCGETP _IOR('t', 8,struct sgttyb)/* get parameters -- gtty */ #define TIOCSETP _IOW('t', 9,struct sgttyb)/* set parameters -- stty */ @@ -411,26 +404,6 @@ enum __ioctl_datum { IOC_8, IOC_16, IOC_32, IOC_64 }; #define OTTYDISC 0 #define NETLDISC 1 #define NTTYDISC 2 - -/* From 4.4 <sys/ttydev.h>. */ -#ifdef USE_OLD_TTY -# define B0 0 -# define B50 1 -# define B75 2 -# define B110 3 -# define B134 4 -# define B150 5 -# define B200 6 -# define B300 7 -# define B600 8 -# define B1200 9 -# define B1800 10 -# define B2400 11 -# define B4800 12 -# define B9600 13 -# define EXTA 14 -# define EXTB 15 -#endif /* USE_OLD_TTY */ #endif #endif /* bits/ioctls.h */ diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S b/sysdeps/mach/hurd/getrandom-internal.h index c9d2f4e..8bd718b 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr-power10.S +++ b/sysdeps/mach/hurd/getrandom-internal.h @@ -1,5 +1,5 @@ -/* Optimized memchr implementation for POWER10/PPC64. 
- Copyright (C) 2016-2025 Free Software Foundation, Inc. +/* Internal definitions for Hurd getrandom implementation. + Copyright (C) 2024-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,15 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define MEMCHR __memchr_power10 +#ifndef _GETRANDOM_INTERNAL_H +#define _GETRANDOM_INTERNAL_H -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(name,alias) +extern void __mach_init (void); + +static inline void __getrandom_early_init (_Bool initial) +{ + /* getrandom needs RPCs for time etc. */ + __mach_init (); +} -#include <sysdeps/powerpc/powerpc64/le/power10/memchr.S> #endif diff --git a/sysdeps/mach/hurd/i386/libc.abilist b/sysdeps/mach/hurd/i386/libc.abilist index 3e183f5..aac3cb3 100644 --- a/sysdeps/mach/hurd/i386/libc.abilist +++ b/sysdeps/mach/hurd/i386/libc.abilist @@ -28,6 +28,8 @@ GLIBC_2.11 mkostemps F GLIBC_2.11 mkostemps64 F GLIBC_2.11 mkstemps F GLIBC_2.11 mkstemps64 F +GLIBC_2.12 __pthread_get_cleanup_stack F +GLIBC_2.12 __pthread_key_create F GLIBC_2.12 __pthread_self F GLIBC_2.12 pthread_attr_destroy F GLIBC_2.12 pthread_attr_getdetachstate F @@ -70,6 +72,9 @@ GLIBC_2.12 pthread_condattr_setclock F GLIBC_2.12 pthread_condattr_setpshared F GLIBC_2.12 pthread_equal F GLIBC_2.12 pthread_getschedparam F +GLIBC_2.12 pthread_getspecific F +GLIBC_2.12 pthread_key_create F +GLIBC_2.12 pthread_key_delete F GLIBC_2.12 pthread_mutex_destroy F GLIBC_2.12 pthread_mutex_getprioceiling F GLIBC_2.12 pthread_mutex_init F @@ -106,6 +111,7 @@ GLIBC_2.12 pthread_self F GLIBC_2.12 pthread_setcancelstate F GLIBC_2.12 pthread_setcanceltype F GLIBC_2.12 pthread_setschedparam F +GLIBC_2.12 pthread_setspecific F GLIBC_2.12 pthread_sigmask F GLIBC_2.13 __fentry__ F GLIBC_2.14 syncfs F 
@@ -2586,6 +2592,11 @@ GLIBC_2.41 pthread_mutexattr_settype F GLIBC_2.41 pthread_sigmask F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetobaud F GLIBC_2.42 pthread_barrier_destroy F GLIBC_2.42 pthread_barrier_init F GLIBC_2.42 pthread_barrier_wait F @@ -2593,6 +2604,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F GLIBC_2.42 pthread_barrierattr_getpshared F GLIBC_2.42 pthread_barrierattr_init F GLIBC_2.42 pthread_barrierattr_setpshared F +GLIBC_2.42 pthread_getspecific F +GLIBC_2.42 pthread_key_create F +GLIBC_2.42 pthread_key_delete F GLIBC_2.42 pthread_mutex_consistent F GLIBC_2.42 pthread_mutex_consistent_np F GLIBC_2.42 pthread_mutex_getprioceiling F @@ -2614,6 +2628,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F GLIBC_2.42 pthread_rwlockattr_getpshared F GLIBC_2.42 pthread_rwlockattr_init F GLIBC_2.42 pthread_rwlockattr_setpshared F +GLIBC_2.42 pthread_setspecific F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/mach/hurd/i386/libm.abilist b/sysdeps/mach/hurd/i386/libm.abilist index 6948b42..47d215f 100644 --- a/sysdeps/mach/hurd/i386/libm.abilist +++ b/sysdeps/mach/hurd/i386/libm.abilist @@ -1301,6 +1301,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/mach/hurd/i386/libpthread.abilist b/sysdeps/mach/hurd/i386/libpthread.abilist index b067d37..9d2c4cd 100644 --- a/sysdeps/mach/hurd/i386/libpthread.abilist +++ b/sysdeps/mach/hurd/i386/libpthread.abilist @@ -1,7 +1,5 @@ GLIBC_2.12 __mutex_lock_solid F GLIBC_2.12 __mutex_unlock_solid F -GLIBC_2.12 __pthread_get_cleanup_stack F -GLIBC_2.12 __pthread_key_create F 
GLIBC_2.12 __pthread_kill F GLIBC_2.12 __pthread_mutex_transfer_np F GLIBC_2.12 __pthread_spin_destroy F @@ -29,15 +27,11 @@ GLIBC_2.12 pthread_exit F GLIBC_2.12 pthread_getattr_np F GLIBC_2.12 pthread_getconcurrency F GLIBC_2.12 pthread_getcpuclockid F -GLIBC_2.12 pthread_getspecific F GLIBC_2.12 pthread_join F -GLIBC_2.12 pthread_key_create F -GLIBC_2.12 pthread_key_delete F GLIBC_2.12 pthread_kill F GLIBC_2.12 pthread_mutex_transfer_np F GLIBC_2.12 pthread_setconcurrency F GLIBC_2.12 pthread_setschedprio F -GLIBC_2.12 pthread_setspecific F GLIBC_2.12 pthread_spin_destroy F GLIBC_2.12 pthread_spin_init F GLIBC_2.12 pthread_spin_lock F diff --git a/sysdeps/mach/hurd/x86_64/libc.abilist b/sysdeps/mach/hurd/x86_64/libc.abilist index 688ee26..8f9d6aa 100644 --- a/sysdeps/mach/hurd/x86_64/libc.abilist +++ b/sysdeps/mach/hurd/x86_64/libc.abilist @@ -392,6 +392,7 @@ GLIBC_2.38 __profile_frequency F GLIBC_2.38 __progname D 0x8 GLIBC_2.38 __progname_full D 0x8 GLIBC_2.38 __pthread_get_cleanup_stack F +GLIBC_2.38 __pthread_key_create F GLIBC_2.38 __pthread_self F GLIBC_2.38 __ptsname_r_chk F GLIBC_2.38 __pwrite64 F @@ -1554,6 +1555,9 @@ GLIBC_2.38 pthread_condattr_setpshared F GLIBC_2.38 pthread_equal F GLIBC_2.38 pthread_exit F GLIBC_2.38 pthread_getschedparam F +GLIBC_2.38 pthread_getspecific F +GLIBC_2.38 pthread_key_create F +GLIBC_2.38 pthread_key_delete F GLIBC_2.38 pthread_mutex_clocklock F GLIBC_2.38 pthread_mutex_consistent F GLIBC_2.38 pthread_mutex_consistent_np F @@ -1599,6 +1603,7 @@ GLIBC_2.38 pthread_self F GLIBC_2.38 pthread_setcancelstate F GLIBC_2.38 pthread_setcanceltype F GLIBC_2.38 pthread_setschedparam F +GLIBC_2.38 pthread_setspecific F GLIBC_2.38 pthread_sigmask F GLIBC_2.38 ptrace F GLIBC_2.38 ptsname F @@ -2269,6 +2274,11 @@ GLIBC_2.41 pthread_mutexattr_settype F GLIBC_2.41 pthread_sigmask F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F 
+GLIBC_2.42 cfsetobaud F GLIBC_2.42 pthread_barrier_destroy F GLIBC_2.42 pthread_barrier_init F GLIBC_2.42 pthread_barrier_wait F @@ -2276,6 +2286,9 @@ GLIBC_2.42 pthread_barrierattr_destroy F GLIBC_2.42 pthread_barrierattr_getpshared F GLIBC_2.42 pthread_barrierattr_init F GLIBC_2.42 pthread_barrierattr_setpshared F +GLIBC_2.42 pthread_getspecific F +GLIBC_2.42 pthread_key_create F +GLIBC_2.42 pthread_key_delete F GLIBC_2.42 pthread_mutex_consistent F GLIBC_2.42 pthread_mutex_consistent_np F GLIBC_2.42 pthread_mutex_getprioceiling F @@ -2297,6 +2310,7 @@ GLIBC_2.42 pthread_rwlockattr_destroy F GLIBC_2.42 pthread_rwlockattr_getpshared F GLIBC_2.42 pthread_rwlockattr_init F GLIBC_2.42 pthread_rwlockattr_setpshared F +GLIBC_2.42 pthread_setspecific F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/mach/hurd/x86_64/libm.abilist b/sysdeps/mach/hurd/x86_64/libm.abilist index 4810dfb..52c9d56 100644 --- a/sysdeps/mach/hurd/x86_64/libm.abilist +++ b/sysdeps/mach/hurd/x86_64/libm.abilist @@ -1158,6 +1158,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/mach/hurd/x86_64/libpthread.abilist b/sysdeps/mach/hurd/x86_64/libpthread.abilist index 6b8acec..81d355a 100644 --- a/sysdeps/mach/hurd/x86_64/libpthread.abilist +++ b/sysdeps/mach/hurd/x86_64/libpthread.abilist @@ -5,8 +5,6 @@ GLIBC_2.38 __errno_location F GLIBC_2.38 __h_errno_location F GLIBC_2.38 __mutex_lock_solid F GLIBC_2.38 __mutex_unlock_solid F -GLIBC_2.38 __pthread_get_cleanup_stack F -GLIBC_2.38 __pthread_key_create F GLIBC_2.38 __pthread_kill F GLIBC_2.38 __pthread_mutex_transfer_np F GLIBC_2.38 __pthread_spin_destroy F @@ -47,17 +45,13 @@ GLIBC_2.38 pthread_exit F GLIBC_2.38 
pthread_getattr_np F GLIBC_2.38 pthread_getconcurrency F GLIBC_2.38 pthread_getcpuclockid F -GLIBC_2.38 pthread_getspecific F GLIBC_2.38 pthread_hurd_cond_timedwait_np F GLIBC_2.38 pthread_hurd_cond_wait_np F GLIBC_2.38 pthread_join F -GLIBC_2.38 pthread_key_create F -GLIBC_2.38 pthread_key_delete F GLIBC_2.38 pthread_kill F GLIBC_2.38 pthread_mutex_transfer_np F GLIBC_2.38 pthread_setconcurrency F GLIBC_2.38 pthread_setschedprio F -GLIBC_2.38 pthread_setspecific F GLIBC_2.38 pthread_spin_destroy F GLIBC_2.38 pthread_spin_init F GLIBC_2.38 pthread_spin_lock F diff --git a/sysdeps/mach/sysdep.h b/sysdeps/mach/sysdep.h index 8293c66..581bdcd 100644 --- a/sysdeps/mach/sysdep.h +++ b/sysdeps/mach/sysdep.h @@ -20,6 +20,11 @@ /* Get the Mach definitions of ENTRY and kernel_trap. */ #include <mach/machine/syscall_sw.h> +/* This macro is defined in Mach system headers, but string functions use it + with different definitions depending on whether being compiled for + wide-characters or not. */ +#undef P2ALIGN + /* The Mach definitions assume underscores should be prepended to symbol names. Redefine them to do so only when appropriate. */ #undef EXT diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c index d90cc6c..6f75197 100644 --- a/sysdeps/posix/libc_fatal.c +++ b/sysdeps/posix/libc_fatal.c @@ -16,23 +16,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#include <atomic.h> -#include <errno.h> -#include <fcntl.h> +#include <assert.h> #include <ldsodefs.h> -#include <libc-pointer-arith.h> -#include <paths.h> +#include <setvmaname.h> #include <stdarg.h> -#include <stdbool.h> #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sysdep.h> -#include <unistd.h> -#include <sys/mman.h> #include <sys/uio.h> -#include <not-cancel.h> -#include <setvmaname.h> +#include <unistd.h> #ifdef FATAL_PREPARE_INCLUDE #include FATAL_PREPARE_INCLUDE @@ -47,6 +37,10 @@ writev_for_fatal (int fd, const struct iovec *iov, size_t niov, size_t total) } #endif +/* At most a substring before each conversion specification and the + trailing substring (the plus one). */ +#define IOVEC_MAX (LIBC_MESSAGE_MAX_ARGS * 2 + 1) + /* Abort with an error message. */ void __libc_message_impl (const char *fmt, ...) @@ -61,7 +55,7 @@ __libc_message_impl (const char *fmt, ...) if (fd == -1) fd = STDERR_FILENO; - struct iovec iov[LIBC_MESSAGE_MAX_ARGS * 2 - 1]; + struct iovec iov[IOVEC_MAX]; int iovcnt = 0; ssize_t total = 0; @@ -99,6 +93,16 @@ __libc_message_impl (const char *fmt, ...) iov[iovcnt].iov_len = len; total += len; iovcnt++; + + if (__glibc_unlikely (iovcnt > IOVEC_MAX)) + { + len = IOVEC_MAX_ERR_MSG_LEN; + iov[0].iov_base = (char *) IOVEC_MAX_ERR_MSG; + iov[0].iov_len = len; + total = len; + iovcnt = 1; + break; + } } va_end (ap); diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index 5e6cb07..5cdb64f 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so + +# The test checks if the __tls_get_addr does not clobber caller-saved +# register, so disable the powerpc specific optimization to force a +# __tls_get_addr call. 
+LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize endif ifneq (no,$(multi-arch)) diff --git a/sysdeps/powerpc/fpu/math-use-builtins-trunc.h b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h new file mode 100644 index 0000000..3e6a55d --- /dev/null +++ b/sysdeps/powerpc/fpu/math-use-builtins-trunc.h @@ -0,0 +1,9 @@ +#ifdef _ARCH_PWR5X +# define USE_TRUNCF_BUILTIN 1 +# define USE_TRUNC_BUILTIN 1 +#else +# define USE_TRUNCF_BUILTIN 0 +# define USE_TRUNC_BUILTIN 0 +#endif +#define USE_TRUNCL_BUILTIN 0 +#define USE_TRUNCF128_BUILTIN 0 diff --git a/sysdeps/powerpc/fpu/s_modf.c b/sysdeps/powerpc/fpu/s_modf.c deleted file mode 100644 index 831072b..0000000 --- a/sysdeps/powerpc/fpu/s_modf.c +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <https://www.gnu.org/licenses/>. */ - -/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make - generic implementation faster. Also disables for old ISAs that do not - have ceil/floor instructions. 
*/ -#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X) -# include <sysdeps/ieee754/ldbl-opt/s_modf.c> -#else -# include <math.h> -# include <math_ldbl_opt.h> -# include <libm-alias-double.h> - -double -__modf (double x, double *iptr) -{ - if (__builtin_isinf (x)) - { - *iptr = x; - return copysign (0.0, x); - } - else if (__builtin_isnan (x)) - { - *iptr = NAN; - return NAN; - } - - if (x >= 0.0) - { - *iptr = floor (x); - return copysign (x - *iptr, x); - } - else - { - *iptr = ceil (x); - return copysign (x - *iptr, x); - } -} -# ifndef __modf -libm_alias_double (__modf, modf) -# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) -compat_symbol (libc, __modf, modfl, GLIBC_2_0); -# endif -# endif -#endif diff --git a/sysdeps/powerpc/fpu/s_modff.c b/sysdeps/powerpc/fpu/s_modff.c deleted file mode 100644 index 79eeb7b..0000000 --- a/sysdeps/powerpc/fpu/s_modff.c +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (C) 2013-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <https://www.gnu.org/licenses/>. */ - -/* ISA 2.07 provides fast GPR to FP instruction (mfvsr{d,wz}) which make - generic implementation faster. Also disables for old ISAs that do not - have ceil/floor instructions. 
*/ -#if defined(_ARCH_PWR8) || !defined(_ARCH_PWR5X) -# include <sysdeps/ieee754/flt-32/s_modff.c> -#else -# include <math.h> -# include <libm-alias-float.h> - -float -__modff (float x, float *iptr) -{ - if (__builtin_isinff (x)) - { - *iptr = x; - return copysignf (0.0, x); - } - else if (__builtin_isnanf (x)) - { - *iptr = NAN; - return NAN; - } - - if (x >= 0.0) - { - *iptr = floorf (x); - return copysignf (x - *iptr, x); - } - else - { - *iptr = ceilf (x); - return copysignf (x - *iptr, x); - } -} -# ifndef __modff -libm_alias_float (__modf, modf) -# endif -#endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c index b8315c5..48f3a19 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. */ #define __modf __modf_power5plus -#include <sysdeps/powerpc/fpu/s_modf.c> +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c index 69591da..15bfa0b 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. */ #define __modff __modff_power5plus -#include <sysdeps/powerpc/fpu/s_modff.c> +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c index b8315c5..48f3a19 100644 --- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c +++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modf-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. 
*/ #define __modf __modf_power5plus -#include <sysdeps/powerpc/fpu/s_modf.c> +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c index 69591da..15bfa0b 100644 --- a/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c +++ b/sysdeps/powerpc/powerpc64/be/fpu/multiarch/s_modff-power5+.c @@ -17,4 +17,4 @@ <https://www.gnu.org/licenses/>. */ #define __modff __modff_power5plus -#include <sysdeps/powerpc/fpu/s_modff.c> +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/powerpc/powerpc64/le/configure b/sysdeps/powerpc/powerpc64/le/configure index 7092f61..ef17f24 100644 --- a/sysdeps/powerpc/powerpc64/le/configure +++ b/sysdeps/powerpc/powerpc64/le/configure @@ -137,75 +137,5 @@ then : critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously." fi -for ac_prog in $OBJCOPY -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -printf %s "checking for $ac_word... " >&6; } -if test ${ac_cv_prog_OBJCOPY+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) if test -n "$OBJCOPY"; then - ac_cv_prog_OBJCOPY="$OBJCOPY" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - case $as_dir in #((( - '') as_dir=./ ;; - */) ;; - *) as_dir=$as_dir/ ;; - esac - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_OBJCOPY="$ac_prog" - printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi ;; -esac -fi -OBJCOPY=$ac_cv_prog_OBJCOPY -if test -n "$OBJCOPY"; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJCOPY" >&5 -printf "%s\n" "$OBJCOPY" >&6; } -else - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } -fi - - - test -n "$OBJCOPY" && break -done - -if test -z "$OBJCOPY"; then - ac_verc_fail=yes -else - # Found it, now check the version. - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking version of $OBJCOPY" >&5 -printf %s "checking version of $OBJCOPY... " >&6; } - ac_prog_version=`$OBJCOPY --version 2>&1 | sed -n 's/^.*GNU objcopy.* \([0-9]*\.[0-9.]*\).*$/\1/p'` - case $ac_prog_version in - '') ac_prog_version="v. ?.??, bad"; ac_verc_fail=yes;; - 2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*) - ac_prog_version="$ac_prog_version, ok"; ac_verc_fail=no;; - *) ac_prog_version="$ac_prog_version, bad"; ac_verc_fail=yes;; - - esac - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_prog_version" >&5 -printf "%s\n" "$ac_prog_version" >&6; } -fi -if test $ac_verc_fail = yes; then - AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le" -fi - - test -n "$critic_missing" && as_fn_error $? 
"*** $critic_missing" "$LINENO" 5 diff --git a/sysdeps/powerpc/powerpc64/le/configure.ac b/sysdeps/powerpc/powerpc64/le/configure.ac index 48d7089..79b3d43 100644 --- a/sysdeps/powerpc/powerpc64/le/configure.ac +++ b/sysdeps/powerpc/powerpc64/le/configure.ac @@ -66,11 +66,4 @@ CFLAGS="$save_CFLAGS"]) AS_IF([test "$libc_cv_compiler_powerpc64le_ldbl128_mabi" = "no"], [critic_missing="$critic_missing The compiler must support -mabi=ieeelongdouble and -mlong-double-128 simultaneously."]) -dnl objcopy (binutils) 2.26 or newer required to support the --update-section -dnl feature for fixing up .gnu.attribute section with IEEE ldbl. -AC_CHECK_PROG_VER(OBJCOPY, $OBJCOPY, --version, - [GNU objcopy.* \([0-9]*\.[0-9.]*\)], - [2.1[0-9][0-9]*|2.2[6-9]*|2.[3-9][0-9]*|[3-9].*|[1-9][0-9]*], - AS=: critic_missing="$critic_missing objcopy >= 2.26 is required on powerpc64le") - test -n "$critic_missing" && AC_MSG_ERROR([*** $critic_missing]) diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c new file mode 100644 index 0000000..89e7498 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogb.c @@ -0,0 +1,41 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include "math_config.h" + +int +__ieee754_ilogb (double x) +{ + uint64_t ux = asuint64 (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (ex == 0) /* zero or subnormal */ + { + /* Clear sign and exponent */ + ux <<= 12; + if (ux == 0) + return FP_ILOGB0; + /* subnormal */ + return -1023 - stdc_leading_zeros (ux); + } + if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */ + return ux << 12 ? FP_ILOGBNAN : INT_MAX; + return ex - 1023; +} diff --git a/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c new file mode 100644 index 0000000..1c2a8a5 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/e_ilogbf.c @@ -0,0 +1,41 @@ +/* Get integer exponent of a floating-point value. + Copyright (C) 1999-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <math.h> +#include <stdbit.h> +#include "sysdeps/ieee754/flt-32/math_config.h" + +int +__ieee754_ilogbf (float x) +{ + uint32_t ux = asuint (x); + int ex = (ux & ~SIGN_MASK) >> MANTISSA_WIDTH; + if (ex == 0) /* zero or subnormal */ + { + /* Clear sign and exponent. 
*/ + ux <<= 1 + EXPONENT_WIDTH; + if (ux == 0) + return FP_ILOGB0; + /* sbunormal */ + return -127 - stdc_leading_zeros (ux); + } + if (ex == EXPONENT_MASK >> MANTISSA_WIDTH) /* NaN or Inf */ + return ux << (1 + EXPONENT_WIDTH) ? FP_ILOGBNAN : INT_MAX; + return ex - 127; +} diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c new file mode 100644 index 0000000..9c26217 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c new file mode 100644 index 0000000..047ad4b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_ilogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_ilogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c new file mode 100644 index 0000000..5e8891a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogb.c @@ -0,0 +1,2 @@ +#include <math-type-macros-double.h> +#include <w_llogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c new file mode 100644 index 0000000..edb7e9a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/le/fpu/w_llogbf.c @@ -0,0 +1,2 @@ +#include <math-type-macros-float.h> +#include <w_llogb_template.c> diff --git a/sysdeps/powerpc/powerpc64/le/power10/memchr.S b/sysdeps/powerpc/powerpc64/le/power10/memchr.S deleted file mode 100644 index 96ad5a2..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/memchr.S +++ /dev/null @@ -1,315 +0,0 @@ -/* Optimized memchr implementation for POWER10 LE. - Copyright (C) 2021-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -# ifndef MEMCHR -# define MEMCHR __memchr -# endif -# define M_VREG_ZERO v20 -# define M_OFF_START_LOOP 256 -# define MEMCHR_SUBTRACT_VECTORS \ - vsububm v4,v4,v18; \ - vsububm v5,v5,v18; \ - vsububm v6,v6,v18; \ - vsububm v7,v7,v18; -# define M_TAIL(vreg,increment) \ - vctzlsbb r4,vreg; \ - cmpld r5,r4; \ - ble L(null); \ - addi r4,r4,increment; \ - add r3,r6,r4; \ - blr - -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define M_VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define M_LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define CHECK16B(vreg,offset,addr,label) \ - lxv vreg+32,offset(addr); \ - vcmpequb. vreg,vreg,v18; \ - bne cr6,L(label); \ - cmpldi r5,16; \ - ble L(null); \ - addi r5,r5,-16; - -/* Load 4 quadwords, merge into one VR for speed and check for NULLs. r6 has # - of bytes already checked. 
*/ -#define CHECK64B(offset,addr,label) \ - M_LXVP(v4+32,offset,addr); \ - M_LXVP(v6+32,offset+32,addr); \ - MEMCHR_SUBTRACT_VECTORS; \ - vminub v14,v4,v5; \ - vminub v15,v6,v7; \ - vminub v16,v14,v15; \ - vcmpequb. v0,v16,M_VREG_ZERO; \ - beq cr6,$+12; \ - li r7,offset; \ - b L(label); \ - cmpldi r5,64; \ - ble L(null); \ - addi r5,r5,-64 - -/* Implements the function - void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5]). */ - - .machine power9 - -ENTRY_TOCLESS (MEMCHR) - CALL_MCOUNT 3 - - cmpldi r5,0 - beq L(null) - mr r0,r5 - xori r6,r4,0xff - - mtvsrd v18+32,r4 /* matching char in v18 */ - mtvsrd v19+32,r6 /* non matching char in v19 */ - - vspltb v18,v18,7 /* replicate */ - vspltb v19,v19,7 /* replicate */ - vspltisb M_VREG_ZERO,0 - - /* Next 16B-aligned address. Prepare address for L(aligned). */ - addi r6,r3,16 - clrrdi r6,r6,4 - - /* Align data and fill bytes not loaded with non matching char. */ - lvx v0,0,r3 - lvsr v1,0,r3 - vperm v0,v19,v0,v1 - - vcmpequb. v6,v0,v18 - bne cr6,L(found) - sub r4,r6,r3 - cmpld r5,r4 - ble L(null) - sub r5,r5,r4 - - /* Test up to OFF_START_LOOP-16 bytes in 16B chunks. The main loop is - optimized for longer strings, so checking the first bytes in 16B - chunks benefits a lot small strings. */ - .p2align 5 -L(aligned): - cmpldi r5,0 - beq L(null) - - CHECK16B(v0,0,r6,tail1) - CHECK16B(v1,16,r6,tail2) - CHECK16B(v2,32,r6,tail3) - CHECK16B(v3,48,r6,tail4) - CHECK16B(v4,64,r6,tail5) - CHECK16B(v5,80,r6,tail6) - CHECK16B(v6,96,r6,tail7) - CHECK16B(v7,112,r6,tail8) - CHECK16B(v8,128,r6,tail9) - CHECK16B(v9,144,r6,tail10) - CHECK16B(v10,160,r6,tail11) - CHECK16B(v0,176,r6,tail12) - CHECK16B(v1,192,r6,tail13) - CHECK16B(v2,208,r6,tail14) - CHECK16B(v3,224,r6,tail15) - - cmpdi cr5,r4,0 /* Check if c == 0. This will be useful to - choose how we will perform the main loop. */ - - /* Prepare address for the loop. 
*/ - addi r4,r3,M_OFF_START_LOOP - clrrdi r4,r4,6 - sub r6,r4,r3 - sub r5,r0,r6 - addi r6,r4,128 - - /* If c == 0, use the loop without the vsububm. */ - beq cr5,L(loop) - - /* This is very similar to the block after L(loop), the difference is - that here MEMCHR_SUBTRACT_VECTORS is not empty, and we subtract - each byte loaded by the char we are looking for, this way we can keep - using vminub to merge the results and checking for nulls. */ - .p2align 5 -L(memchr_loop): - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(memchr_loop) - /* Switch to a more aggressive approach checking 64B each time. Use 2 - pointers 128B apart and unroll the loop once to make the pointer - updates and usages separated enough to avoid stalls waiting for - address calculation. */ - .p2align 5 -L(loop): -#undef MEMCHR_SUBTRACT_VECTORS -#define MEMCHR_SUBTRACT_VECTORS /* nothing */ - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - CHECK64B(0,r4,pre_tail_64b) - CHECK64B(64,r4,pre_tail_64b) - addi r4,r4,256 - - CHECK64B(0,r6,tail_64b) - CHECK64B(64,r6,tail_64b) - addi r6,r6,256 - - b L(loop) - - .p2align 5 -L(pre_tail_64b): - mr r6,r4 -L(tail_64b): - /* OK, we found a null byte. Let's look for it in the current 64-byte - block and mark it in its corresponding VR. lxvp vx,0(ry) puts the - low 16B bytes into vx+1, and the high into vx, so the order here is - v5, v4, v7, v6. */ - vcmpequb v1,v5,M_VREG_ZERO - vcmpequb v2,v4,M_VREG_ZERO - vcmpequb v3,v7,M_VREG_ZERO - vcmpequb v4,v6,M_VREG_ZERO - - /* Take into account the other 64B blocks we had already checked. */ - add r6,r6,r7 - /* Extract first bit of each byte. 
*/ - M_VEXTRACTBM(r8,v1) - M_VEXTRACTBM(r9,v2) - M_VEXTRACTBM(r10,v3) - M_VEXTRACTBM(r11,v4) - - /* Shift each value into their corresponding position. */ - sldi r9,r9,16 - sldi r10,r10,32 - sldi r11,r11,48 - - /* Merge the results. */ - or r8,r8,r9 - or r9,r10,r11 - or r11,r9,r8 - - cnttzd r0,r11 /* Count trailing zeros before the match. */ - cmpld r5,r0 - ble L(null) - add r3,r6,r0 /* Compute final address. */ - blr - - .p2align 5 -L(tail1): - M_TAIL(v0,0) - - .p2align 5 -L(tail2): - M_TAIL(v1,16) - - .p2align 5 -L(tail3): - M_TAIL(v2,32) - - .p2align 5 -L(tail4): - M_TAIL(v3,48) - - .p2align 5 -L(tail5): - M_TAIL(v4,64) - - .p2align 5 -L(tail6): - M_TAIL(v5,80) - - .p2align 5 -L(tail7): - M_TAIL(v6,96) - - .p2align 5 -L(tail8): - M_TAIL(v7,112) - - .p2align 5 -L(tail9): - M_TAIL(v8,128) - - .p2align 5 -L(tail10): - M_TAIL(v9,144) - - .p2align 5 -L(tail11): - M_TAIL(v10,160) - - .p2align 5 -L(tail12): - M_TAIL(v0,176) - - .p2align 5 -L(tail13): - M_TAIL(v1,192) - - .p2align 5 -L(tail14): - M_TAIL(v2,208) - - .p2align 5 -L(tail15): - M_TAIL(v3,224) - - .p2align 5 -L(found): - vctzlsbb r7,v6 - cmpld r5,r7 - ble L(null) - add r3,r3,r7 - blr - - .p2align 5 -L(null): - li r3,0 - blr - -END (MEMCHR) - -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S index f32dc38..734bf5f 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S @@ -18,26 +18,10 @@ #include <sysdep.h> -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. 
*/ -#define VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - /* Compare 32 bytes. */ #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\ - LXVP(32+vr1,offset,r3); \ - LXVP(32+vr2,offset,r4); \ + lxvp 32+vr1,offset(r3); \ + lxvp 32+vr2,offset(r4); \ vcmpneb. v5,vr1+1,vr2+1; \ bne cr6,L(tail_2); \ vcmpneb. v4,vr1,vr2; \ @@ -56,7 +40,7 @@ #ifndef MEMCMP # define MEMCMP memcmp #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMCMP, 4) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S index ed7a9f5..f2a503e 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memcpy.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memcpy.S @@ -26,7 +26,7 @@ /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. 
*/ - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMCPY, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memmove.S b/sysdeps/powerpc/powerpc64/le/power10/memmove.S index 47c2ac3..4aaa1ef 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memmove.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memmove.S @@ -28,7 +28,7 @@ #ifndef MEMMOVE # define MEMMOVE memmove #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMMOVE, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S index 29d5114..f9442e7 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/memset.S +++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S @@ -25,7 +25,7 @@ # define MEMSET memset #endif - .machine power9 + .machine power10 ENTRY_TOCLESS (MEMSET, 5) CALL_MCOUNT 3 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S deleted file mode 100644 index fffa1ee..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S +++ /dev/null @@ -1,233 +0,0 @@ -/* Optimized strcmp implementation for PowerPC64/POWER10. - Copyright (C) 2021-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> - -#ifndef STRCMP -# define STRCMP strcmp -#endif - -/* Implements the function - int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */ - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - -#define TAIL(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; \ - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r0,len_reg,56; \ - lxvl 32+v4,reg1,r0; \ - lxvl 32+v5,reg2,r0; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - - .machine power9 -ENTRY_TOCLESS (STRCMP, 4) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r5,r9,16 /* r5(nalign2) = 16 - (str2 & 15). */ - cmpld cr7,r7,r5 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r5) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. 
r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r3-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - - .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) - - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. 
*/ -L(compare_64_pagecross): - li r11, 63 - mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) - -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. */ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - - clrldi r6,r3,59 - subfic r5,r6,32 - add r3,r3,r5 - add r4,r4,r5 - andi. r5,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. 
*/ -L(different): - TAIL(v4,v5) - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL(v15,v17) -L(tail2): TAIL(v14,v16) -L(tail3): TAIL(v19,v21) -L(tail4): TAIL(v18,v20) -L(tail5): TAIL(v23,v25) -L(tail6): TAIL(v22,v24) -L(tail7): TAIL(v27,v29) -L(tail8): TAIL(v26,v28) - -END (STRCMP) -libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S index 4985a92..ec644d5 100644 --- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S +++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S @@ -63,22 +63,6 @@ blr #endif /* USE_AS_RAWMEMCHR */ -/* TODO: Replace macros by the actual instructions when minimum binutils becomes - >= 2.35. This is used to keep compatibility with older versions. */ -#define VEXTRACTBM(rt,vrb) \ - .long(((4)<<(32-6)) \ - | ((rt)<<(32-11)) \ - | ((8)<<(32-16)) \ - | ((vrb)<<(32-21)) \ - | 1602) - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - #define CHECK16(vreg,offset,addr,label) \ lxv vreg+32,offset(addr); \ vcmpequb. vreg,vreg,v18; \ @@ -88,8 +72,8 @@ of bytes already checked. */ #define CHECK64(offset,addr,label) \ li r6,offset; \ - LXVP(v4+32,offset,addr); \ - LXVP(v6+32,offset+32,addr); \ + lxvp v4+32,offset(addr); \ + lxvp v6+32,offset+32(addr); \ RAWMEMCHR_SUBTRACT_VECTORS; \ vminub v14,v4,v5; \ vminub v15,v6,v7; \ @@ -108,7 +92,7 @@ The implementation can load bytes past a matching byte, but only up to the next 64B boundary, so it never crosses a page. */ -.machine power9 +.machine power10 ENTRY_TOCLESS (FUNCNAME, 4) CALL_MCOUNT MCOUNT_NARGS @@ -234,10 +218,10 @@ L(tail_64b): add r5,r5,r6 /* Extract first bit of each byte. 
*/ - VEXTRACTBM(r7,v1) - VEXTRACTBM(r8,v2) - VEXTRACTBM(r9,v3) - VEXTRACTBM(r10,v4) + vextractbm r7,v1 + vextractbm r8,v2 + vextractbm r9,v3 + vextractbm r10,v4 /* Shift each value into their corresponding position. */ sldi r8,r8,16 diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S deleted file mode 100644 index 10700dd..0000000 --- a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S +++ /dev/null @@ -1,271 +0,0 @@ -/* Optimized strncmp implementation for PowerPC64/POWER10. - Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -/* Implements the function - - int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) - - The implementation uses unaligned doubleword access to avoid specialized - code paths depending of data alignment for first 32 bytes and uses - vectorised loops after that. */ - -#ifndef STRNCMP -# define STRNCMP strncmp -#endif - -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. 
*/ - -#define LXVP(xtp,dq,ra) \ - .long(((6)<<(32-6)) \ - | ((((xtp)-32)>>1)<<(32-10)) \ - | ((1)<<(32-11)) \ - | ((ra)<<(32-16)) \ - | dq) - -#define COMPARE_16(vreg1,vreg2,offset) \ - lxv vreg1+32,offset(r3); \ - lxv vreg2+32,offset(r4); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(different); \ - cmpldi cr7,r5,16; \ - ble cr7,L(ret0); \ - addi r5,r5,-16; - -#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \ - LXVP(vreg1+32,offset,r3); \ - LXVP(vreg2+32,offset,r4); \ - vcmpnezb. v7,vreg1+1,vreg2+1; \ - bne cr6,L(label1); \ - vcmpnezb. v7,vreg1,vreg2; \ - bne cr6,L(label2); \ - cmpldi cr7,r5,32; \ - ble cr7,L(ret0); \ - addi r5,r5,-32; - -#define TAIL_FIRST_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - cmpld cr7,r5,r6; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define TAIL_SECOND_16B(vreg1,vreg2) \ - vctzlsbb r6,v7; \ - addi r0,r6,16; \ - cmpld cr7,r5,r0; \ - ble cr7,L(ret0); \ - vextubrx r5,r6,vreg1; \ - vextubrx r4,r6,vreg2; \ - subf r3,r4,r5; \ - blr; - -#define CHECK_N_BYTES(reg1,reg2,len_reg) \ - sldi r6,len_reg,56; \ - lxvl 32+v4,reg1,r6; \ - lxvl 32+v5,reg2,r6; \ - add reg1,reg1,len_reg; \ - add reg2,reg2,len_reg; \ - vcmpnezb v7,v4,v5; \ - vctzlsbb r6,v7; \ - cmpld cr7,r6,len_reg; \ - blt cr7,L(different); \ - cmpld cr7,r5,len_reg; \ - ble cr7,L(ret0); \ - sub r5,r5,len_reg; \ - - /* TODO: change this to .machine power10 when the minimum required - binutils allows it. */ - .machine power9 -ENTRY_TOCLESS (STRNCMP, 4) - /* Check if size is 0. */ - cmpdi cr0,r5,0 - beq cr0,L(ret0) - andi. r7,r3,4095 - andi. r8,r4,4095 - cmpldi cr0,r7,4096-16 - cmpldi cr1,r8,4096-16 - bgt cr0,L(crosses) - bgt cr1,L(crosses) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - -L(crosses): - andi. r7,r3,15 - subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */ - andi. r9,r4,15 - subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). 
*/ - cmpld cr7,r7,r8 - beq cr7,L(same_aligned) - blt cr7,L(nalign1_min) - - /* nalign2 is minimum and s2 pointer is aligned. */ - CHECK_N_BYTES(r3,r4,r8) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r3,63 /* Determine offset into 64B hunk. */ - andi. r8,r3,15 /* The offset into the 16B hunk. */ - neg r7,r3 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - b L(compare_64B_unaligned) - - /* nalign1 is minimum and s1 pointer is aligned. */ -L(nalign1_min): - CHECK_N_BYTES(r3,r4,r7) - /* Are we on the 64B hunk which crosses a page? */ - andi. r10,r4,63 /* Determine offset into 64B hunk. */ - andi. r8,r4,15 /* The offset into the 16B hunk. */ - neg r7,r4 - andi. r9,r7,15 /* Number of bytes after a 16B cross. */ - rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */ - beq L(compare_64_pagecross) - mtctr r7 - - .p2align 5 -L(compare_64B_unaligned): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - bdnz L(compare_64B_unaligned) - - /* Cross the page boundary of s2, carefully. Only for first - iteration we have to get the count of 64B blocks to be checked. - From second iteration and beyond, loop counter is always 63. 
*/ -L(compare_64_pagecross): - li r11, 63 - mtctr r11 - cmpldi r10,16 - ble L(cross_4) - cmpldi r10,32 - ble L(cross_3) - cmpldi r10,48 - ble L(cross_2) -L(cross_1): - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - b L(compare_64B_unaligned) -L(cross_2): - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - b L(compare_64B_unaligned) -L(cross_3): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r3,r3,32 - addi r4,r4,32 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - COMPARE_16(v4,v5,0) - addi r3,r3,16 - addi r4,r4,16 - b L(compare_64B_unaligned) -L(cross_4): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - addi r3,r3,48 - addi r4,r4,48 - CHECK_N_BYTES(r3,r4,r9) - CHECK_N_BYTES(r3,r4,r8) - b L(compare_64B_unaligned) - -L(same_aligned): - CHECK_N_BYTES(r3,r4,r7) - /* Align s1 to 32B and adjust s2 address. - Use lxvp only if both s1 and s2 are 32B aligned. */ - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - addi r5,r5,32 - - clrldi r6,r3,59 - subfic r7,r6,32 - add r3,r3,r7 - add r4,r4,r7 - subf r5,r7,r5 - andi. r7,r4,0x1F - beq cr0,L(32B_aligned_loop) - - .p2align 5 -L(16B_aligned_loop): - COMPARE_16(v4,v5,0) - COMPARE_16(v4,v5,16) - COMPARE_16(v4,v5,32) - COMPARE_16(v4,v5,48) - addi r3,r3,64 - addi r4,r4,64 - b L(16B_aligned_loop) - - /* Calculate and return the difference. 
*/ -L(different): - TAIL_FIRST_16B(v4,v5) - - .p2align 5 -L(32B_aligned_loop): - COMPARE_32(v14,v16,0,tail1,tail2) - COMPARE_32(v18,v20,32,tail3,tail4) - COMPARE_32(v22,v24,64,tail5,tail6) - COMPARE_32(v26,v28,96,tail7,tail8) - addi r3,r3,128 - addi r4,r4,128 - b L(32B_aligned_loop) - -L(tail1): TAIL_FIRST_16B(v15,v17) -L(tail2): TAIL_SECOND_16B(v14,v16) -L(tail3): TAIL_FIRST_16B(v19,v21) -L(tail4): TAIL_SECOND_16B(v18,v20) -L(tail5): TAIL_FIRST_16B(v23,v25) -L(tail6): TAIL_SECOND_16B(v22,v24) -L(tail7): TAIL_FIRST_16B(v27,v29) -L(tail8): TAIL_SECOND_16B(v26,v28) - - .p2align 5 -L(ret0): - li r3,0 - blr - -END(STRNCMP) -libc_hidden_builtin_def(strncmp) diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S index 83b21c6..f0cde81 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S @@ -28,21 +28,6 @@ The implementation uses unaligned doubleword access for first 32 bytes as in POWER8 patch and uses vectorised loops after that. */ -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. */ -#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) - -#define VEXTUBRX(t,a,b) .long (0x1000070d \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - -#define VCMPNEZB(t,a,b) .long (0x10000507 \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - /* Get 16 bytes for unaligned case. reg1: Vector to hold next 16 bytes. reg2: Address to read from. @@ -61,10 +46,7 @@ 2: \ vperm reg1, v9, reg1, reg3; -/* TODO: change this to .machine power9 when the minimum required binutils - allows it. */ - - .machine power7 + .machine power9 ENTRY_TOCLESS (STRCMP, 4) li r0, 0 @@ -116,7 +98,7 @@ L(align): /* Both s1 and s2 are unaligned. */ GET16BYTES(v4, r7, v10) GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. 
v7, v5, v4 beq cr6, L(match) b L(different) @@ -136,28 +118,28 @@ L(match): L(s1_align): lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, r7, r0 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 beq cr6, L(s1_align) @@ -167,37 +149,37 @@ L(s1_align): L(aligned): lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 bne cr6, L(different) lvx v4, 0, r7 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 addi r7, r7, 16 addi r4, r4, 16 beq cr6, L(aligned) /* Calculate and return the difference. */ L(different): - VCTZLSBB(r6, v7) - VEXTUBRX(r5, r6, v4) - VEXTUBRX(r4, r6, v5) + vctzlsbb r6, v7 + vextubrx r5, r6, v4 + vextubrx r4, r6, v5 subf r3, r4, r5 extsw r3, r3 blr diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S index 60c74ab..5a25f94 100644 --- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S +++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S @@ -29,21 +29,6 @@ # define STRNCMP strncmp #endif -/* TODO: Change this to actual instructions when minimum binutils is upgraded - to 2.27. Macros are defined below for these newer instructions in order - to maintain compatibility. 
*/ -#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) - -#define VEXTUBRX(t,a,b) .long (0x1000070d \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - -#define VCMPNEZB(t,a,b) .long (0x10000507 \ - | ((t)<<(32-11)) \ - | ((a)<<(32-16)) \ - | ((b)<<(32-21)) ) - /* Get 16 bytes for unaligned case. reg1: Vector to hold next 16 bytes. reg2: Address to read from. @@ -64,9 +49,7 @@ 2: \ vperm reg1, v9, reg1, reg3; -/* TODO: change this to .machine power9 when minimum binutils - is upgraded to 2.27. */ - .machine power7 + .machine power9 ENTRY_TOCLESS (STRNCMP, 4) /* Check if size is 0. */ cmpdi cr0, r5, 0 @@ -163,7 +146,7 @@ L(align): clrldi r6, r3, 60 subfic r11, r6, 16 GET16BYTES(v4, r3, v10) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 beq cr6, L(match) b L(different) @@ -186,7 +169,7 @@ L(match): L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -196,7 +179,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -206,7 +189,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -216,7 +199,7 @@ L(s1_align): lvx v4, 0, r3 GET16BYTES(v5, r4, v6) - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -228,7 +211,7 @@ L(s1_align): L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -238,7 +221,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -248,7 +231,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. 
v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -258,7 +241,7 @@ L(aligned): lvx v4, 0, r3 lvx v5, 0, r4 - VCMPNEZB(v7, v5, v4) + vcmpnezb. v7, v5, v4 bne cr6, L(different) cmpldi cr7, r5, 16 ble cr7, L(ret0) @@ -268,11 +251,11 @@ L(aligned): b L(aligned) /* Calculate and return the difference. */ L(different): - VCTZLSBB(r6, v7) + vctzlsbb r6, v7 cmplw cr7, r5, r6 ble cr7, L(ret0) - VEXTUBRX(r5, r6, v4) - VEXTUBRX(r4, r6, v5) + vextubrx r5, r6, v4 + vextubrx r4, r6, v5 subf r3, r4, r5 extsw r3, r3 blr diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index 624439d..e321ce5 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -30,12 +30,11 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \ strncase-power8 ifneq (,$(filter %le,$(config-machine))) -sysdep_routines += memchr-power10 memcmp-power10 memcpy-power10 \ - memmove-power10 memset-power10 rawmemchr-power9 \ - rawmemchr-power10 strcmp-power9 strcmp-power10 \ - strncmp-power9 strncmp-power10 strcpy-power9 strcat-power10 \ - stpcpy-power9 strlen-power9 strncpy-power9 stpncpy-power9 \ - strlen-power10 +sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \ + rawmemchr-power9 rawmemchr-power10 \ + strcmp-power9 strncmp-power9 \ + strcpy-power9 strcat-power10 stpcpy-power9 \ + strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10 endif endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index f3acd38..016d05f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -164,9 +164,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. 
*/ IFUNC_IMPL (i, name, strncmp, #ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __strncmp_power10) IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strncmp_power9) @@ -229,12 +226,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, -#ifdef __LITTLE_ENDIAN__ - IFUNC_IMPL_ADD (array, i, memchr, - hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX, - __memchr_power10) -#endif IFUNC_IMPL_ADD (array, i, memchr, hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC, @@ -376,10 +367,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcmp, #ifdef __LITTLE_ENDIAN__ IFUNC_IMPL_ADD (array, i, strcmp, - (hwcap2 & PPC_FEATURE2_ARCH_3_1) - && (hwcap & PPC_FEATURE_HAS_VSX), - __strcmp_power10) - IFUNC_IMPL_ADD (array, i, strcmp, hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC, __strcmp_power9) diff --git a/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/sysdeps/powerpc/powerpc64/multiarch/memchr.c index b63c796..3abd64a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memchr.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -25,23 +25,15 @@ extern __typeof (__memchr) __memchr_ppc attribute_hidden; extern __typeof (__memchr) __memchr_power7 attribute_hidden; extern __typeof (__memchr) __memchr_power8 attribute_hidden; -# ifdef __LITTLE_ENDIAN__ -extern __typeof (__memchr) __memchr_power10 attribute_hidden; -# endif /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ libc_ifunc (__memchr, -# ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __memchr_power10 : -# endif - (hwcap2 & PPC_FEATURE2_ARCH_2_07 - && hwcap & PPC_FEATURE_HAS_ALTIVEC) - ? 
__memchr_power8 : - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __memchr_power7 - : __memchr_ppc); + (hwcap2 & PPC_FEATURE2_ARCH_2_07 + && hwcap & PPC_FEATURE_HAS_ALTIVEC) + ? __memchr_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memchr_power7 + : __memchr_ppc); weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c index 3c636e3..7c77c08 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -29,16 +29,12 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden; extern __typeof (strcmp) __strcmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strcmp) __strcmp_power9 attribute_hidden; -extern __typeof (strcmp) __strcmp_power10 attribute_hidden; # endif # undef strcmp libc_ifunc_redirected (__redirect_strcmp, strcmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? __strcmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strcmp_power9 : diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index 0a664a6..4cfe27f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -29,7 +29,6 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; -extern __typeof (strncmp) __strncmp_power10 attribute_hidden; # endif # undef strncmp @@ -37,9 +36,6 @@ extern __typeof (strncmp) __strncmp_power10 attribute_hidden; ifunc symbol properly. */ libc_ifunc_redirected (__redirect_strncmp, strncmp, # ifdef __LITTLE_ENDIAN__ - (hwcap2 & PPC_FEATURE2_ARCH_3_1 - && hwcap & PPC_FEATURE_HAS_VSX) - ? 
__strncmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strncmp_power9 : diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile index de146dd..7572f62 100644 --- a/sysdeps/pthread/Makefile +++ b/sysdeps/pthread/Makefile @@ -62,7 +62,6 @@ tests += \ tst-abstime \ tst-atfork1 \ tst-attr1 \ - tst-backtrace1 \ tst-bad-schedattr \ tst-barrier1 \ tst-barrier2 \ diff --git a/sysdeps/pthread/tst-backtrace1.c b/sysdeps/pthread/tst-backtrace1.c deleted file mode 100644 index 01b8a0c..0000000 --- a/sysdeps/pthread/tst-backtrace1.c +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (C) 2004-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. 
*/ - -#include <execinfo.h> -#include <pthread.h> -#include <stdio.h> - -#define BT_SIZE 64 -void *bt_array[BT_SIZE]; -int bt_cnt; - -int -do_bt (void) -{ - bt_cnt = backtrace (bt_array, BT_SIZE); - return 56; -} - -int -call_do_bt (void) -{ - return do_bt () + 1; -} - -void * -tf (void *arg) -{ - if (call_do_bt () != 57) - return (void *) 1L; - return NULL; -} - -int -do_test (void) -{ - pthread_t th; - if (pthread_create (&th, NULL, tf, NULL)) - { - puts ("create failed"); - return 1; - } - - void *res; - if (pthread_join (th, &res)) - { - puts ("join failed"); - return 1; - } - - if (res != NULL) - { - puts ("thread failed"); - return 1; - } - - char **text = backtrace_symbols (bt_array, bt_cnt); - if (text == NULL) - { - puts ("backtrace_symbols failed"); - return 1; - } - - for (int i = 0; i < bt_cnt; ++i) - puts (text[i]); - - return 0; -} - -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" diff --git a/sysdeps/pthread/tst-cond23.c b/sysdeps/pthread/tst-cond23.c index 0a68472..a338397 100644 --- a/sysdeps/pthread/tst-cond23.c +++ b/sysdeps/pthread/tst-cond23.c @@ -151,7 +151,7 @@ do_test (void) #if !defined _POSIX_CLOCK_SELECTION || _POSIX_CLOCK_SELECTION == -1 puts ("_POSIX_CLOCK_SELECTION not supported, test skipped"); - return 0; + return EXIT_UNSUPPORTED; #else diff --git a/sysdeps/pthread/tst-fopen-threaded.c b/sysdeps/pthread/tst-fopen-threaded.c index ade58ad..c17f1ea 100644 --- a/sysdeps/pthread/tst-fopen-threaded.c +++ b/sysdeps/pthread/tst-fopen-threaded.c @@ -34,11 +34,13 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <stdlib.h> #include <support/check.h> #include <support/temp_file.h> #include <support/xstdio.h> #include <support/xthread.h> +#include <support/support.h> #define NUM_THREADS 100 #define ITERS 10 @@ -111,7 +113,8 @@ threadOpenCloseRoutine (void *argv) /* Wait for all threads to be ready to call fopen and fclose. 
*/ xpthread_barrier_wait (&barrier); - FILE *fd = xfopen ("/tmp/openclosetest", "w+"); + char *file = (char *) argv; + FILE *fd = xfopen (file, "w+"); xfclose (fd); return NULL; } @@ -235,6 +238,10 @@ do_test (void) xfclose (fd_file); } + char *tempdir = support_create_temp_directory ("openclosetest-"); + char *file = xasprintf ("%s/file", tempdir); + add_temp_file (file); + /* Test 3: Concurrent open/close. */ for (int reps = 1; reps <= ITERS; reps++) { @@ -243,7 +250,7 @@ do_test (void) { threads[i] = xpthread_create (support_small_stack_thread_attribute (), - threadOpenCloseRoutine, NULL); + threadOpenCloseRoutine, file); } for (int i = 0; i < NUM_THREADS; i++) { @@ -252,6 +259,9 @@ do_test (void) xpthread_barrier_destroy (&barrier); } + free (file); + free (tempdir); + return 0; } diff --git a/sysdeps/s390/s390-32/s390-mcount.S b/sysdeps/s390/s390-32/s390-mcount.S index 59614ee..7f8457f 100644 --- a/sysdeps/s390/s390-32/s390-mcount.S +++ b/sysdeps/s390/s390-32/s390-mcount.S @@ -54,11 +54,7 @@ C_LABEL(_mcount) /* Save the caller-clobbered registers. */ ahi %r15,-128 cfi_adjust_cfa_offset (128) - /* binutils 2.28+: .cfi_val_offset r15, -96 */ - .cfi_escape \ - /* DW_CFA_val_offset */ 0x14, \ - /* r15 */ 0x0f, \ - /* scaled offset */ 0x18 + cfi_val_offset (r15, -96) stm %r14,%r5,96(%r15) cfi_offset (r14, -128) l %r2,132(%r15) # callers address = first parameter diff --git a/sysdeps/s390/s390-64/s390x-mcount.h b/sysdeps/s390/s390-64/s390x-mcount.h index b82f1a8..c5bd70d 100644 --- a/sysdeps/s390/s390-64/s390x-mcount.h +++ b/sysdeps/s390/s390-64/s390x-mcount.h @@ -68,11 +68,7 @@ C_LABEL(MCOUNT_SYMBOL) /* Save the caller-clobbered registers. 
*/ aghi %r15,-224 cfi_adjust_cfa_offset (224) - /* binutils 2.28+: .cfi_val_offset r15, -160 */ - .cfi_escape \ - /* DW_CFA_val_offset */ 0x14, \ - /* r15 */ 0x0f, \ - /* scaled offset */ 0x14 + cfi_val_offset (r15, -160) stmg %r14,%r5,160(%r15) cfi_offset (r14, -224) cfi_offset (r0, -224+16) diff --git a/sysdeps/sparc/sparc32/start.S b/sysdeps/sparc/sparc32/start.S index 694b020..8393760 100644 --- a/sysdeps/sparc/sparc32/start.S +++ b/sysdeps/sparc/sparc32/start.S @@ -35,6 +35,7 @@ #include <sysdep.h> +#define FRAME_SIZE 104 .section ".text" .align 4 @@ -48,12 +49,12 @@ _start: /* Terminate the stack frame, and reserve space for functions to drop their arguments. */ mov %g0, %fp - sub %sp, 6*4, %sp + sub %sp, FRAME_SIZE, %sp /* Extract the arguments and environment as encoded on the stack. The argument info starts after one register window (16 words) past the SP. */ - ld [%sp+22*4], %o1 - add %sp, 23*4, %o2 + ld [%sp+168], %o1 + add %sp, 172, %o2 /* Load the addresses of the user entry points. */ #ifndef PIC @@ -73,6 +74,10 @@ _start: be NULL. */ mov %g1, %o5 + /* Provide the highest stack address to update the __libc_stack_end (used + to enable executable stacks if required). */ + st %sp, [%sp+23*4] + /* Let libc do the rest of the initialization, and call main. */ call __libc_start_main nop diff --git a/sysdeps/sparc/sparc64/start.S b/sysdeps/sparc/sparc64/start.S index c9c25c2..08e1e77 100644 --- a/sysdeps/sparc/sparc64/start.S +++ b/sysdeps/sparc/sparc64/start.S @@ -74,6 +74,10 @@ _start: be NULL. */ mov %g1, %o5 + /* Provide the highest stack address to update the __libc_stack_end (used + to enable executable stacks if required). */ + stx %sp, [%sp+STACK_BIAS+22*8] + /* Let libc do the rest of the initialization, and call main. 
*/ call __libc_start_main nop diff --git a/sysdeps/unix/bsd/tcsetattr.c b/sysdeps/unix/bsd/tcsetattr.c index 38b5f71..8693d94 100644 --- a/sysdeps/unix/bsd/tcsetattr.c +++ b/sysdeps/unix/bsd/tcsetattr.c @@ -32,7 +32,7 @@ /* Set the state of FD to *TERMIOS_P. */ int -tcsetattr (int fd, int optional_actions, const struct termios *termios_p) +__tcsetattr (int fd, int optional_actions, const struct termios *termios_p) { struct termios myt; @@ -56,4 +56,6 @@ tcsetattr (int fd, int optional_actions, const struct termios *termios_p) return __ioctl (fd, TIOCSETAF, termios_p); } } -libc_hidden_def (tcsetattr) + +libc_hidden_def (__tcsetattr) +weak_alias (__tcsetattr, tcsetattr) diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index ebcf820..2c5bf42 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -151,15 +151,6 @@ sysdep_headers += \ bits/struct_stat.h \ bits/struct_stat_time64_helper.h \ bits/syscall.h \ - bits/termios-baud.h \ - bits/termios-c_cc.h \ - bits/termios-c_cflag.h \ - bits/termios-c_iflag.h \ - bits/termios-c_lflag.h \ - bits/termios-c_oflag.h \ - bits/termios-misc.h \ - bits/termios-struct.h \ - bits/termios-tcflow.h \ bits/timerfd.h \ bits/types/struct_msqid64_ds.h \ bits/types/struct_msqid64_ds_helper.h \ @@ -201,6 +192,7 @@ tests += \ tst-clone \ tst-clone2 \ tst-clone3 \ + tst-copy_file_range-large \ tst-epoll \ tst-epoll-ioctls \ tst-fanotify \ @@ -421,6 +413,24 @@ tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0 endif # $(subdir) == misc +ifeq ($(subdir),termios) +sysdep_headers += \ + bits/termios-c_cc.h \ + bits/termios-c_cflag.h \ + bits/termios-c_iflag.h \ + bits/termios-c_lflag.h \ + bits/termios-c_oflag.h \ + bits/termios-cbaud.h \ + bits/termios-misc.h \ + bits/termios-struct.h \ + bits/termios-tcflow.h \ + # sysdep_headers + +tests += \ + tst-termios-linux \ + # tests +endif + ifeq ($(subdir),time) sysdep_headers += \ bits/timex.h \ @@ -603,6 +613,7 @@ endif 
ifeq ($(subdir),io) sysdep_routines += \ close_nocancel \ + close_nocancel_nostatus \ fallocate \ fallocate64 \ fcntl_nocancel \ diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions index 55d5655..585dec7 100644 --- a/sysdeps/unix/sysv/linux/Versions +++ b/sysdeps/unix/sysv/linux/Versions @@ -332,6 +332,13 @@ libc { sched_getattr; sched_setattr; } + GLIBC_2.42 { + cfgetospeed; + cfgetispeed; + cfsetospeed; + cfsetispeed; + cfsetspeed; + } GLIBC_PRIVATE { # functions used in other libraries __syscall_rt_sigqueueinfo; @@ -339,6 +346,7 @@ libc { __read_nocancel; __pread64_nocancel; __close_nocancel; + __close_nocancel_nostatus; __sigtimedwait; # functions used by nscd __netlink_assert_response; diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h index 89aced0..ba4a461 100644 --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h @@ -175,6 +175,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index 6d63c8a..1acc82d 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -23,6 +23,7 @@ #include <sys/prctl.h> #include <sys/utsname.h> #include <dl-tunables-parse.h> +#include <dl-symbol-redir-ifunc.h> #define DCZID_DZP_MASK (1 << 4) #define DCZID_BS_MASK (0xf) diff --git a/sysdeps/unix/sysv/linux/aarch64/libc.abilist b/sysdeps/unix/sysv/linux/aarch64/libc.abilist index aa6bf48..a22e651 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libc.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libc.abilist @@ -2752,6 +2752,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 
__inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/aarch64/libm.abilist b/sysdeps/unix/sysv/linux/aarch64/libm.abilist index ecdabe6..bb8114b 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libm.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libm.abilist @@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist index a56ce7f..f7f72b6 100644 --- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist @@ -148,3 +148,23 @@ GLIBC_2.41 _ZGVsMxv_sinpi F GLIBC_2.41 _ZGVsMxv_sinpif F GLIBC_2.41 _ZGVsMxv_tanpi F GLIBC_2.41 _ZGVsMxv_tanpif F +GLIBC_2.42 _ZGVnN2v_acospi F +GLIBC_2.42 _ZGVnN2v_acospif F +GLIBC_2.42 _ZGVnN2v_asinpi F +GLIBC_2.42 _ZGVnN2v_asinpif F +GLIBC_2.42 _ZGVnN2v_atanpi F +GLIBC_2.42 _ZGVnN2v_atanpif F +GLIBC_2.42 _ZGVnN2vv_atan2pi F +GLIBC_2.42 _ZGVnN2vv_atan2pif F +GLIBC_2.42 _ZGVnN4v_acospif F +GLIBC_2.42 _ZGVnN4v_asinpif F +GLIBC_2.42 _ZGVnN4v_atanpif F +GLIBC_2.42 _ZGVnN4vv_atan2pif F +GLIBC_2.42 _ZGVsMxv_acospi F +GLIBC_2.42 _ZGVsMxv_acospif F +GLIBC_2.42 _ZGVsMxv_asinpi F +GLIBC_2.42 _ZGVsMxv_asinpif F +GLIBC_2.42 _ZGVsMxv_atanpi F +GLIBC_2.42 _ZGVsMxv_atanpif F +GLIBC_2.42 _ZGVsMxvv_atan2pi F +GLIBC_2.42 _ZGVsMxvv_atan2pif F diff --git a/sysdeps/unix/sysv/linux/aarch64/makecontext.c 
b/sysdeps/unix/sysv/linux/aarch64/makecontext.c index a2eab9e..4485723 100644 --- a/sysdeps/unix/sysv/linux/aarch64/makecontext.c +++ b/sysdeps/unix/sysv/linux/aarch64/makecontext.c @@ -36,9 +36,7 @@ static struct _aarch64_ctx *extension (void *p) static void * alloc_makecontext_gcs (size_t stack_size) { - void *base; - size_t size; - void *gcsp = __alloc_gcs (stack_size, &base, &size); + void *gcsp = __alloc_gcs (stack_size, NULL); if (gcsp == NULL) /* ENOSYS, bad size or OOM. */ abort (); diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S index 022a263..d9716f0 100644 --- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S @@ -48,25 +48,16 @@ ENTRY (__setcontext) cbz x0, 1f b C_SYMBOL_NAME (__syscall_error) 1: - /* Disable ZA of SME. */ -#if HAVE_AARCH64_PAC_RET - PACIASP - cfi_window_save -#endif - stp x29, x30, [sp, -16]! - cfi_adjust_cfa_offset (16) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - mov x29, sp + /* Clear ZA state of SME. */ + /* The calling convention of __libc_arm_za_disable allows to do + this thus allowing to avoid saving to and reading from stack. + As a result we also don't need to sign the return address and + check it after returning because it is not stored to stack. */ + mov x13, x30 + cfi_register (x30, x13) bl __libc_arm_za_disable - ldp x29, x30, [sp], 16 - cfi_adjust_cfa_offset (-16) - cfi_restore (x29) - cfi_restore (x30) -#if HAVE_AARCH64_PAC_RET - AUTIASP - cfi_window_save -#endif + mov x30, x13 + cfi_register (x13, x30) /* Restore the general purpose registers. 
*/ mov x0, x9 cfi_def_cfa (x0, 0) diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S index cc41253..58ddb95 100644 --- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S +++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S @@ -119,7 +119,7 @@ L(gcs_done): 2: /* The oucp context is restored here via an indirect branch, x1 must be restored too which has the real return address. */ - BTI_J + bti j mov x30, x1 RET PSEUDO_END (__swapcontext) diff --git a/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h b/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h new file mode 100644 index 0000000..9d5d345 --- /dev/null +++ b/sysdeps/unix/sysv/linux/aarch64/uw-sigframe.h @@ -0,0 +1,78 @@ +/* Signal frame backtracing support for SFrame on AARCH64. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License + as published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This code is inspired from libgcc's MD_FALLBACK_FRAME_STATE_FOR + implementation. 
See libgcc/config/aarch64/linux-unwind.h */ + +#include <signal.h> +#include <sys/ucontext.h> +#include <kernel_rt_sigframe.h> + +#ifdef __AARCH64EL__ +#define MOVZ_X8_8B 0xd2801168 +#define SVC_0 0xd4000001 +#else +#define MOVZ_X8_8B 0x681180d2 +#define SVC_0 0x010000d4 +#endif + +#define MD_DECODE_SIGNAL_FRAME aarch64_decode_signal_frame + +static _Unwind_Reason_Code +aarch64_decode_signal_frame (frame *frame) +{ + unsigned int *pc = (unsigned int *) frame->pc; + mcontext_t *mt; + struct kernel_rt_sigframe *rt_; + + if ((frame->pc & 3) != 0) + return _URC_END_OF_STACK; + + /* A signal frame will have a return address pointing to + __kernel_rt_sigreturn. This code is hardwired as: + + 0xd2801168 movz x8, #0x8b + 0xd4000001 svc 0x0 + */ + if (pc[0] != MOVZ_X8_8B || pc[1] != SVC_0) + return _URC_END_OF_STACK; + + rt_ = (struct kernel_rt_sigframe *) frame->sp; + mt = &rt_->uc.uc_mcontext; + + /* Frame pointer register number. */ +#define FP_REGNUM 30 + + frame->pc = (_Unwind_Ptr) mt->pc; + frame->sp = (_Unwind_Ptr) mt->sp; + frame->fp = (_Unwind_Ptr) mt->regs[FP_REGNUM]; + return _URC_NO_REASON; +} + +#define MD_DETECT_OUTERMOST_FRAME aarch64_detect_outermost_frame + +static _Unwind_Reason_Code +aarch64_detect_outermost_frame (frame *frame) +{ + /* Initial frame has LR and FP set to zero. We track only FP. 
*/ + if (frame->fp == 0) + return _URC_END_OF_STACK; + + return _URC_NO_REASON; +} diff --git a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h index 455da93..840d6fe 100644 --- a/sysdeps/unix/sysv/linux/alpha/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/alpha/arch-syscall.h @@ -209,6 +209,7 @@ #define __NR_open 45 #define __NR_open_by_handle_at 498 #define __NR_open_tree 538 +#define __NR_open_tree_attr 577 #define __NR_openat 450 #define __NR_openat2 547 #define __NR_osf_adjtime 140 diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h index 1f9f7f2..d830884 100644 --- a/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h +++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-c_cflag.h @@ -36,4 +36,6 @@ #ifdef __USE_MISC # define ADDRB 04000000000 +# define CMSPAR 010000000000 /* Mark or space (stick) parity. */ +# define CRTSCTS 020000000000 /* Flow control. */ #endif diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h index 324d5d8..69421f6 100644 --- a/sysdeps/unix/sysv/linux/alpha/bits/termios-baud.h +++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-cbaud.h @@ -17,30 +17,29 @@ <https://www.gnu.org/licenses/>. */ #ifndef _TERMIOS_H -# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead." +# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead." 
#endif #ifdef __USE_MISC -# define CBAUD 0000037 -# define CBAUDEX 0000000 -# define CMSPAR 010000000000 /* mark or space (stick) parity */ -# define CRTSCTS 020000000000 /* flow control */ +# define CBAUD 000000037 +# define CBAUDEX 000000000 +# define CIBAUD 007600000 +# define IBSHIFT 16 #endif -#define B57600 00020 -#define B115200 00021 -#define B230400 00022 -#define B460800 00023 -#define B500000 00024 -#define B576000 00025 -#define B921600 00026 -#define B1000000 00027 -#define B1152000 00030 -#define B1500000 00031 -#define B2000000 00032 -#define B2500000 00033 -#define B3000000 00034 -#define B3500000 00035 -#define B4000000 00036 - -#define __MAX_BAUD B4000000 +#define __B57600 00020 +#define __B115200 00021 +#define __B230400 00022 +#define __B460800 00023 +#define __B500000 00024 +#define __B576000 00025 +#define __B921600 00026 +#define __B1000000 00027 +#define __B1152000 00030 +#define __B1500000 00031 +#define __B2000000 00032 +#define __B2500000 00033 +#define __B3000000 00034 +#define __B3500000 00035 +#define __B4000000 00036 +#define __BOTHER 00037 diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h index de4d5fc..f50e9ef 100644 --- a/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h +++ b/sysdeps/unix/sysv/linux/alpha/bits/termios-struct.h @@ -30,8 +30,15 @@ struct termios tcflag_t c_lflag; /* local mode flags */ cc_t c_cc[NCCS]; /* control characters */ cc_t c_line; /* line discipline (== c_cc[33]) */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ + /* Input and output baud rates. 
*/ + __extension__ union { + speed_t __ispeed; + speed_t c_ispeed; + }; #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1 + __extension__ union { + speed_t __ospeed; + speed_t c_ospeed; + }; #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1 }; diff --git a/sysdeps/unix/sysv/linux/alpha/kernel-features.h b/sysdeps/unix/sysv/linux/alpha/kernel-features.h index 6eae48f..83fdf91 100644 --- a/sysdeps/unix/sysv/linux/alpha/kernel-features.h +++ b/sysdeps/unix/sysv/linux/alpha/kernel-features.h @@ -54,4 +54,15 @@ #undef __ASSUME_CLONE3 #define __ASSUME_CLONE3 0 +/* Alpha did not provide BOTHER, CIBAUD or the termios2 ioctls until + kernel 4.20. Even though struct __kernel_termios and struct + termios2 are the same on Alpha, Calling the legacy TCSETS* ioctls + with BOTHER set triggers a bug in these old kernels, so only use + the legacy TCSETS* ioctl numbers if neither BOTHER nor split speed is + needed; that way the code will fail gracefully. */ +#if __LINUX_KERNEL_VERSION < 0x041400 +# undef __ASSUME_TERMIOS2 +# define __ASSUME_TERMIOS2 0 +#endif + #endif /* _KERNEL_FEATURES_H */ diff --git a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h b/sysdeps/unix/sysv/linux/alpha/kernel_termios.h deleted file mode 100644 index 6a777dd..0000000 --- a/sysdeps/unix/sysv/linux/alpha/kernel_termios.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (C) 1997-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _KERNEL_TERMIOS_H -#define _KERNEL_TERMIOS_H 1 - -/* The following corresponds to the values from the Linux 2.1.20 kernel. */ - -/* We need the definition of tcflag_t, cc_t, and speed_t. */ -#include <termios.h> - -#define __KERNEL_NCCS 19 - -struct __kernel_termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_cc[__KERNEL_NCCS]; /* control characters */ - cc_t c_line; /* line discipline */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ - }; - -#define _HAVE_C_ISPEED 1 -#define _HAVE_C_OSPEED 1 - -#endif /* kernel_termios.h */ diff --git a/sysdeps/unix/sysv/linux/alpha/libc.abilist b/sysdeps/unix/sysv/linux/alpha/libc.abilist index d5df965..4b5736a 100644 --- a/sysdeps/unix/sysv/linux/alpha/libc.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libc.abilist @@ -3099,6 +3099,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/alpha/libm.abilist b/sysdeps/unix/sysv/linux/alpha/libm.abilist index db08345..4b383b1 100644 --- a/sysdeps/unix/sysv/linux/alpha/libm.abilist +++ b/sysdeps/unix/sysv/linux/alpha/libm.abilist @@ -1428,6 +1428,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 
rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/unix/sysv/linux/alpha/termios_arch.h index 7b45fcd..20025f2 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S +++ b/sysdeps/unix/sysv/linux/alpha/termios_arch.h @@ -1,5 +1,6 @@ -/* Optimized strcmp implementation for POWER10/PPC64. - Copyright (C) 2021-2025 Free Software Foundation, Inc. +/* Architectural parameters for Linux termios - Alpha/PowerPC version + + Copyright (C) 1997-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,11 +17,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRCMP __strcmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) +#ifndef TERMIOS_INTERNALS_H +# error "<termios_arch.h> should only be included from <termios_internals.h>" +#endif -#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S> -#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */ +#define _TERMIOS2_NCCS 19 +#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1 +#define _HAVE_STRUCT_OLD_TERMIOS 0 diff --git a/sysdeps/unix/sysv/linux/arc/arch-syscall.h b/sysdeps/unix/sysv/linux/arc/arch-syscall.h index 01075e8..2534f0f 100644 --- a/sysdeps/unix/sysv/linux/arc/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/arc/arch-syscall.h @@ -177,6 +177,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/arc/libc.abilist b/sysdeps/unix/sysv/linux/arc/libc.abilist index c46c08d..b8a4478 100644 --- a/sysdeps/unix/sysv/linux/arc/libc.abilist +++ 
b/sysdeps/unix/sysv/linux/arc/libc.abilist @@ -2513,6 +2513,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/arc/libm.abilist b/sysdeps/unix/sysv/linux/arc/libm.abilist index 30b13b9..c865ec8 100644 --- a/sysdeps/unix/sysv/linux/arc/libm.abilist +++ b/sysdeps/unix/sysv/linux/arc/libm.abilist @@ -847,6 +847,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/arm/arch-syscall.h b/sysdeps/unix/sysv/linux/arm/arch-syscall.h index 9704472..8e585a4 100644 --- a/sysdeps/unix/sysv/linux/arm/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/arm/arch-syscall.h @@ -223,6 +223,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 371 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 322 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/arm/be/libc.abilist b/sysdeps/unix/sysv/linux/arm/be/libc.abilist index 4df150c..959e446 100644 --- a/sysdeps/unix/sysv/linux/arm/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/be/libc.abilist @@ -2805,6 +2805,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F 
+GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/arm/be/libm.abilist b/sysdeps/unix/sysv/linux/arm/be/libm.abilist index 825ba11..63bad09 100644 --- a/sysdeps/unix/sysv/linux/arm/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/arm/be/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/arm/le/libc.abilist b/sysdeps/unix/sysv/linux/arm/le/libc.abilist index be29478..a930d1a 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libc.abilist @@ -2802,6 +2802,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/arm/le/libm.abilist b/sysdeps/unix/sysv/linux/arm/le/libm.abilist index 825ba11..63bad09 100644 --- a/sysdeps/unix/sysv/linux/arm/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/arm/le/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/bits/ioctls.h b/sysdeps/unix/sysv/linux/bits/ioctls.h index 
7e226e4..1ddcd4f 100644 --- a/sysdeps/unix/sysv/linux/bits/ioctls.h +++ b/sysdeps/unix/sysv/linux/bits/ioctls.h @@ -22,87 +22,4 @@ /* Use the definitions from the kernel header files. */ #include <asm/ioctls.h> -/* Routing table calls. */ -#define SIOCADDRT 0x890B /* add routing table entry */ -#define SIOCDELRT 0x890C /* delete routing table entry */ -#define SIOCRTMSG 0x890D /* call to routing system */ - -/* Socket configuration controls. */ -#define SIOCGIFNAME 0x8910 /* get iface name */ -#define SIOCSIFLINK 0x8911 /* set iface channel */ -#define SIOCGIFCONF 0x8912 /* get iface list */ -#define SIOCGIFFLAGS 0x8913 /* get flags */ -#define SIOCSIFFLAGS 0x8914 /* set flags */ -#define SIOCGIFADDR 0x8915 /* get PA address */ -#define SIOCSIFADDR 0x8916 /* set PA address */ -#define SIOCGIFDSTADDR 0x8917 /* get remote PA address */ -#define SIOCSIFDSTADDR 0x8918 /* set remote PA address */ -#define SIOCGIFBRDADDR 0x8919 /* get broadcast PA address */ -#define SIOCSIFBRDADDR 0x891a /* set broadcast PA address */ -#define SIOCGIFNETMASK 0x891b /* get network PA mask */ -#define SIOCSIFNETMASK 0x891c /* set network PA mask */ -#define SIOCGIFMETRIC 0x891d /* get metric */ -#define SIOCSIFMETRIC 0x891e /* set metric */ -#define SIOCGIFMEM 0x891f /* get memory address (BSD) */ -#define SIOCSIFMEM 0x8920 /* set memory address (BSD) */ -#define SIOCGIFMTU 0x8921 /* get MTU size */ -#define SIOCSIFMTU 0x8922 /* set MTU size */ -#define SIOCSIFNAME 0x8923 /* set interface name */ -#define SIOCSIFHWADDR 0x8924 /* set hardware address */ -#define SIOCGIFENCAP 0x8925 /* get/set encapsulations */ -#define SIOCSIFENCAP 0x8926 -#define SIOCGIFHWADDR 0x8927 /* Get hardware address */ -#define SIOCGIFSLAVE 0x8929 /* Driver slaving support */ -#define SIOCSIFSLAVE 0x8930 -#define SIOCADDMULTI 0x8931 /* Multicast address lists */ -#define SIOCDELMULTI 0x8932 -#define SIOCGIFINDEX 0x8933 /* name -> if_index mapping */ -#define SIOGIFINDEX SIOCGIFINDEX /* misprint compatibility :-) 
*/ -#define SIOCSIFPFLAGS 0x8934 /* set/get extended flags set */ -#define SIOCGIFPFLAGS 0x8935 -#define SIOCDIFADDR 0x8936 /* delete PA address */ -#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */ -#define SIOCGIFCOUNT 0x8938 /* get number of devices */ - -#define SIOCGIFBR 0x8940 /* Bridging support */ -#define SIOCSIFBR 0x8941 /* Set bridging options */ - -#define SIOCGIFTXQLEN 0x8942 /* Get the tx queue length */ -#define SIOCSIFTXQLEN 0x8943 /* Set the tx queue length */ - - -/* ARP cache control calls. */ - /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ -#define SIOCDARP 0x8953 /* delete ARP table entry */ -#define SIOCGARP 0x8954 /* get ARP table entry */ -#define SIOCSARP 0x8955 /* set ARP table entry */ - -/* RARP cache control calls. */ -#define SIOCDRARP 0x8960 /* delete RARP table entry */ -#define SIOCGRARP 0x8961 /* get RARP table entry */ -#define SIOCSRARP 0x8962 /* set RARP table entry */ - -/* Driver configuration calls */ - -#define SIOCGIFMAP 0x8970 /* Get device parameters */ -#define SIOCSIFMAP 0x8971 /* Set device parameters */ - -/* DLCI configuration calls */ - -#define SIOCADDDLCI 0x8980 /* Create new DLCI device */ -#define SIOCDELDLCI 0x8981 /* Delete DLCI device */ - -/* Device private ioctl calls. */ - -/* These 16 ioctls are available to devices via the do_ioctl() device - vector. Each device should include this file and redefine these - names as their own. Because these are device dependent it is a good - idea _NOT_ to issue them to random objects and hope. 
*/ - -#define SIOCDEVPRIVATE 0x89F0 /* to 89FF */ - -/* - * These 16 ioctl calls are protocol private - */ - -#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */ +#include <linux/sockios.h> diff --git a/sysdeps/unix/sysv/linux/bits/mman-shared.h b/sysdeps/unix/sysv/linux/bits/mman-shared.h index 3159097..0be4b47 100644 --- a/sysdeps/unix/sysv/linux/bits/mman-shared.h +++ b/sysdeps/unix/sysv/linux/bits/mman-shared.h @@ -43,10 +43,9 @@ # endif /* Access restrictions for pkey_alloc. */ -# ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -# define PKEY_DISABLE_WRITE 0x2 -# endif +# define PKEY_UNRESTRICTED 0x0 +# define PKEY_DISABLE_ACCESS 0x1 +# define PKEY_DISABLE_WRITE 0x2 __BEGIN_DECLS diff --git a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h index bbbb621..befd25a 100644 --- a/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h +++ b/sysdeps/unix/sysv/linux/bits/termios-c_cflag.h @@ -34,5 +34,7 @@ #define CLOCAL 0004000 #ifdef __USE_MISC -# define ADDRB 04000000000 +# define ADDRB 04000000000 +# define CMSPAR 010000000000 /* Mark or space (stick) parity. */ +# define CRTSCTS 020000000000 /* Flow control. */ #endif diff --git a/sysdeps/unix/sysv/linux/bits/termios-baud.h b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h index e63a3eb..b9aadff 100644 --- a/sysdeps/unix/sysv/linux/bits/termios-baud.h +++ b/sysdeps/unix/sysv/linux/bits/termios-cbaud.h @@ -17,32 +17,31 @@ <https://www.gnu.org/licenses/>. */ #ifndef _TERMIOS_H -# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead." +# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead." #endif #ifdef __USE_MISC # define CBAUD 000000010017 /* Baud speed mask (not in POSIX). */ # define CBAUDEX 000000010000 /* Extra baud speed mask, included in CBAUD. (not in POSIX). */ -# define CIBAUD 002003600000 /* Input baud rate (not used). */ -# define CMSPAR 010000000000 /* Mark or space (stick) parity. 
*/ -# define CRTSCTS 020000000000 /* Flow control. */ +# define CIBAUD 002003600000 /* Input baud rate. */ +# define IBSHIFT 16 #endif /* Extra output baud rates (not in POSIX). */ -#define B57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B500000 0010005 -#define B576000 0010006 -#define B921600 0010007 -#define B1000000 0010010 -#define B1152000 0010011 -#define B1500000 0010012 -#define B2000000 0010013 -#define B2500000 0010014 -#define B3000000 0010015 -#define B3500000 0010016 -#define B4000000 0010017 -#define __MAX_BAUD B4000000 +#define __BOTHER 0010000 +#define __B57600 0010001 +#define __B115200 0010002 +#define __B230400 0010003 +#define __B460800 0010004 +#define __B500000 0010005 +#define __B576000 0010006 +#define __B921600 0010007 +#define __B1000000 0010010 +#define __B1152000 0010011 +#define __B1500000 0010012 +#define __B2000000 0010013 +#define __B2500000 0010014 +#define __B3000000 0010015 +#define __B3500000 0010016 +#define __B4000000 0010017 diff --git a/sysdeps/unix/sysv/linux/bits/termios-struct.h b/sysdeps/unix/sysv/linux/bits/termios-struct.h index 4c501a5..0aba1a4 100644 --- a/sysdeps/unix/sysv/linux/bits/termios-struct.h +++ b/sysdeps/unix/sysv/linux/bits/termios-struct.h @@ -29,8 +29,15 @@ struct termios tcflag_t c_lflag; /* local mode flags */ cc_t c_line; /* line discipline */ cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ + /* Input and output baud rates. 
*/ + __extension__ union { + speed_t __ispeed; + speed_t c_ispeed; + }; #define _HAVE_STRUCT_TERMIOS_C_ISPEED 1 + __extension__ union { + speed_t __ospeed; + speed_t c_ospeed; + }; #define _HAVE_STRUCT_TERMIOS_C_OSPEED 1 }; diff --git a/sysdeps/unix/sysv/linux/bits/termios.h b/sysdeps/unix/sysv/linux/bits/termios.h index 3bd1e22..20746a0 100644 --- a/sysdeps/unix/sysv/linux/bits/termios.h +++ b/sysdeps/unix/sysv/linux/bits/termios.h @@ -24,35 +24,41 @@ typedef unsigned char cc_t; typedef unsigned int speed_t; typedef unsigned int tcflag_t; -#include <bits/termios-struct.h> +#ifdef _TERMIOS_H +# include <bits/termios-struct.h> +#endif + #include <bits/termios-c_cc.h> #include <bits/termios-c_iflag.h> #include <bits/termios-c_oflag.h> /* c_cflag bit meaning */ -#define B0 0000000 /* hang up */ -#define B50 0000001 -#define B75 0000002 -#define B110 0000003 -#define B134 0000004 -#define B150 0000005 -#define B200 0000006 -#define B300 0000007 -#define B600 0000010 -#define B1200 0000011 -#define B1800 0000012 -#define B2400 0000013 -#define B4800 0000014 -#define B9600 0000015 -#define B19200 0000016 -#define B38400 0000017 +#include <bits/termios-c_cflag.h> + #ifdef __USE_MISC -# define EXTA B19200 -# define EXTB B38400 +#define __B0 0000000 /* hang up */ +#define __B50 0000001 +#define __B75 0000002 +#define __B110 0000003 +#define __B134 0000004 +#define __B150 0000005 +#define __B200 0000006 +#define __B300 0000007 +#define __B600 0000010 +#define __B1200 0000011 +#define __B1800 0000012 +#define __B2400 0000013 +#define __B4800 0000014 +#define __B9600 0000015 +#define __B19200 0000016 +#define __B38400 0000017 +#include <bits/termios-cbaud.h> + +# define __EXTA __B19200 +# define __EXTB __B38400 +# define BOTHER __BOTHER #endif -#include <bits/termios-baud.h> -#include <bits/termios-c_cflag.h> #include <bits/termios-c_lflag.h> #ifdef __USE_MISC @@ -74,3 +80,5 @@ typedef unsigned int tcflag_t; #include <bits/termios-tcflow.h> #include <bits/termios-misc.h> + 
+#include <bits/termios-baud.h> diff --git a/sysdeps/unix/sysv/linux/cfsetspeed.c b/sysdeps/unix/sysv/linux/cfsetspeed.c new file mode 100644 index 0000000..8ce46f8 --- /dev/null +++ b/sysdeps/unix/sysv/linux/cfsetspeed.c @@ -0,0 +1,59 @@ +/* cfsetspeed(), Linux version. + Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <termios_internals.h> + +/* Set both the input and output baud rates stored in *TERMIOS_P to SPEED. 
*/ +int +__cfsetspeed (struct termios *termios_p, speed_t speed) +{ + tcflag_t cbaud = ___speed_to_cbaud (speed); + + termios_p->c_ospeed = speed; + termios_p->c_ispeed = speed; + termios_p->c_cflag &= ~(CBAUD | CIBAUD); + termios_p->c_cflag |= cbaud | (cbaud << IBSHIFT); + + return 0; +} +libc_hidden_def (__cfsetspeed) +versioned_symbol (libc, __cfsetspeed, cfsetspeed, GLIBC_2_42); + +#if _TERMIOS_OLD_COMPAT + +int +attribute_compat_text_section +__old_cfsetspeed (old_termios_t *termios_p, speed_t speed) +{ + speed_t real_speed = ___cbaud_to_speed (speed, -1); + if (real_speed == (speed_t)-1) + return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); + +#if !_HAVE_STRUCT_OLD_TERMIOS + /* Otherwise these fields don't exist in old_termios_t */ + termios_p->c_ospeed = real_speed; + termios_p->c_ispeed = real_speed; +#endif + termios_p->c_cflag &= ~(CBAUD | CIBAUD); + termios_p->c_cflag |= speed | (speed << IBSHIFT); + + return 0; +} +compat_symbol (libc, __old_cfsetspeed, cfsetspeed, GLIBC_2_0); + +#endif /* _TERMIOS_OLD_COMPAT */ diff --git a/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c new file mode 100644 index 0000000..b1df5ed --- /dev/null +++ b/sysdeps/unix/sysv/linux/close_nocancel_nostatus.c @@ -0,0 +1,28 @@ +/* Linux close syscall implementation -- non-cancellable, no errno update. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <sysdep-cancel.h> +#include <not-cancel.h> + +void +__close_nocancel_nostatus (int fd) +{ + INTERNAL_SYSCALL_CALL (close, fd); +} +libc_hidden_def (__close_nocancel_nostatus) diff --git a/sysdeps/unix/sysv/linux/csky/arch-syscall.h b/sysdeps/unix/sysv/linux/csky/arch-syscall.h index a719a55..73fdba1 100644 --- a/sysdeps/unix/sysv/linux/csky/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/csky/arch-syscall.h @@ -184,6 +184,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/csky/libc.abilist b/sysdeps/unix/sysv/linux/csky/libc.abilist index f123757..6325fc1 100644 --- a/sysdeps/unix/sysv/linux/csky/libc.abilist +++ b/sysdeps/unix/sysv/linux/csky/libc.abilist @@ -2789,6 +2789,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/csky/libm.abilist b/sysdeps/unix/sysv/linux/csky/libm.abilist index 6560f3e..4ed463c 100644 --- a/sysdeps/unix/sysv/linux/csky/libm.abilist +++ b/sysdeps/unix/sysv/linux/csky/libm.abilist @@ -913,6 +913,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 
rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h index dc592c5..d8ffab9 100644 --- a/sysdeps/unix/sysv/linux/hppa/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/hppa/arch-syscall.h @@ -214,6 +214,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 326 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 275 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/hppa/libc.abilist b/sysdeps/unix/sysv/linux/hppa/libc.abilist index 2dc85b9..86b3fbd 100644 --- a/sysdeps/unix/sysv/linux/hppa/libc.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libc.abilist @@ -2826,6 +2826,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/hppa/libm.abilist b/sysdeps/unix/sysv/linux/hppa/libm.abilist index 2938d9d..d681d6e 100644 --- a/sysdeps/unix/sysv/linux/hppa/libm.abilist +++ b/sysdeps/unix/sysv/linux/hppa/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/i386/arch-syscall.h b/sysdeps/unix/sysv/linux/i386/arch-syscall.h index c10897f..196dfec 100644 --- a/sysdeps/unix/sysv/linux/i386/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/i386/arch-syscall.h @@ -245,6 +245,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 342 #define 
__NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 295 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/i386/libc.abilist b/sysdeps/unix/sysv/linux/i386/libc.abilist index 1e38217..6555592 100644 --- a/sysdeps/unix/sysv/linux/i386/libc.abilist +++ b/sysdeps/unix/sysv/linux/i386/libc.abilist @@ -3009,6 +3009,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/i386/libm.abilist b/sysdeps/unix/sysv/linux/i386/libm.abilist index e9f296c..de77b0f 100644 --- a/sysdeps/unix/sysv/linux/i386/libm.abilist +++ b/sysdeps/unix/sysv/linux/i386/libm.abilist @@ -1308,6 +1308,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/if_index.c b/sysdeps/unix/sysv/linux/if_index.c index 0b01fd1..5d13759 100644 --- a/sysdeps/unix/sysv/linux/if_index.c +++ b/sysdeps/unix/sysv/linux/if_index.c @@ -32,35 +32,23 @@ unsigned int __if_nametoindex (const char *ifname) { -#ifndef SIOCGIFINDEX - __set_errno (ENOSYS); - return 0; -#else - struct ifreq ifr; if (strlen (ifname) >= IFNAMSIZ) { __set_errno (ENODEV); return 0; } - strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name)); - int fd = __opensock (); - if (fd < 0) return 0; - if (__ioctl (fd, SIOCGIFINDEX, &ifr) < 0) - { - int saved_errno = errno; - 
__close_nocancel_nostatus (fd); - if (saved_errno == EINVAL) - __set_errno (ENOSYS); - return 0; - } + struct ifreq ifr; + strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name)); + + int status = __ioctl (fd, SIOCGIFINDEX, &ifr); __close_nocancel_nostatus (fd); - return ifr.ifr_ifindex; -#endif + + return status < 0 ? 0 : ifr.ifr_ifindex; } libc_hidden_def (__if_nametoindex) weak_alias (__if_nametoindex, if_nametoindex) @@ -83,8 +71,8 @@ weak_alias (__if_freenameindex, if_freenameindex) libc_hidden_weak (if_freenameindex) -static struct if_nameindex * -if_nameindex_netlink (void) +struct if_nameindex * +__if_nameindex (void) { struct netlink_handle nh = { 0, 0, 0, NULL, NULL }; struct if_nameindex *idx = NULL; @@ -196,19 +184,6 @@ if_nameindex_netlink (void) return idx; } - - -struct if_nameindex * -__if_nameindex (void) -{ -#ifndef SIOCGIFINDEX - __set_errno (ENOSYS); - return NULL; -#else - struct if_nameindex *result = if_nameindex_netlink (); - return result; -#endif -} weak_alias (__if_nameindex, if_nameindex) libc_hidden_weak (if_nameindex) diff --git a/sysdeps/unix/sysv/linux/isatty.c b/sysdeps/unix/sysv/linux/isatty.c new file mode 100644 index 0000000..3faaec5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/isatty.c @@ -0,0 +1,29 @@ +/* Test whether a file descriptor refers to a terminal. Linux version. + Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <termios_internals.h> + +/* Return 1 if FD is a terminal, 0 if not. This simply does a + TCGETS2 ioctl into a dummy buffer without parsing the result. */ +int +__isatty (int fd) +{ + struct termios2 k_termios; + return INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0; +} +weak_alias (__isatty, isatty) diff --git a/sysdeps/unix/sysv/linux/isatty_nostatus.c b/sysdeps/unix/sysv/linux/isatty_nostatus.c new file mode 100644 index 0000000..406decb --- /dev/null +++ b/sysdeps/unix/sysv/linux/isatty_nostatus.c @@ -0,0 +1,26 @@ +/* Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <termios_internals.h> + +/* Return 1 if FD is a terminal, 0 if not, without changing errno */ +int +__isatty_nostatus (int fd) +{ + struct termios2 k_termios; + return INTERNAL_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios) == 0; +} diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h index 86b2d3c..a49a915 100644 --- a/sysdeps/unix/sysv/linux/kernel-features.h +++ b/sysdeps/unix/sysv/linux/kernel-features.h @@ -54,6 +54,10 @@ configurations). 
*/ #define __ASSUME_SET_ROBUST_LIST 1 +/* The termios2 interface was introduced across all architectures except + Alpha in kernel 2.6.22. */ +#define __ASSUME_TERMIOS2 1 + /* Support for various CLOEXEC and NONBLOCK flags was added in 2.6.27. */ #define __ASSUME_IN_NONBLOCK 1 diff --git a/sysdeps/unix/sysv/linux/libc_sigaction.c b/sysdeps/unix/sysv/linux/libc_sigaction.c index bbfc177..67dbc04 100644 --- a/sysdeps/unix/sysv/linux/libc_sigaction.c +++ b/sysdeps/unix/sysv/linux/libc_sigaction.c @@ -49,7 +49,7 @@ __libc_sigaction (int sig, const struct sigaction *act, struct sigaction *oact) { kact.k_sa_handler = act->sa_handler; memcpy (&kact.sa_mask, &act->sa_mask, sizeof (sigset_t)); - kact.sa_flags = act->sa_flags; + kact.sa_flags = (unsigned int) act->sa_flags; SET_SA_RESTORER (&kact, act); } diff --git a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h index f123d84..f57a152 100644 --- a/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/loongarch/arch-syscall.h @@ -171,6 +171,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist index 927fc21..a6cab96 100644 --- a/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist +++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist @@ -2273,6 +2273,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 
uimaxabs F diff --git a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist index 8e35285..4b3ea80 100644 --- a/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist +++ b/sysdeps/unix/sysv/linux/loongarch/lp64/libm.abilist @@ -1148,6 +1148,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h index 715809a..a95cb41 100644 --- a/sysdeps/unix/sysv/linux/m68k/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/m68k/arch-syscall.h @@ -234,6 +234,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 341 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 288 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist index 74da49d..7b7b72a 100644 --- a/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist @@ -2785,6 +2785,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist index 825ba11..63bad09 100644 --- a/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist +++ 
b/sysdeps/unix/sysv/linux/m68k/coldfire/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist index e5d6781..df398e4 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist @@ -2952,6 +2952,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist index 45026df..9dba60b 100644 --- a/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist +++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libm.abilist @@ -974,6 +974,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h index 24e218f..fe08f5c 100644 --- a/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/microblaze/arch-syscall.h @@ -244,6 +244,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 372 #define __NR_open_tree 428 +#define __NR_open_tree_attr 
467 #define __NR_openat 295 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist index 4dbd4b6..ca8df6f 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libc.abilist @@ -2838,6 +2838,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist index a428778..5596e08 100644 --- a/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/be/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist index c5965bb..9508154 100644 --- a/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/le/libc.abilist @@ -2835,6 +2835,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed 
F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist index a428778..5596e08 100644 --- a/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/microblaze/le/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/mips/Versions b/sysdeps/unix/sysv/linux/mips/Versions index 9ea0fa6..48f0037 100644 --- a/sysdeps/unix/sysv/linux/mips/Versions +++ b/sysdeps/unix/sysv/linux/mips/Versions @@ -26,6 +26,10 @@ libc { pthread_attr_setstack; pthread_attr_setstacksize; } + GLIBC_2.42 { + tcgetattr; + tcsetattr; + } GLIBC_PRIVATE { # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime). __vdso_clock_gettime; diff --git a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h b/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h deleted file mode 100644 index ef69821..0000000 --- a/sysdeps/unix/sysv/linux/mips/bits/termios-struct.h +++ /dev/null @@ -1,34 +0,0 @@ -/* struct termios definition. Linux/mips version. - Copyright (C) 2019-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _TERMIOS_H -# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead." -#endif - -#define NCCS 32 -struct termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ -#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0 -#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0 - }; diff --git a/sysdeps/unix/sysv/linux/mips/kernel_termios.h b/sysdeps/unix/sysv/linux/mips/kernel_termios.h deleted file mode 100644 index fd8d35a..0000000 --- a/sysdeps/unix/sysv/linux/mips/kernel_termios.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 1997-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _KERNEL_TERMIOS_H -#define _KERNEL_TERMIOS_H 1 -/* The following corresponds to the values from the Linux 2.1.24 kernel. 
*/ - -#define __KERNEL_NCCS 23 - -struct __kernel_termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[__KERNEL_NCCS]; /* control characters */ - }; - -#define _HAVE_C_ISPEED 0 -#define _HAVE_C_OSPEED 0 - -#endif /* kernel_termios.h */ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h index a7615cb..7d76d65 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/arch-syscall.h @@ -229,6 +229,7 @@ #define __NR_open 4005 #define __NR_open_by_handle_at 4340 #define __NR_open_tree 4428 +#define __NR_open_tree_attr 4467 #define __NR_openat 4288 #define __NR_openat2 4437 #define __NR_pause 4029 diff --git a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist index 10715e0..4d51cc4 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist @@ -2913,7 +2913,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist index 1e13743..cdcc488 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 
powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist index 3d229b9..7f90fad 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist @@ -2911,7 +2911,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist index 8182a71..888164b 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/libm.abilist @@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h index 4d863c2..bca3ea6 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/arch-syscall.h @@ -212,6 +212,7 @@ #define __NR_open 6002 #define __NR_open_by_handle_at 
6304 #define __NR_open_tree 6428 +#define __NR_open_tree_attr 6467 #define __NR_openat 6251 #define __NR_openat2 6437 #define __NR_pause 6033 diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist index e4cb452..fc366d1 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist @@ -2919,7 +2919,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h index 9b6683e..5bcd929 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/arch-syscall.h @@ -201,6 +201,7 @@ #define __NR_open 5002 #define __NR_open_by_handle_at 5299 #define __NR_open_tree 5428 +#define __NR_open_tree_attr 5467 #define __NR_openat 5247 #define __NR_openat2 5437 #define __NR_pause 5033 diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist index 8a32d25..debd5c3 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist @@ -2821,7 +2821,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 
cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/mips/termios_arch.h b/sysdeps/unix/sysv/linux/mips/termios_arch.h new file mode 100644 index 0000000..392d9aa --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/termios_arch.h @@ -0,0 +1,34 @@ +/* Architectural parameters for Linux termios - MIPS version + + Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _TERMIOS2_NCCS 23 +#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0 + +#define _HAVE_STRUCT_OLD_TERMIOS 1 + +#define OLD_NCCS 32 +struct old_termios +{ + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[OLD_NCCS]; /* control characters */ +}; diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h index ece3297..5ac6dd4 100644 --- a/sysdeps/unix/sysv/linux/not-cancel.h +++ b/sysdeps/unix/sysv/linux/not-cancel.h @@ -53,6 +53,9 @@ __typeof (__write) __write_nocancel; /* Uncancelable close. */ __typeof (__close) __close_nocancel; +/* Uncancellable close that does not also set errno in case of failure. */ +void __close_nocancel_nostatus (int); + /* Uncancelable fcntl. */ int __fcntl64_nocancel (int, int, ...); @@ -65,17 +68,10 @@ hidden_proto (__read_nocancel) hidden_proto (__pread64_nocancel) hidden_proto (__write_nocancel) hidden_proto (__close_nocancel) +hidden_proto (__close_nocancel_nostatus) hidden_proto (__fcntl64_nocancel) #endif -/* Non cancellable close syscall that does not also set errno in case of - failure. */ -static inline void -__close_nocancel_nostatus (int fd) -{ - __close_nocancel (fd); -} - /* Non cancellable writev syscall that does not also set errno in case of failure. */ static inline void diff --git a/sysdeps/unix/sysv/linux/old_termios.h b/sysdeps/unix/sysv/linux/old_termios.h new file mode 100644 index 0000000..56d19ba --- /dev/null +++ b/sysdeps/unix/sysv/linux/old_termios.h @@ -0,0 +1,23 @@ +/* old_termios.h for Linux other than MIPS and SPARC + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* By default, no old termios structure */ +#define _HAVE_STRUCT_OLD_TERMIOS 0 +#define OLD_NCCS NCCS +typedef struct termios old_termios_t; diff --git a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h index a071c76..c2a1d51 100644 --- a/sysdeps/unix/sysv/linux/or1k/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/or1k/arch-syscall.h @@ -183,6 +183,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_or1k_atomic 244 diff --git a/sysdeps/unix/sysv/linux/or1k/libc.abilist b/sysdeps/unix/sysv/linux/or1k/libc.abilist index 64dac95..b62d59f 100644 --- a/sysdeps/unix/sysv/linux/or1k/libc.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libc.abilist @@ -2263,6 +2263,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git 
a/sysdeps/unix/sysv/linux/or1k/libm.abilist b/sysdeps/unix/sysv/linux/or1k/libm.abilist index 029c3cb..bef7a98 100644 --- a/sysdeps/unix/sysv/linux/or1k/libm.abilist +++ b/sysdeps/unix/sysv/linux/or1k/libm.abilist @@ -847,6 +847,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h index 9ea8cfb..a90d581 100644 --- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h +++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-c_cflag.h @@ -35,5 +35,7 @@ #define CLOCAL 00100000 #ifdef __USE_MISC -# define ADDRB 04000000000 +# define ADDRB 04000000000 +# define CMSPAR 010000000000 /* Mark or space (stick) parity. */ +# define CRTSCTS 020000000000 /* Flow control. */ #endif diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h index 374d9f8..7bcbba4 100644 --- a/sysdeps/unix/sysv/linux/powerpc/bits/termios-baud.h +++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios-cbaud.h @@ -17,29 +17,29 @@ <https://www.gnu.org/licenses/>. */ #ifndef _TERMIOS_H -# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead." +# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead." 
#endif #ifdef __USE_MISC -# define CBAUD 0000377 -# define CBAUDEX 0000020 -# define CMSPAR 010000000000 /* mark or space (stick) parity */ -# define CRTSCTS 020000000000 /* flow control */ +# define CBAUD 000000377 +# define CBAUDEX 000000020 +# define CIBAUD 077600000 +# define IBSHIFT 16 #endif -#define B57600 00020 -#define B115200 00021 -#define B230400 00022 -#define B460800 00023 -#define B500000 00024 -#define B576000 00025 -#define B921600 00026 -#define B1000000 00027 -#define B1152000 00030 -#define B1500000 00031 -#define B2000000 00032 -#define B2500000 00033 -#define B3000000 00034 -#define B3500000 00035 -#define B4000000 00036 -#define __MAX_BAUD B4000000 +#define __B57600 00020 +#define __B115200 00021 +#define __B230400 00022 +#define __B460800 00023 +#define __B500000 00024 +#define __B576000 00025 +#define __B921600 00026 +#define __B1000000 00027 +#define __B1152000 00030 +#define __B1500000 00031 +#define __B2000000 00032 +#define __B2500000 00033 +#define __B3000000 00034 +#define __B3500000 00035 +#define __B4000000 00036 +#define __BOTHER 00037 diff --git a/sysdeps/unix/sysv/linux/powerpc/configure b/sysdeps/unix/sysv/linux/powerpc/configure index 61ae675..ef2055d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/configure +++ b/sysdeps/unix/sysv/linux/powerpc/configure @@ -40,48 +40,7 @@ fi printf "%s\n" "$libc_cv_mlong_double_128ibm" >&6; } if test "$libc_cv_mlong_double_128ibm" = no; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC $CFLAGS supports -mabi=ibmlongdouble" >&5 -printf %s "checking whether $CC $CFLAGS supports -mabi=ibmlongdouble... " >&6; } -if test ${libc_cv_mabi_ibmlongdouble+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -#include <float.h> -int -main (void) -{ - -#if LDBL_MANT_DIG != 106 -# error "compiler doesn't implement IBM extended format of long double" -#endif -long double foobar (long double x) { return x; } - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - libc_cv_mabi_ibmlongdouble=yes -else case e in #( - e) libc_cv_mabi_ibmlongdouble=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS="$save_CFLAGS" ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mabi_ibmlongdouble" >&5 -printf "%s\n" "$libc_cv_mabi_ibmlongdouble" >&6; } - - if test "$libc_cv_mabi_ibmlongdouble" = yes; then - CFLAGS="$CFLAGS -mabi=ibmlongdouble" - else - as_fn_error $? "this configuration requires -mlong-double-128 IBM extended format support" "$LINENO" 5 - fi + CFLAGS="$CFLAGS -mabi=ibmlongdouble" fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports --no-tls-get-addr-optimize" >&5 diff --git a/sysdeps/unix/sysv/linux/powerpc/configure.ac b/sysdeps/unix/sysv/linux/powerpc/configure.ac index 8d2ec60..42347a6 100644 --- a/sysdeps/unix/sysv/linux/powerpc/configure.ac +++ b/sysdeps/unix/sysv/linux/powerpc/configure.ac @@ -16,24 +16,7 @@ long double foobar (long double x) { return x; }]])], CFLAGS="$save_CFLAGS"]) if test "$libc_cv_mlong_double_128ibm" = no; then - AC_CACHE_CHECK(whether $CC $CFLAGS supports -mabi=ibmlongdouble, - libc_cv_mabi_ibmlongdouble, [dnl - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -mlong-double-128 -mabi=ibmlongdouble" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <float.h>]], [[ -#if LDBL_MANT_DIG != 106 -# error "compiler doesn't implement IBM extended format of long double" -#endif -long double foobar (long double x) { return x; }]])], - libc_cv_mabi_ibmlongdouble=yes, - libc_cv_mabi_ibmlongdouble=no) - CFLAGS="$save_CFLAGS"]) - - if test "$libc_cv_mabi_ibmlongdouble" = yes; then - CFLAGS="$CFLAGS -mabi=ibmlongdouble" - else - AC_MSG_ERROR([this 
configuration requires -mlong-double-128 IBM extended format support]) - fi + CFLAGS="$CFLAGS -mabi=ibmlongdouble" fi LIBC_LINKER_FEATURE([--no-tls-get-addr-optimize], [-Wl,--no-tls-get-addr-optimize], diff --git a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h b/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h deleted file mode 100644 index f6ea570..0000000 --- a/sysdeps/unix/sysv/linux/powerpc/kernel_termios.h +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (C) 1997-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _KERNEL_TERMIOS_H -#define _KERNEL_TERMIOS_H 1 - -/* We need the definition of tcflag_t, cc_t, and speed_t. */ -#include <termios.h> - -#define __KERNEL_NCCS 19 - -struct __kernel_termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_cc[__KERNEL_NCCS]; /* control characters */ - cc_t c_line; /* line discipline */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ - }; - -#define _HAVE_C_ISPEED 1 -#define _HAVE_C_OSPEED 1 - -/* We have the kernel termios structure, so we can presume this code knows - what it's doing... 
*/ - -#undef TCGETS -#undef TCSETS -#undef TCSETSW -#undef TCSETSF -#define TCGETS _IOR ('t', 19, struct __kernel_termios) -#define TCSETS _IOW ('t', 20, struct __kernel_termios) -#define TCSETSW _IOW ('t', 21, struct __kernel_termios) -#define TCSETSF _IOW ('t', 22, struct __kernel_termios) - -#endif /* kernel_termios.h */ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h index b3481e4..c371df8 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/arch-syscall.h @@ -235,6 +235,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 346 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 286 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist index cc5e93c..883e66f 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist @@ -3142,6 +3142,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist index a43cb2c..7f584d3 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libm.abilist @@ -1085,6 +1085,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 
rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist index 9814997..84cd9e0 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist @@ -3187,6 +3187,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist index 3a08e9f..d1cd4b1 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libm.abilist @@ -1084,6 +1084,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h index 45108e8..df8844d 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/arch-syscall.h @@ -220,6 +220,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 346 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 286 #define __NR_openat2 437 #define 
__NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist index 7f46295..8832568 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist @@ -2896,6 +2896,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist index 93796cd..bfc5310 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libm.abilist @@ -1078,6 +1078,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist index f24f81b..b6ff801 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist @@ -2972,6 +2972,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed 
F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist index 7fe20c0..dedfefc 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libm.abilist @@ -1432,6 +1432,7 @@ GLIBC_2.41 tanpil F GLIBC_2.42 __compoundnieee128 F GLIBC_2.42 __pownieee128 F GLIBC_2.42 __powrieee128 F +GLIBC_2.42 __rootnieee128 F GLIBC_2.42 __rsqrtieee128 F GLIBC_2.42 compoundn F GLIBC_2.42 compoundnf F @@ -1457,6 +1458,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/powerpc/termios_arch.h b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h new file mode 100644 index 0000000..919b437 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/termios_arch.h @@ -0,0 +1,33 @@ +/* Architectural parameters for Linux termios - PowerPC version + + Copyright (C) 1997-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define _TERMIOS2_NCCS 19 +#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 1 +#define _HAVE_STRUCT_OLD_TERMIOS 0 + +/* PowerPC quirk: on PowerPC only, ioctl() emulates the TCGETS/TCSETS* + ioctls with tcgetattr/tcsetattr using the glibc struct termios. + As struct termios2 is the same as the kernel struct termios on PowerPC, + simply consider the kernel ones as the termios2 interface, even + though the kernel doesn't call it that. */ + +#define TCGETS2 _IOR ('t', 19, struct termios2) +#define TCSETS2 _IOW ('t', 20, struct termios2) +#define TCSETSW2 _IOW ('t', 21, struct termios2) +#define TCSETSF2 _IOW ('t', 22, struct termios2) diff --git a/sysdeps/unix/sysv/linux/riscv/hwprobe.c b/sysdeps/unix/sysv/linux/riscv/hwprobe.c index e0cbd22..bc7f6f3 100644 --- a/sysdeps/unix/sysv/linux/riscv/hwprobe.c +++ b/sysdeps/unix/sysv/linux/riscv/hwprobe.c @@ -23,13 +23,13 @@ #include <sysdep-vdso.h> int __riscv_hwprobe (struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long int *cpus, + size_t cpusetsize, __RISCV_HWPROBE_CPUS_TYPE cpus, unsigned int flags) { int r; r = INTERNAL_VSYSCALL (riscv_hwprobe, 5, pairs, pair_count, - cpu_count, cpus, flags); + cpusetsize, cpus.__ul, flags); /* Negate negative errno values to match pthreads API. 
*/ return -r; diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h index 5333879..1bae763 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/riscv/rv32/arch-syscall.h @@ -168,6 +168,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist index 9330c7a..1771a23 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist @@ -2516,6 +2516,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist index 454235d..9342294 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv32/libm.abilist @@ -1148,6 +1148,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h index eed1dff..1a1ebf8 100644 --- 
a/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/riscv/rv64/arch-syscall.h @@ -175,6 +175,7 @@ #define __NR_nfsservctl 42 #define __NR_open_by_handle_at 265 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 56 #define __NR_openat2 437 #define __NR_perf_event_open 241 diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist index ea4555d..4b48352 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist @@ -2716,6 +2716,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist index b01d2b4..76e74c9 100644 --- a/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist +++ b/sysdeps/unix/sysv/linux/riscv/rv64/libm.abilist @@ -1245,6 +1245,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h index bebad6c..40415aa 100644 --- a/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h +++ b/sysdeps/unix/sysv/linux/riscv/sys/hwprobe.h @@ -21,6 +21,7 @@ #define _SYS_HWPROBE_H 1 #include <features.h> +#include <sched.h> #include <stddef.h> #include <errno.h> 
#ifdef __has_include @@ -63,22 +64,39 @@ struct riscv_hwprobe { __BEGIN_DECLS -extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs, size_t __pair_count, - size_t __cpu_count, unsigned long int *__cpus, +#if defined __cplusplus || !__GNUC_PREREQ (2, 7) +# define __RISCV_HWPROBE_CPUS_TYPE cpu_set_t * +#else +/* The fourth argument to __riscv_hwprobe should be a null pointer or a + pointer to a cpu_set_t (either the fixed-size type or allocated with + CPU_ALLOC). However, early versions of this header file used the + argument type unsigned long int *. The transparent union allows + the argument to be either cpu_set_t * or unsigned long int * for + compatibility. The older header file requiring unsigned long int * + can be identified by the lack of the __RISCV_HWPROBE_CPUS_TYPE macro. + In C++ and with compilers that do not support transparent unions, the + argument type must be cpu_set_t *. */ +typedef union { + cpu_set_t *__cs; + unsigned long int *__ul; +} __RISCV_HWPROBE_CPUS_TYPE __attribute__ ((__transparent_union__)); +# define __RISCV_HWPROBE_CPUS_TYPE __RISCV_HWPROBE_CPUS_TYPE +#endif + +extern int __riscv_hwprobe (struct riscv_hwprobe *__pairs, + size_t __pair_count, size_t __cpusetsize, + __RISCV_HWPROBE_CPUS_TYPE __cpus, unsigned int __flags) - __nonnull ((1)) __wur - __fortified_attr_access (__read_write__, 1, 2) - __fortified_attr_access (__read_only__, 4, 3); + __THROW __nonnull ((1)) __attr_access ((__read_write__, 1, 2)); -/* A pointer to the __riscv_hwprobe vDSO function is passed as the second +/* A pointer to the __riscv_hwprobe function is passed as the second argument to ifunc selector routines. Include a function pointer type for convenience in calling the function in those settings. 
*/ -typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs, size_t __pair_count, - size_t __cpu_count, unsigned long int *__cpus, +typedef int (*__riscv_hwprobe_t) (struct riscv_hwprobe *__pairs, + size_t __pair_count, size_t __cpusetsize, + __RISCV_HWPROBE_CPUS_TYPE __cpus, unsigned int __flags) - __nonnull ((1)) __wur - __fortified_attr_access (__read_write__, 1, 2) - __fortified_attr_access (__read_only__, 4, 3); + __nonnull ((1)) __attr_access ((__read_write__, 1, 2)); /* Helper function usable from ifunc selectors that probes a single key. */ static __inline int diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h index ee015df..05e0e05 100644 --- a/sysdeps/unix/sysv/linux/riscv/sysdep.h +++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h @@ -145,11 +145,12 @@ # define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres" # define HAVE_CLOCK_GETTIME64_VSYSCALL "__vdso_clock_gettime" # define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday" +# define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom" # else # define VDSO_NAME "LINUX_5.4" # define VDSO_HASH 61765876 -/* RV32 does not support the gettime VDSO syscalls. */ +/* RV32 does not support the gettime and getrandom VDSO syscalls. 
*/ # endif # define HAVE_CLONE3_WRAPPER 1 diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h index 0bf8f95..f77f39f 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/s390/s390-32/arch-syscall.h @@ -232,6 +232,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 336 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 288 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist index 3e625fa..f0decc7 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist @@ -3140,6 +3140,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist index b37c0b5..be2d177 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-32/libm.abilist @@ -1372,6 +1372,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h index 061f8db..65d6644 100644 --- 
a/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/s390/s390-64/arch-syscall.h @@ -204,6 +204,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 336 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 288 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist index 46b4a04..da8a2bf 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist @@ -2933,6 +2933,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist index 42bfa28..7d7ba26 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist +++ b/sysdeps/unix/sysv/linux/s390/s390-64/libm.abilist @@ -1372,6 +1372,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/sh/arch-syscall.h b/sysdeps/unix/sysv/linux/sh/arch-syscall.h index 52cc320..5948ab0 100644 --- a/sysdeps/unix/sysv/linux/sh/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sh/arch-syscall.h @@ -228,6 +228,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 360 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 
#define __NR_openat 295 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/sh/be/libc.abilist b/sysdeps/unix/sysv/linux/sh/be/libc.abilist index 36a94c9..fb30341 100644 --- a/sysdeps/unix/sysv/linux/sh/be/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/be/libc.abilist @@ -2832,6 +2832,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/sh/be/libm.abilist b/sysdeps/unix/sysv/linux/sh/be/libm.abilist index 8ba29d2..5b0b080 100644 --- a/sysdeps/unix/sysv/linux/sh/be/libm.abilist +++ b/sysdeps/unix/sysv/linux/sh/be/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/sh/le/libc.abilist b/sysdeps/unix/sysv/linux/sh/le/libc.abilist index f79aba6..d716673 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libc.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libc.abilist @@ -2829,6 +2829,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git 
a/sysdeps/unix/sysv/linux/sh/le/libm.abilist b/sysdeps/unix/sysv/linux/sh/le/libm.abilist index 8ba29d2..5b0b080 100644 --- a/sysdeps/unix/sysv/linux/sh/le/libm.abilist +++ b/sysdeps/unix/sysv/linux/sh/le/libm.abilist @@ -938,6 +938,12 @@ GLIBC_2.42 powrf32 F GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf32 F diff --git a/sysdeps/unix/sysv/linux/sparc/Versions b/sysdeps/unix/sysv/linux/sparc/Versions index f127bdf..7dd61a5 100644 --- a/sysdeps/unix/sysv/linux/sparc/Versions +++ b/sysdeps/unix/sysv/linux/sparc/Versions @@ -29,6 +29,10 @@ libc { __getshmlba; } + GLIBC_2.42 { + tcgetattr; + tcsetattr; + } GLIBC_PRIVATE { # nptl/pthread_cond_timedwait.c uses INTERNAL_VSYSCALL(clock_gettime). __vdso_clock_gettime; diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h index 677db7b..34eba18 100644 --- a/sysdeps/unix/sysv/linux/sparc/bits/termios-baud.h +++ b/sysdeps/unix/sysv/linux/sparc/bits/termios-cbaud.h @@ -17,30 +17,29 @@ <https://www.gnu.org/licenses/>. */ #ifndef _TERMIOS_H -# error "Never include <bits/termios-baud.h> directly; use <termios.h> instead." +# error "Never include <bits/termios-cbaud.h> directly; use <termios.h> instead." 
#endif #ifdef __USE_MISC # define CBAUD 0x0000100f # define CBAUDEX 0x00001000 -# define CIBAUD 0x100f0000 /* input baud rate (not used) */ -# define CMSPAR 0x40000000 /* mark or space (stick) parity */ -# define CRTSCTS 0x80000000 /* flow control */ +# define CIBAUD 0x100f0000 /* input baud rate */ +# define IBSHIFT 16 #endif -#define B57600 0x00001001 -#define B115200 0x00001002 -#define B230400 0x00001003 -#define B460800 0x00001004 -#define B76800 0x00001005 -#define B153600 0x00001006 -#define B307200 0x00001007 -#define B614400 0x00001008 -#define B921600 0x00001009 -#define B500000 0x0000100a -#define B576000 0x0000100b -#define B1000000 0x0000100c -#define B1152000 0x0000100d -#define B1500000 0x0000100e -#define B2000000 0x0000100f -#define __MAX_BAUD B2000000 +#define __B57600 0x00001001 +#define __B115200 0x00001002 +#define __B230400 0x00001003 +#define __B460800 0x00001004 +#define __B76800 0x00001005 +#define __B153600 0x00001006 +#define __B307200 0x00001007 +#define __B614400 0x00001008 +#define __B921600 0x00001009 +#define __B500000 0x0000100a +#define __B576000 0x0000100b +#define __B1000000 0x0000100c +#define __B1152000 0x0000100d +#define __B1500000 0x0000100e +#define __B2000000 0x0000100f +#define __BOTHER 0x00001000 diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h b/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h deleted file mode 100644 index 269ca9d..0000000 --- a/sysdeps/unix/sysv/linux/sparc/bits/termios-struct.h +++ /dev/null @@ -1,34 +0,0 @@ -/* struct termios definition. Linux/sparc version. - Copyright (C) 2019-2025 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - <https://www.gnu.org/licenses/>. */ - -#ifndef _TERMIOS_H -# error "Never include <bits/termios-struct.h> directly; use <termios.h> instead." -#endif - -#define NCCS 17 -struct termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ -#define _HAVE_STRUCT_TERMIOS_C_ISPEED 0 -#define _HAVE_STRUCT_TERMIOS_C_OSPEED 0 - }; diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h index ee870bc..85828a8 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/arch-syscall.h @@ -230,6 +230,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 333 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 284 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist index 4a6acc0..6deedf2 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist @@ -3161,7 +3161,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 
cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist index 4d10689..8107101 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libm.abilist @@ -1379,6 +1379,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h index 3acbebe..d83ecd1 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/arch-syscall.h @@ -211,6 +211,7 @@ #define __NR_open 5 #define __NR_open_by_handle_at 333 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 284 #define __NR_openat2 437 #define __NR_pause 29 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist index 931109d..1ce22bf 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist @@ -2797,7 +2797,19 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F +GLIBC_2.42 tcgetattr F +GLIBC_2.42 tcsetattr F 
GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F GLIBC_2.42 ulabs F diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist index 6c64126..418ed9d 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libm.abilist @@ -1269,6 +1269,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/sparc/termios_arch.h b/sysdeps/unix/sysv/linux/sparc/termios_arch.h new file mode 100644 index 0000000..f3b3f65 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/termios_arch.h @@ -0,0 +1,34 @@ +/* Architectural parameters for Linux termios - SPARC version + + Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define _TERMIOS2_NCCS 19 +#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0 + +#define _HAVE_STRUCT_OLD_TERMIOS 1 + +#define OLD_NCCS 17 +struct old_termios +{ + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[OLD_NCCS]; /* control characters */ +}; diff --git a/sysdeps/unix/sysv/linux/speed.c b/sysdeps/unix/sysv/linux/speed.c index 017f741..4efb0de 100644 --- a/sysdeps/unix/sysv/linux/speed.c +++ b/sysdeps/unix/sysv/linux/speed.c @@ -16,82 +16,351 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <stddef.h> -#include <errno.h> -#include <termios.h> -#include <sysdep.h> +#include <termios_internals.h> -/* This is a gross hack around a kernel bug. If the cfsetispeed functions - is called with the SPEED argument set to zero this means use the same - speed as for output. But we don't have independent input and output - speeds and therefore cannot record this. +/* Conversions between legacy c_cflag fields and actual baud rates */ - We use an unused bit in the `c_iflag' field to keep track of this - use of `cfsetispeed'. The value here must correspond to the one used - in `tcsetattr.c'. */ -#define IBAUD0 020000000000 +/* These expressions may seem complicated; the _cbix() macro + compresses the CBAUD field into an index in the range 0-31. On most + Linux platforms, the CBAUD field is 5 bits, but the topmost bit + indicated by CBAUDEX, is discontinuous with the rest. + + The resulting masks look like: + + Alpha PowerPC others + + CBAUD 0x001f 0x00ff 0x100f + CBAUDEX 0x0000 0x0010 0x1000 + + LOWCBAUD 0x001f 0x000f 0x000f + CBAUDMASK 0x001f 0x001f 0x100f + + CBAUDMASK is used to test for invalid values passed to the + compatibility functions or in termios::c_cflag on PowerPC. 
+ + The divide-multiply sequence in the _cbix() macro gets converted + to shift and masks as necessary by the compiler. */ + +#define LOWCBAUD (CBAUD & (CBAUDEX-1)) +#define _cbix(x) (((x) & LOWCBAUD) | \ + (CBAUDEX ? ((x) & CBAUDEX)/CBAUDEX * (LOWCBAUD+1) : 0)) +#define CBAUDMASK (LOWCBAUD | CBAUDEX) + +/* Compile time sanity checks for broken CBAUD or CIBAUD definitions */ +#if CIBAUD != (CBAUD << IBSHIFT) +# error "CIBAUD should == CBAUD << IBSHIFT" +#elif CBAUDEX & (CBAUDEX-1) +# error "CBAUDEX should either be 0 or a single bit" +#elif !(CBAUD & 1) +# error "The CBAUD field should start at bit 0" +#elif CBAUDEX & ~CBAUD +# error "CBAUD should include the CBAUDEX bit" +#endif + +speed_t +___cbaud_to_speed (tcflag_t c_cflag, speed_t other) +{ + static const speed_t cbaudix_to_speed [] = + { + [0 ... _cbix(CBAUDMASK)] = -1, + [_cbix(__B0)] = 0, + [_cbix(__B50)] = 50, + [_cbix(__B75)] = 75, + [_cbix(__B110)] = 110, + [_cbix(__B134)] = 134, + [_cbix(__B150)] = 150, + [_cbix(__B200)] = 200, + [_cbix(__B300)] = 300, + [_cbix(__B600)] = 600, + [_cbix(__B1200)] = 1200, + [_cbix(__B1800)] = 1800, + [_cbix(__B2400)] = 2400, + [_cbix(__B4800)] = 4800, + [_cbix(__B9600)] = 9600, + [_cbix(__B19200)] = 19200, + [_cbix(__B38400)] = 38400, + [_cbix(__B57600)] = 57600, + [_cbix(__B115200)] = 115200, + [_cbix(__B230400)] = 230400, + [_cbix(__B460800)] = 460800, + [_cbix(__B500000)] = 500000, + [_cbix(__B576000)] = 576000, + [_cbix(__B921600)] = 921600, + [_cbix(__B1000000)] = 1000000, + [_cbix(__B1152000)] = 1152000, + [_cbix(__B1500000)] = 1500000, + [_cbix(__B2000000)] = 2000000, +#ifdef __B7200 + [_cbix(__B7200)] = 7200, +#endif +#ifdef __B14400 + [_cbix(__B14400)] = 14400, +#endif +#ifdef __B28800 + [_cbix(__B28800)] = 28800, +#endif +#ifdef __B76800 + [_cbix(__B76800)] = 76800, +#endif +#ifdef __B153600 + [_cbix(__B153600)] = 153600, +#endif +#ifdef __B307200 + [_cbix(__B307200)] = 307200, +#endif +#ifdef __B614400 + [_cbix(__B614400)] = 614400, +#endif +#ifdef __B2500000 + 
[_cbix(__B2500000)] = 2500000, +#endif +#ifdef __B3000000 + [_cbix(__B3000000)] = 3000000, +#endif +#ifdef __B3500000 + [_cbix(__B3500000)] = 3500000, +#endif +#ifdef __B4000000 + [_cbix(__B4000000)] = 4000000, +#endif + }; + speed_t speed; + + if (c_cflag & (tcflag_t)(~CBAUDMASK)) + return other; + + speed = cbaudix_to_speed[_cbix(c_cflag)]; + return speed == (speed_t)-1 ? other : speed; +} + +tcflag_t +___speed_to_cbaud (speed_t speed) +{ + switch (speed) { + case 0: + return __B0; + case 50: + return __B50; + case 75: + return __B75; + case 110: + return __B110; + case 134: + return __B134; + case 150: + return __B150; + case 200: + return __B200; + case 300: + return __B300; + case 600: + return __B600; + case 1200: + return __B1200; + case 1800: + return __B1800; + case 2400: + return __B2400; + case 4800: + return __B4800; + case 9600: + return __B9600; + case 19200: + return __B19200; + case 38400: + return __B38400; + case 57600: + return __B57600; + case 115200: + return __B115200; + case 230400: + return __B230400; + case 460800: + return __B460800; + case 500000: + return __B500000; + case 576000: + return __B576000; + case 921600: + return __B921600; + case 1000000: + return __B1000000; + case 1152000: + return __B1152000; + case 1500000: + return __B1500000; + case 2000000: + return __B2000000; +#ifdef __B76800 + case 76800: + return __B76800; +#endif +#ifdef __B153600 + case 153600: + return __B153600; +#endif +#ifdef __B307200 + case 307200: + return __B307200; +#endif +#ifdef __B614400 + case 614400: + return __B614400; +#endif +#ifdef __B2500000 + case 2500000: + return __B2500000; +#endif +#ifdef __B3000000 + case 3000000: + return __B3000000; +#endif +#ifdef __B3500000 + case 3500000: + return __B3500000; +#endif +#ifdef __B4000000 + case 4000000: + return __B4000000; +#endif + default: + return __BOTHER; + } +} + + +/* Canonicalize the representation of speed fields in a kernel + termios2 structure. 
Specifically, if there is a valid legacy cbaud + representation (not __BOTHER), use it and propagate the + corresponding speed value to ispeed/ospeed, otherwise the other way + around if possible. Finally, if the input speed is zero, copy the + output speed to the input speed. + + The kernel doesn't do this canonicalization, which can affect + legacy utilities, so do it here. + + This is used by tcgetattr() and tcsetattr(). */ +void +___termios2_canonicalize_speeds (struct termios2 *k_termios_p) +{ + k_termios_p->c_ospeed = + ___cbaud_to_speed (cbaud (k_termios_p->c_cflag), k_termios_p->c_ospeed); + k_termios_p->c_ispeed = + ___cbaud_to_speed (cibaud (k_termios_p->c_cflag), k_termios_p->c_ispeed); + + if (!k_termios_p->c_ispeed) + k_termios_p->c_ispeed = k_termios_p->c_ospeed; + + k_termios_p->c_cflag &= ~(CBAUD | CIBAUD); + k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ospeed); + k_termios_p->c_cflag |= ___speed_to_cbaud (k_termios_p->c_ispeed) << IBSHIFT; +} /* Return the output baud rate stored in *TERMIOS_P. */ speed_t -cfgetospeed (const struct termios *termios_p) +__cfgetospeed (const struct termios *termios_p) { - return termios_p->c_cflag & (CBAUD | CBAUDEX); + return termios_p->c_ospeed; } +libc_hidden_def (__cfgetospeed) +versioned_symbol (libc, __cfgetospeed, cfgetospeed, GLIBC_2_42); -/* Return the input baud rate stored in *TERMIOS_P. - Although for Linux there is no difference between input and output - speed, the numerical 0 is a special case for the input baud rate. It - should set the input baud rate to the output baud rate. */ +/* Return the input baud rate stored in *TERMIOS_P. */ speed_t -cfgetispeed (const struct termios *termios_p) +__cfgetispeed (const struct termios *termios_p) { - return ((termios_p->c_iflag & IBAUD0) - ? 
0 : termios_p->c_cflag & (CBAUD | CBAUDEX)); + return termios_p->c_ispeed; } +libc_hidden_def (__cfgetispeed) +versioned_symbol (libc, __cfgetispeed, cfgetispeed, GLIBC_2_42); /* Set the output baud rate stored in *TERMIOS_P to SPEED. */ int -cfsetospeed (struct termios *termios_p, speed_t speed) +__cfsetospeed (struct termios *termios_p, speed_t speed) { - if ((speed & ~CBAUD) != 0 - && (speed < B57600 || speed > __MAX_BAUD)) - return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); + tcflag_t cbaud = ___speed_to_cbaud (speed); -#if _HAVE_STRUCT_TERMIOS_C_OSPEED termios_p->c_ospeed = speed; + termios_p->c_cflag &= ~CBAUD; + termios_p->c_cflag |= cbaud; + + return 0; +} +libc_hidden_def (__cfsetospeed) +versioned_symbol (libc, __cfsetospeed, cfsetospeed, GLIBC_2_42); + +/* Set the input baud rate stored in *TERMIOS_P to SPEED. */ +int +__cfsetispeed (struct termios *termios_p, speed_t speed) +{ + tcflag_t cbaud = ___speed_to_cbaud (speed); + + termios_p->c_ispeed = speed; + termios_p->c_cflag &= ~CIBAUD; + termios_p->c_cflag |= cbaud << IBSHIFT; + + return 0; +} +libc_hidden_def (__cfsetispeed) +versioned_symbol (libc, __cfsetispeed, cfsetispeed, GLIBC_2_42); + +#if _TERMIOS_OLD_COMPAT + +/* Legacy versions which return cbaud-encoded speed_t values */ + +speed_t +attribute_compat_text_section +__old_cfgetospeed (const old_termios_t *termios_p) +{ + return cbaud (termios_p->c_cflag); +} +compat_symbol (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0); + +speed_t +attribute_compat_text_section +__old_cfgetispeed (const old_termios_t *termios_p) +{ + return cibaud (termios_p->c_cflag); +} +compat_symbol (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0); + +int +attribute_compat_text_section +__old_cfsetospeed (old_termios_t *termios_p, speed_t speed) +{ + speed_t real_speed = ___cbaud_to_speed (speed, -1); + if (real_speed == (speed_t)-1) + return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); + +#if !_HAVE_STRUCT_OLD_TERMIOS + /* Otherwise this field doesn't exist in 
old_termios_t */ + termios_p->c_ospeed = real_speed; #endif - termios_p->c_cflag &= ~(CBAUD | CBAUDEX); + termios_p->c_cflag &= ~CBAUD; termios_p->c_cflag |= speed; return 0; } -libc_hidden_def (cfsetospeed) +compat_symbol (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0); - -/* Set the input baud rate stored in *TERMIOS_P to SPEED. - Although for Linux there is no difference between input and output - speed, the numerical 0 is a special case for the input baud rate. It - should set the input baud rate to the output baud rate. */ int -cfsetispeed (struct termios *termios_p, speed_t speed) +attribute_compat_text_section +__old_cfsetispeed (old_termios_t *termios_p, speed_t speed) { - if ((speed & ~CBAUD) != 0 - && (speed < B57600 || speed > __MAX_BAUD)) + speed_t real_speed = ___cbaud_to_speed (speed, -1); + if (real_speed == (speed_t)-1) return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); -#if _HAVE_STRUCT_TERMIOS_C_ISPEED - termios_p->c_ispeed = speed; +#if !_HAVE_STRUCT_OLD_TERMIOS + /* Otherwise this field doesn't exist in old_termios_t */ + termios_p->c_ispeed = real_speed; #endif - if (speed == 0) - termios_p->c_iflag |= IBAUD0; - else - { - termios_p->c_iflag &= ~IBAUD0; - termios_p->c_cflag &= ~(CBAUD | CBAUDEX); - termios_p->c_cflag |= speed; - } + termios_p->c_cflag &= ~CIBAUD; + termios_p->c_cflag |= speed << IBSHIFT; return 0; } -libc_hidden_def (cfsetispeed) +compat_symbol (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0); + +#endif /* _TERMIOS_OLD_COMPAT */ diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 6f3351a..bde20e4 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 6.14. -kernel 6.14 +# The list of system calls is current as of Linux 6.15. 
+kernel 6.15 FAST_atomic_update FAST_cmpxchg @@ -316,6 +316,7 @@ olduname open open_by_handle_at open_tree +open_tree_attr openat openat2 or1k_atomic diff --git a/sysdeps/unix/sysv/linux/tcgetattr.c b/sysdeps/unix/sysv/linux/tcgetattr.c index d672e0c..ca17569 100644 --- a/sysdeps/unix/sysv/linux/tcgetattr.c +++ b/sysdeps/unix/sysv/linux/tcgetattr.c @@ -15,66 +15,56 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <errno.h> -#include <string.h> -#include <termios.h> -#include <unistd.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sysdep.h> - -/* The difference here is that the termios structure used in the - kernel is not the same as we use in the libc. Therefore we must - translate it here. */ -#include <kernel_termios.h> +#include <termios_internals.h> /* Put the state of FD into *TERMIOS_P. */ int __tcgetattr (int fd, struct termios *termios_p) { - struct __kernel_termios k_termios; - int retval; - - retval = INLINE_SYSCALL (ioctl, 3, fd, TCGETS, &k_termios); + struct termios2 k_termios; + long int retval = INLINE_SYSCALL_CALL (ioctl, fd, TCGETS2, &k_termios); - if (__glibc_likely (retval == 0)) + if (__glibc_likely (retval != -1)) { - termios_p->c_iflag = k_termios.c_iflag; - termios_p->c_oflag = k_termios.c_oflag; - termios_p->c_cflag = k_termios.c_cflag; - termios_p->c_lflag = k_termios.c_lflag; - termios_p->c_line = k_termios.c_line; -#if _HAVE_STRUCT_TERMIOS_C_ISPEED -# if _HAVE_C_ISPEED - termios_p->c_ispeed = k_termios.c_ispeed; -# else - termios_p->c_ispeed = k_termios.c_cflag & (CBAUD | CBAUDEX); -# endif -#endif -#if _HAVE_STRUCT_TERMIOS_C_OSPEED -# if _HAVE_C_OSPEED + ___termios2_canonicalize_speeds (&k_termios); + + memset (termios_p, 0, sizeof (*termios_p)); + termios_p->c_iflag = k_termios.c_iflag; + termios_p->c_oflag = k_termios.c_oflag; + termios_p->c_cflag = k_termios.c_cflag; + termios_p->c_lflag = k_termios.c_lflag; + termios_p->c_line = k_termios.c_line; termios_p->c_ospeed = 
k_termios.c_ospeed; -# else - termios_p->c_ospeed = k_termios.c_cflag & (CBAUD | CBAUDEX); -# endif -#endif - if (sizeof (cc_t) == 1 || _POSIX_VDISABLE == 0 - || (unsigned char) _POSIX_VDISABLE == (unsigned char) -1) - memset (__mempcpy (&termios_p->c_cc[0], &k_termios.c_cc[0], - __KERNEL_NCCS * sizeof (cc_t)), - _POSIX_VDISABLE, (NCCS - __KERNEL_NCCS) * sizeof (cc_t)); - else - { - memcpy (&termios_p->c_cc[0], &k_termios.c_cc[0], - __KERNEL_NCCS * sizeof (cc_t)); + termios_p->c_ispeed = k_termios.c_ispeed; - for (size_t cnt = __KERNEL_NCCS; cnt < NCCS; ++cnt) - termios_p->c_cc[cnt] = _POSIX_VDISABLE; - } + copy_c_cc (termios_p->c_cc, NCCS, k_termios.c_cc, _TERMIOS2_NCCS); } return retval; } - libc_hidden_def (__tcgetattr) + +#if _TERMIOS_OLD_COMPAT && _HAVE_STRUCT_OLD_TERMIOS + +versioned_symbol (libc, __tcgetattr, tcgetattr, GLIBC_2_42); + +/* Legacy version for shorter struct termios */ +int +attribute_compat_text_section +__old_tcgetattr (int fd, old_termios_t *termios_p) +{ + struct termios new_termios; + int retval = __tcgetattr (fd, &new_termios); + if (__glibc_likely (retval != -1)) + { + memcpy (termios_p, &new_termios, sizeof (*termios_p)); + } + return retval; +} +compat_symbol (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0); + +#else + weak_alias (__tcgetattr, tcgetattr) + +#endif diff --git a/sysdeps/unix/sysv/linux/tcsetattr.c b/sysdeps/unix/sysv/linux/tcsetattr.c index 5a13ad8..4f07a03 100644 --- a/sysdeps/unix/sysv/linux/tcsetattr.c +++ b/sysdeps/unix/sysv/linux/tcsetattr.c @@ -15,67 +15,94 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <errno.h> -#include <string.h> -#include <termios.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sysdep.h> - -/* The difference here is that the termios structure used in the - kernel is not the same as we use in the libc. Therefore we must - translate it here. */ -#include <kernel_termios.h> - - -/* This is a gross hack around a kernel bug. 
If the cfsetispeed functions - is called with the SPEED argument set to zero this means use the same - speed as for output. But we don't have independent input and output - speeds and therefore cannot record this. - - We use an unused bit in the `c_iflag' field to keep track of this - use of `cfsetispeed'. The value here must correspond to the one used - in `speed.c'. */ -#define IBAUD0 020000000000 +#include <termios_internals.h> +#define static_assert_equal(x,y) _Static_assert ((x) == (y), #x " != " #y) /* Set the state of FD to *TERMIOS_P. */ int __tcsetattr (int fd, int optional_actions, const struct termios *termios_p) { - struct __kernel_termios k_termios; - unsigned long int cmd; + struct termios2 k_termios; + unsigned long cmd; - switch (optional_actions) - { - case TCSANOW: - cmd = TCSETS; - break; - case TCSADRAIN: - cmd = TCSETSW; - break; - case TCSAFLUSH: - cmd = TCSETSF; - break; - default: - return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); - } + memset (&k_termios, 0, sizeof k_termios); - k_termios.c_iflag = termios_p->c_iflag & ~IBAUD0; + k_termios.c_iflag = termios_p->c_iflag; k_termios.c_oflag = termios_p->c_oflag; k_termios.c_cflag = termios_p->c_cflag; k_termios.c_lflag = termios_p->c_lflag; - k_termios.c_line = termios_p->c_line; -#if _HAVE_C_ISPEED && _HAVE_STRUCT_TERMIOS_C_ISPEED - k_termios.c_ispeed = termios_p->c_ispeed; -#endif -#if _HAVE_C_OSPEED && _HAVE_STRUCT_TERMIOS_C_OSPEED + k_termios.c_line = termios_p->c_line; + k_termios.c_ospeed = termios_p->c_ospeed; -#endif - memcpy (&k_termios.c_cc[0], &termios_p->c_cc[0], - __KERNEL_NCCS * sizeof (cc_t)); + k_termios.c_ispeed = termios_p->c_ispeed; + + ___termios2_canonicalize_speeds (&k_termios); + + copy_c_cc (k_termios.c_cc, _TERMIOS2_NCCS, termios_p->c_cc, NCCS); + + /* + * Choose the proper ioctl number to invoke. + * + * Alpha got TCSETS2 late (Linux 4.20), but has the same structure + * format, and it only needs TCSETS2 if either it needs to use + * __BOTHER or split speed. 
All other architectures have TCSETS2 as + * far back as the current glibc supports. Calling TCSETS with + * __BOTHER causes unpredictable results on old Alpha kernels and + * could even crash them. + */ + static_assert_equal(TCSADRAIN, TCSANOW + 1); + static_assert_equal(TCSAFLUSH, TCSANOW + 2); + static_assert_equal(TCSETSW2, TCSETS2 + 1); + static_assert_equal(TCSETSF2, TCSETS2 + 2); + static_assert_equal(TCSETSW, TCSETS + 1); + static_assert_equal(TCSETSF, TCSETS + 2); + + cmd = (long)optional_actions - TCSANOW; + if (cmd > 2) + return INLINE_SYSCALL_ERROR_RETURN_VALUE (EINVAL); + + if (__ASSUME_TERMIOS2 || + k_termios.c_ospeed != k_termios.c_ispeed || + cbaud (k_termios.c_cflag) == __BOTHER) + { + cmd += TCSETS2; + } + else + { + cmd += TCSETS; + k_termios.c_cflag &= ~CIBAUD; + } + + return INLINE_SYSCALL_CALL (ioctl, fd, cmd, &k_termios); +} +libc_hidden_def (__tcsetattr) + +#if _HAVE_STRUCT_OLD_TERMIOS && _TERMIOS_OLD_COMPAT + +versioned_symbol (libc, __tcsetattr, tcsetattr, GLIBC_2_42); - return INLINE_SYSCALL (ioctl, 3, fd, cmd, &k_termios); +/* Legacy version for shorter struct termios without speed fields */ +int +attribute_compat_text_section +__old_tcsetattr (int fd, int optional_actions, const old_termios_t *termios_p) +{ + struct termios new_termios; + + memset (&new_termios, 0, sizeof (new_termios)); + new_termios.c_iflag = termios_p->c_iflag; + new_termios.c_oflag = termios_p->c_oflag; + new_termios.c_cflag = termios_p->c_cflag; + new_termios.c_lflag = termios_p->c_lflag; + new_termios.c_line = termios_p->c_line; + copy_c_cc(new_termios.c_cc, NCCS, termios_p->c_cc, OLD_NCCS); + + return __tcsetattr (fd, optional_actions, &new_termios); } +compat_symbol (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0); + +#else + weak_alias (__tcsetattr, tcsetattr) -libc_hidden_def (tcsetattr) + +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/unix/sysv/linux/termios_arch.h index 4387908..8dbf420 100644 --- 
a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S +++ b/sysdeps/unix/sysv/linux/termios_arch.h @@ -1,4 +1,6 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. +/* Architectural parameters for Linux termios - generic version + + Copyright (C) 1997-2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -15,11 +17,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if defined __LITTLE_ENDIAN__ && IS_IN (libc) -#define STRNCMP __strncmp_power10 - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) +#define _TERMIOS2_NCCS 19 +#define _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE 0 -#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S> -#endif +#define _HAVE_STRUCT_OLD_TERMIOS 0 diff --git a/sysdeps/unix/sysv/linux/termios_internals.h b/sysdeps/unix/sysv/linux/termios_internals.h new file mode 100644 index 0000000..e8dbfe7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/termios_internals.h @@ -0,0 +1,143 @@ +/* termios functions internal implementation header for Linux + + Copyright (C) 1991-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifndef TERMIOS_INTERNALS_H +#define TERMIOS_INTERNALS_H 1 + +#include <stddef.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <termios.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sysdep.h> +#include <shlib-compat.h> + +#include <termios_arch.h> + +/* ---- Kernel interface definitions ---- */ + +/* The termios2 structure used in the kernel interfaces is not the + same as the termios structure we use in the libc. Therefore we + must translate it here. */ + +struct termios2 +{ + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ +#if _HAVE_TERMIOS2_C_CC_BEFORE_C_LINE + cc_t c_cc[_TERMIOS2_NCCS]; /* control characters */ + cc_t c_line; /* line discipline */ +#else + cc_t c_line; /* line discipline */ + cc_t c_cc[_TERMIOS2_NCCS]; /* control characters */ +#endif + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* Alpha got termios2 late, but TCGETS has exactly the same structure + format and function as TCGETS2. On all other platforms, the termios2 + interface exists as far back as this version of glibc supports. + + For TCSETS* it is more complicated; this is handled in tcsetattr.c. + + Some other architectures only have the equivalent of the termios2 + interface, in which case the old ioctl names are the only ones + presented, but are equivalent to the new ones. */ +#ifndef TCGETS2 +# define TCGETS2 TCGETS +# define TCSETS2 TCSETS +# define TCSETSW2 TCSETSW +# define TCSETSF2 TCSETSF +#elif !__ASSUME_TERMIOS2 +/* Hack for Alpha */ +# undef TCGETS2 +# define TCGETS2 TCGETS +#endif + +/* ---- Application interface definitions ---- */ + +/* + * Should old speed_t and struct termios (if applicable) compatibility + * functions be included? 
+ */ +#define _TERMIOS_OLD_COMPAT SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_42) + +/* + * Old struct termios (without c_ispeed and c_ospeed fields) if + * applicable. The new struct termios *must* be binary identical up to + * the sizeof the old structure. + * + * This only applies to SPARC and MIPS; for other architectures the + * new and old speed_t interfaces both use the same struct termios. + */ +#if _HAVE_STRUCT_OLD_TERMIOS +typedef struct old_termios old_termios_t; +#else +# define OLD_NCCS NCCS +typedef struct termios old_termios_t; +#endif + +/* ---- Internal function definitions ---- */ + +/* + * Copy a set of c_cc fields of possibly different width. If the target + * field is longer, then fill with _POSIX_VDISABLE == -1. + */ +static inline void +copy_c_cc (cc_t *to, size_t nto, const cc_t *from, size_t nfrom) +{ + if (nto < nfrom) + nfrom = nto; + + to = __mempcpy (to, from, nfrom * sizeof(cc_t)); + if (nto > nfrom) + memset (to, _POSIX_VDISABLE, (nto - nfrom) * sizeof(cc_t)); +} + +/* Extract the output and input legacy speed fields from c_cflag. */ +static inline tcflag_t +cbaud (tcflag_t c_cflag) +{ + return c_cflag & CBAUD; +} + +static inline tcflag_t +cibaud (tcflag_t c_cflag) +{ + return cbaud (c_cflag >> IBSHIFT); +} + +extern speed_t +___cbaud_to_speed (tcflag_t c_cflag, speed_t other) + __attribute_const__ attribute_hidden; + +extern tcflag_t +___speed_to_cbaud (speed_t speed) + __attribute_const__ attribute_hidden; + +extern void +___termios2_canonicalize_speeds (struct termios2 *k_termios_p) + attribute_hidden; + +#endif /* TERMIOS_INTERNALS_H */ diff --git a/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c b/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c new file mode 100644 index 0000000..14fdf82 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-copy_file_range-large.c @@ -0,0 +1,239 @@ +/* Test for copy_file_range with large sizes (bug 33245). + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This test exercises copy_file_range with various large file sizes + on FUSE filesystems to verify proper handling of system call return + values. No data is actually copied. */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <support/check.h> +#include <support/fuse.h> +#include <support/support.h> +#include <support/test-driver.h> +#include <support/xthread.h> +#include <support/xunistd.h> +#include <sys/stat.h> +#include <unistd.h> + +static void +fuse_thread (struct support_fuse *f, void *closure) +{ + /* Node IDs for our test files. */ + enum { NODE_SOURCE = 2, NODE_DEST = 3 }; + /* A large size, so that the kernel does not fail the + copy_file_range attempt before performing the FUSE callback. + Only the source file size matters to the kernel, but both files + use the same size for simplicity. 
*/ + const uint64_t file_size = 1LLU << 61; + + struct fuse_in_header *inh; + while ((inh = support_fuse_next (f)) != NULL) + { + if (support_fuse_handle_mountpoint (f) + || (inh->nodeid == 1 && support_fuse_handle_directory (f))) + continue; + + switch (inh->opcode) + { + case FUSE_LOOKUP: + { + char *name = support_fuse_cast (LOOKUP, inh); + int node = 0; + if (inh->nodeid == 1 && strcmp (name, "source") == 0) + node = NODE_SOURCE; + else if (inh->nodeid == 1 && strcmp (name, "dest") == 0) + node = NODE_DEST; + + if (node != 0) + { + struct fuse_entry_out *out + = support_fuse_prepare_entry (f, node); + out->attr.mode = S_IFREG | 0600; + out->attr.size = file_size; + support_fuse_reply_prepared (f); + } + else + support_fuse_reply_error (f, ENOENT); + } + break; + + case FUSE_OPEN: + /* File open */ + { + if (inh->nodeid == NODE_SOURCE || inh->nodeid == NODE_DEST) + { + struct fuse_open_out out = { .fh = inh->nodeid }; + support_fuse_reply (f, &out, sizeof (out)); + } + else + support_fuse_reply_error (f, ENOENT); + } + break; + + case FUSE_GETATTR: + /* Get file attributes */ + if (inh->nodeid == NODE_SOURCE || inh->nodeid == NODE_DEST) + { + struct fuse_attr_out *out = support_fuse_prepare_attr (f); + out->attr.mode = S_IFREG | 0600; + out->attr.size = file_size; + support_fuse_reply_prepared (f); + } + else + support_fuse_reply_error (f, ENOENT); + break; + + case FUSE_COPY_FILE_RANGE: + { + struct fuse_copy_file_range_in *p + = support_fuse_cast (COPY_FILE_RANGE, inh); + + /* Verify this is a copy from source to dest, starting at + offset 0. */ + TEST_COMPARE (p->fh_in, NODE_SOURCE); + TEST_COMPARE (p->nodeid_out, NODE_DEST); + TEST_COMPARE (p->off_in, 0); + TEST_COMPARE (p->off_out, 0); + TEST_VERIFY (p->len > 0); + TEST_VERIFY (p->len <= file_size); + + /* Pretend the copy succeeded. 
*/ + struct fuse_write_out out = { .size = p->len }; + support_fuse_reply (f, &out, sizeof (out)); + } + break; + + case FUSE_FLUSH: + support_fuse_reply_empty (f); + break; + + default: + support_fuse_reply_error (f, EIO); + } + } +} + +static void +test_size (struct support_fuse *f, off64_t size) +{ + /* On 32-bit targets, not all possible return values from + copy_file_range are representable. The current (Linux 6.5.18) + kernel FUSE implementation can produce negative non-error results + from copy_file_range in the range [1UL << 31, UINT_MAX - 4095], + but this seems to be a FUSE bug. */ + if (size != (ssize_t) size) + { + printf ("info:%s size 0x%llx is too large for ssize_t\n", + test_verbose ? " " : "", (unsigned long long int) size); + return; + } + + verbose_printf ("info: testing copy size 0x%llx\n", + (unsigned long long int) size); + + const char *mountpoint = support_fuse_mountpoint (f); + char *source_path = xasprintf ("%s/source", mountpoint); + char *dest_path = xasprintf ("%s/dest", mountpoint); + + int source_fd = xopen (source_path, O_RDONLY, 0); + int dest_fd = xopen (dest_path, O_WRONLY, 0); + + ssize_t copied = copy_file_range (source_fd, NULL, dest_fd, NULL, size, 0); + /* Avoid FAIL_UNSUPPORTED if it is likely bogus due to previous + copy_file_range successes. */ + if (copied == -1 && errno == ENOSYS) + { + /* Unmounting avoids a test hang on exit. */ + xclose (dest_fd); + xclose (source_fd); + support_fuse_unmount (f); + FAIL_UNSUPPORTED ("copy_file_range not supported"); + } + + TEST_COMPARE (copied, size); + + xclose (dest_fd); + xclose (source_fd); + free (dest_path); + free (source_path); +} + +static void +test_all_sizes (struct support_fuse *f) +{ + test_size (f, 0); /* Not actually handled by the callback. 
*/ + test_size (f, 20); + test_size (f, 1 << 30); + test_size (f, INT_MAX); + for (int i = 0; i <= 5; ++i) + test_size (f, (1U << 31) + i); + for (int i = -4100; i <= -4090; ++i) + test_size (f, UINT_MAX + i); + for (int i = -100; i <= 0; ++i) + test_size (f, UINT_MAX + i); + + /* We would like to test larger values than UINT_MAX here, but they + do not work because the FUSE protocol uses uint32_t for the + copy_file_range result in struct fuse_write_out. */ +} + +static void * +test_cancel_state_variants (void *f_ptr) +{ + struct support_fuse *f = (struct support_fuse *) f_ptr; + + verbose_printf ("info: testing default cancellation settings\n"); + test_all_sizes (f); + + verbose_printf ("info: testing with cancellation disabled\n"); + TEST_COMPARE (pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL), 0); + test_all_sizes (f); + + verbose_printf ("info: testing with cancellation enabled\n"); + TEST_COMPARE (pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL), 0); + test_all_sizes (f); + + return NULL; +} + +static int +do_test (void) +{ + support_fuse_init (); + struct support_fuse *f = support_fuse_mount (fuse_thread, NULL); + + verbose_printf ("info: testing on main thread\n"); + test_cancel_state_variants (f); + + verbose_printf ("info: testing on secondary thread\n"); + TEST_VERIFY (xpthread_join (xpthread_create + (NULL, test_cancel_state_variants, f)) + == NULL); + + verbose_printf ("info: testing on separate thread\n"); + + support_fuse_unmount (f); + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-pkey.c b/sysdeps/unix/sysv/linux/tst-pkey.c index 4d12d2e..1000d8f 100644 --- a/sysdeps/unix/sysv/linux/tst-pkey.c +++ b/sysdeps/unix/sysv/linux/tst-pkey.c @@ -191,7 +191,7 @@ do_test (void) pthread_t delayed_thread = xpthread_create (NULL, &delayed_thread_func, &delayed_thread_check_access); - keys[0] = pkey_alloc (0, 0); + keys[0] = pkey_alloc (0, PKEY_UNRESTRICTED); if (keys[0] < 0) { if (errno == ENOSYS) @@ -333,7 
+333,7 @@ do_test (void) if (i == allowed_key) { if (do_write) - TEST_COMPARE (pkey_set (keys[i], 0), 0); + TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0); else TEST_COMPARE (pkey_set (keys[i], PKEY_DISABLE_WRITE), 0); } @@ -360,7 +360,7 @@ do_test (void) inherit that access. */ for (int i = 0; i < key_count; ++i) { - TEST_COMPARE (pkey_set (keys[i], 0), 0); + TEST_COMPARE (pkey_set (keys[i], PKEY_UNRESTRICTED), 0); TEST_VERIFY (check_page_access (i, false)); TEST_VERIFY (check_page_access (i, true)); } diff --git a/sysdeps/unix/sysv/linux/tst-termios-linux.c b/sysdeps/unix/sysv/linux/tst-termios-linux.c new file mode 100644 index 0000000..e4b0c8b --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-termios-linux.c @@ -0,0 +1,592 @@ +/* Linux termios regression tests + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <https://www.gnu.org/licenses/>. 
*/ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <termios.h> +#include <unistd.h> + +#include <shlib-compat.h> +#include <array_length.h> + +#include <support/check.h> +#include <support/namespace.h> +#include <support/support.h> +#include <support/temp_file.h> +#include <support/test-driver.h> +#include <support/tty.h> + +/* Evaluate an expression and make sure errno did not get set; return + the value of the expression */ +#define CHECKERR(expr) \ + ({ \ + errno = 0; \ + const __typeof (expr) _val = (expr); \ + TEST_COMPARE(errno, 0); \ + _val; \ + }) + +/* Evaluate an expression and verify that it returns a specific value, + as well as errno not having been set. */ +#define VERIFY(expr,val) TEST_COMPARE(CHECKERR(expr), val) +/* Check for zero and errno not set */ +#define CHECKZERO(expr) VERIFY(expr, 0) + +/* Table of legacy speed constants */ + +#define BOGUS ((speed_t)-1) +#define ANY ((speed_t)-2) + +struct cbaud_table +{ + speed_t speed; + speed_t cbaud; + const char *name; +}; + +static const struct cbaud_table cbaud_table [] = +{ + { 0, __B0, "__B0" }, + { 50, __B50, "__B50" }, + { 75, __B75, "__B75" }, + { 110, __B110, "__B110" }, + { 134, __B134, "__B134" }, + { 150, __B150, "__B150" }, + { 200, __B200, "__B200" }, + { 300, __B300, "__B300" }, + { 600, __B600, "__B600" }, + { 1200, __B1200, "__B1200" }, + { 1800, __B1800, "__B1800" }, + { 2400, __B2400, "__B2400" }, + { 4800, __B4800, "__B4800" }, +#ifdef __B7200 + { 7200, __B7200, "__B7200" }, +#endif + { 9600, __B9600, "__B9600" }, +#ifdef __B14400 + { 14400, __B14400, "__B14400" }, +#endif + { 19200, __B19200, "__B19200" }, +#ifdef __B28800 + { 28800, __B28800, "__B28800" }, +#endif + { 38400, __B38400, "__B38400" }, + { 57600, __B57600, "__B57600" }, +#ifdef __B76800 + { 76800, __B76800, "__B76800" }, +#endif + { 115200, __B115200, "__B115200" }, +#ifdef __B153600 + { 
153600, __B153600, "__B153600" }, +#endif + { 230400, __B230400, "__B230400" }, +#ifdef __B307200 + { 307200, __B307200, "__B307200" }, +#endif + { 460800, __B460800, "__B460800" }, + { 500000, __B500000, "__B500000" }, + { 576000, __B576000, "__B576000" }, +#ifdef __B614400 + { 614400, __B614400, "__B614400" }, +#endif + { 921600, __B921600, "__B921600" }, + { 1000000, __B1000000, "__B1000000" }, + { 1152000, __B1152000, "__B1152000" }, + { 1500000, __B1500000, "__B1500000" }, + { 2000000, __B2000000, "__B2000000" }, +#ifdef __B2500000 + { 2500000, __B2500000, "__B2500000" }, +#endif +#ifdef __B3000000 + { 3000000, __B3000000, "__B3000000" }, +#endif +#ifdef __B3500000 + { 3500000, __B3500000, "__B3500000" }, +#endif +#ifdef __B4000000 + { 4000000, __B4000000, "__B4000000" }, +#endif + { ANY, __BOTHER, "__BOTHER" }, + { BOGUS, BOGUS, "invalid" } +}; + +/* List of common speeds to test */ + +static const speed_t test_speeds [] = +{ + 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, + 4800, 7200, 9600, 14400, 19200, 28800, 33600, 38400, 57600, + 76800, 115200, 153600, 230400, 307200, 460800, 500000, + 576000, 614400, 921600, 1000000, 1152000, 1500000, 2000000, + 2500000, 3000000, 3500000, 4000000, 5000000, 10000000 +}; + +/* Speed function tests */ + +/* These intentionally are a separate implementation from speed.c; + these should be "trivially correct" and don't need to be optimized + in any way */ + +/* Returns __BOTHER if there is no legacy value for this speed */ +static speed_t speed_to_cbaud (speed_t speed) +{ + const struct cbaud_table *ct; + for (ct = cbaud_table; ct->speed != ANY; ct++) + { + if (ct->speed == speed) + break; + } + return ct->cbaud; +} + +/* Returns ANY if cbaud is __BOTHER, or BOGUS if invalid */ +static speed_t cbaud_to_speed (speed_t cbaud) +{ + const struct cbaud_table *ct; + for (ct = cbaud_table; ct->cbaud != BOGUS; ct++) + { + if (ct->cbaud == cbaud) + break; + } + return ct->speed; +} + +static const char *cbaud_name 
(speed_t cbaud) +{ + const struct cbaud_table *ct; + for (ct = cbaud_table; ct->cbaud != BOGUS; ct++) + { + if (ct->cbaud == cbaud) + break; + } + return ct->name; +} + +static int check_speed (speed_t expected, speed_t speed, speed_t cbaud, + speed_t cfspeed, baud_t cfbaud, char io) +{ + speed_t want_cbaud; + cbaud &= CBAUD; + + if (expected != ANY && speed != expected) + FAIL_RET ("c_%cspeed = %u, expected %u", io, speed, expected); + + if (cfspeed != speed) + FAIL_RET ("cfget%cspeed = %u, expected %u", io, cfspeed, speed); + + if (cfbaud != cfspeed) + FAIL_RET ("cfget%cbaud = %u, but cfget%cspeed = %u", + io, cfbaud, io, cfspeed); + + want_cbaud = speed_to_cbaud (speed); + + if (cbaud != want_cbaud) + FAIL_RET ("c_%cspeed = %u: %s = %s (%06o), should be %s (%06o)", + io, speed, + io == 'o' ? "CBAUD" : "CIBAUD", cbaud_name (cbaud), cbaud, + cbaud_name (want_cbaud), want_cbaud); + + return 0; +} + +/* Validate that the speeds in the struct termios are properly normalized. + The difference is the handling of ispeed == 0. */ + +/* Use this after cfset* () */ +static void check_speeds_cf (const struct termios *tio_p, + speed_t ospeed, speed_t ispeed) +{ + check_speed (ospeed, tio_p->c_ospeed, tio_p->c_cflag, + CHECKERR (cfgetospeed (tio_p)), + CHECKERR (cfgetobaud (tio_p)), 'o'); + check_speed (ispeed, tio_p->c_ispeed, tio_p->c_cflag >> IBSHIFT, + CHECKERR (cfgetispeed (tio_p)), + CHECKERR (cfgetibaud (tio_p)), 'i'); +} + +/* Use this after tc[gs]etattr () */ +static void check_speeds_tc (int fd, speed_t ospeed, speed_t ispeed) +{ + struct termios tio; + + CHECKZERO (tcgetattr (fd, &tio)); + check_speeds_cf (&tio, ospeed, ispeed ? 
ispeed : ospeed); +} + +/* For search and replace convenience */ +#define check_bauds_cf check_speeds_cf +#define check_bauds_tc check_speeds_tc + +/* Common routine for setting speeds, with checking */ +static void +set_speeds (int fd, speed_t ospeed, speed_t ispeed) +{ + struct termios tio; + + CHECKZERO (tcgetattr (fd, &tio)); + CHECKZERO (cfsetospeed (&tio, ospeed)); + CHECKZERO (cfsetispeed (&tio, ispeed)); + check_speeds_cf (&tio, ospeed, ispeed); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, ospeed, ispeed ? ispeed : ospeed); +} + +/* Actual tests */ + +typedef void (*speed_test_t)(int ttyfd, speed_t speed); +static void +run_speed_test (int fd, speed_test_t test); + +/* New interface cfset*speed test */ +static void +new_cfspeed_test (int fd, speed_t speed) +{ + struct termios tio; + speed_t old_ospeed, old_ispeed; + + CHECKZERO (tcgetattr (fd, &tio)); + old_ospeed = CHECKERR (cfgetospeed (&tio)); + old_ispeed = CHECKERR (cfgetispeed (&tio)); + + /* Check initial normalization */ + check_speeds_cf (&tio, old_ospeed, old_ispeed); + + /* Check cfset*speed normalization */ + CHECKZERO (cfsetospeed (&tio, speed)); + check_speeds_cf (&tio, speed, old_ispeed); + CHECKZERO (cfsetispeed (&tio, speed)); + check_speeds_cf (&tio, speed, speed); + CHECKZERO (cfsetospeed (&tio, old_ospeed)); + check_speeds_cf (&tio, old_ospeed, speed); + CHECKZERO (cfsetispeed (&tio, B0)); + check_speeds_cf (&tio, old_ospeed, B0); + CHECKZERO (cfsetspeed (&tio, speed)); + check_speeds_cf (&tio, speed, speed); + CHECKZERO (cfsetospeed (&tio, old_ospeed)); + CHECKZERO (cfsetispeed (&tio, old_ispeed)); + check_speeds_cf (&tio, old_ospeed, old_ispeed); +} + +/* New interface cfset*speed test with tcsetattr */ +static void +new_tcspeed_test (int fd, speed_t speed) +{ + struct termios tio; + speed_t old_ospeed, old_ispeed; + + CHECKZERO (tcgetattr (fd, &tio)); + old_ospeed = CHECKERR (cfgetospeed (&tio)); + old_ispeed = CHECKERR (cfgetispeed (&tio)); + + /* Check initial 
normalization */ + check_speeds_cf (&tio, old_ospeed, old_ispeed); + + /* Check cfset*speed normalization */ + CHECKZERO (cfsetospeed (&tio, speed)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, speed, old_ispeed); + CHECKZERO (cfsetispeed (&tio, speed)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, speed, speed); + CHECKZERO (cfsetospeed (&tio, old_ospeed)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, old_ospeed, speed); + CHECKZERO (cfsetispeed (&tio, B0)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, old_ospeed, B0); + CHECKZERO (cfsetspeed (&tio, speed)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, speed, speed); + CHECKZERO (cfsetospeed (&tio, old_ospeed)); + CHECKZERO (cfsetispeed (&tio, old_ispeed)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_speeds_tc (fd, old_ospeed, old_ispeed); +} + +/* New interface cfset*baud test */ +static void +new_cfbaud_test (int fd, baud_t baud) +{ + struct termios tio; + baud_t old_obaud, old_ibaud; + + CHECKZERO (tcgetattr (fd, &tio)); + old_obaud = CHECKERR (cfgetobaud (&tio)); + old_ibaud = CHECKERR (cfgetibaud (&tio)); + + /* Check initial normalization */ + check_bauds_cf (&tio, old_obaud, old_ibaud); + + /* Check cfset*baud normalization */ + CHECKZERO (cfsetobaud (&tio, baud)); + check_bauds_cf (&tio, baud, old_ibaud); + CHECKZERO (cfsetibaud (&tio, baud)); + check_bauds_cf (&tio, baud, baud); + CHECKZERO (cfsetobaud (&tio, old_obaud)); + check_bauds_cf (&tio, old_obaud, baud); + CHECKZERO (cfsetibaud (&tio, B0)); + check_bauds_cf (&tio, old_obaud, B0); + CHECKZERO (cfsetbaud (&tio, baud)); + check_bauds_cf (&tio, baud, baud); + CHECKZERO (cfsetobaud (&tio, old_obaud)); + CHECKZERO (cfsetibaud (&tio, old_ibaud)); + check_bauds_cf (&tio, old_obaud, old_ibaud); +} + +/* New interface cfset*baud test with tcsetattr */ +static void +new_tcbaud_test (int fd, baud_t baud) +{ + struct termios tio; + 
baud_t old_obaud, old_ibaud; + + CHECKZERO (tcgetattr (fd, &tio)); + old_obaud = CHECKERR (cfgetobaud (&tio)); + old_ibaud = CHECKERR (cfgetibaud (&tio)); + + /* Check initial normalization */ + check_bauds_cf (&tio, old_obaud, old_ibaud); + + /* Check cfset*baud normalization */ + CHECKZERO (cfsetobaud (&tio, baud)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, baud, old_ibaud); + CHECKZERO (cfsetibaud (&tio, baud)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, baud, baud); + CHECKZERO (cfsetobaud (&tio, old_obaud)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, old_obaud, baud); + CHECKZERO (cfsetibaud (&tio, B0)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, old_obaud, B0); + CHECKZERO (cfsetbaud (&tio, baud)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, baud, baud); + CHECKZERO (cfsetobaud (&tio, old_obaud)); + CHECKZERO (cfsetibaud (&tio, old_ibaud)); + CHECKZERO (tcsetattr (fd, TCSANOW, &tio)); + check_bauds_tc (fd, old_obaud, old_ibaud); +} + +/* + * Old interface tests. This depends critically on the new struct + * termios being guaranteed to be a superset of the legacy struct + * termios. 
+ */ +#if TEST_COMPAT (libc, GLIBC_2_0, GLIBC_2_42) +extern int __old_cfsetospeed (struct termios *tio_p, speed_t speed); +compat_symbol_reference (libc, __old_cfsetospeed, cfsetospeed, GLIBC_2_0); +extern int __old_cfsetispeed (struct termios *tio_p, speed_t speed); +compat_symbol_reference (libc, __old_cfsetispeed, cfsetispeed, GLIBC_2_0); +extern speed_t __old_cfgetospeed (const struct termios *tio_p); +compat_symbol_reference (libc, __old_cfgetospeed, cfgetospeed, GLIBC_2_0); +extern speed_t __old_cfgetispeed (const struct termios *tio_p); +compat_symbol_reference (libc, __old_cfgetispeed, cfgetispeed, GLIBC_2_0); +extern int __old_tcsetattr (int fd, int act, const struct termios *tio_p); +compat_symbol_reference (libc, __old_tcsetattr, tcsetattr, GLIBC_2_0); +extern int __old_tcgetattr (int fd, struct termios *tio_p); +compat_symbol_reference (libc, __old_tcgetattr, tcgetattr, GLIBC_2_0); + +static int old_tcsetattr (int fd, const struct termios *tio_p) +{ + struct termios old_tio = *tio_p; + + /* Deliberately corrupt c_ispeed and c_ospeed */ + old_tio.c_ispeed = 0xdeadbeef; + old_tio.c_ospeed = 0xfeedface; + return __old_tcsetattr (fd, TCSANOW, &old_tio); +} +static int old_tcgetattr (int fd, struct termios *tio_p) +{ + int rv; + memset (tio_p, 0xde, sizeof *tio_p); + rv = __old_tcgetattr (fd, tio_p); + if (rv) + return rv; + + /* Deliberately corrupt c_ispeed and c_ospeed */ + tio_p->c_ispeed = 0xdeadbeef; + tio_p->c_ospeed = 0xfeedface; + return 0; +} + +/* Old interface test. This relies on the new struct termios always + being a binary superset of the old one. + This doesn't bother testing split speed, since that never worked + on the old glibc. 
*/ +static void +old_tcspeed_test (int fd, speed_t speed) +{ + struct termios tio; + speed_t cbaud; + + if (!speed) + return; /* Skip B0 for this test */ + + cbaud = speed_to_cbaud (speed); + if (cbaud == __BOTHER) + return; + + CHECKZERO (old_tcgetattr (fd, &tio)); + CHECKZERO (__old_cfsetospeed (&tio, cbaud)); + VERIFY (__old_cfgetospeed (&tio), cbaud); + CHECKZERO (__old_cfsetispeed (&tio, cbaud)); + VERIFY (__old_cfgetispeed (&tio), cbaud); + CHECKZERO (old_tcsetattr (fd, &tio)); + check_speeds_tc (fd, speed, speed); +} + +/* Verify that invalid CBAUD values return error for the old interfaces */ +static void +old_invalid_speeds_test (int fd) +{ + struct termios tio; + speed_t cbaud; + + for (cbaud = 0 ; cbaud ; cbaud > 0xffff ? (cbaud <<= 1) : cbaud++) { + speed_t realspeed; + realspeed = (cbaud & ~CBAUD) ? BOGUS : cbaud_to_speed (cbaud); + if (realspeed >= ANY) + { + int rv; + + errno = 0; + rv = __old_cfsetospeed (&tio, cbaud); + if (rv != -1 || errno != EINVAL) + FAIL("__old_cfsetospeed() accepted invalid value %06o", cbaud); + + errno = 0; + rv = __old_cfsetispeed (&tio, cbaud); + if (rv != -1 || errno != EINVAL) + FAIL("__old_cfsetispeed() accepted invalid value %06o", cbaud); + } + else + { + CHECKZERO (__old_cfsetospeed (&tio, cbaud)); + VERIFY (__old_cfgetospeed (&tio), cbaud); + CHECKZERO (__old_cfsetispeed (&tio, cbaud)); + VERIFY (__old_cfgetispeed (&tio), cbaud); + if (cbaud) + { + CHECKZERO (old_tcsetattr (fd, &tio)); + check_speeds_tc (fd, realspeed, realspeed); + } + } + } +} + +static void +compat_tests (int fd) +{ + run_speed_test (fd, old_tcspeed_test); + old_invalid_speeds_test (fd); +} +#else /* No TEST_COMPAT */ +#define compat_tests(fd) ((void)(fd)) +#endif + +static void +run_speed_test (int fd, speed_test_t test) +{ + unsigned short seed [3] = { 0x1234, 0x5678, 0x9abc }; + struct speeds { + speed_t ospeed, ispeed; + }; + static const struct speeds initial_speeds [] = { + { 2400, 2400 }, /* Standard speed, non-split */ + { 123456, 123456 
}, /* Nonstandard speed, non-split */ + { 75, 1200 }, /* Standard split speeds */ + { 9600, 456789 }, /* One standard, one nonstandard */ + { 54321, 1234567890 } /* Nonstandard, one very high */ + }; + + array_foreach_const (is, initial_speeds) + { + /* Set up initial conditions */ + set_speeds (fd, is->ospeed, is->ispeed); + + /* Test all common speeds */ + array_foreach_const (ts, test_speeds) + test (fd, *ts); + + /* Test pseudorandom speeds; array_length(test_speeds) + here is an arbitrary value */ + const size_t random_test_count = array_length(test_speeds); + for (size_t i = 0 ; i < random_test_count ; i++) + test (fd, (speed_t) jrand48 (seed)); + + /* Test power-of-2 speeds */ + for (speed_t s = 1 ; s ; s <<= 1) + test (fd, s); + + /* Test power of 2 multiples of 75; 75 << 25 is the maximum below 2^32 */ + for (int i = 0 ; i < 26 ; i++) + test (fd, (speed_t)75 << i); + } +} + +static void +run_speed_tests (int fd) +{ + /* Test proper canonicalization using the new interface */ + run_speed_test (fd, new_cfspeed_test); + run_speed_test (fd, new_tcspeed_test); + + /* Try the new cfset*baud() functions */ + run_speed_test (fd, new_cfbaud_test); + run_speed_test (fd, new_tcbaud_test); + + /* Tests of the legacy functions */ + compat_tests (fd); +} + +/* test dispatch */ + +static void +run_in_chroot (void) +{ + /* Create a pty slave to use as a tty. Most of the termios settings, + including the speeds, have no impact on a pty, but they are still + settable like for any other tty, which makes them very convenient + for testing. 
*/ + int ptmfd, ttyfd; + + support_openpty (&ptmfd, &ttyfd, NULL, NULL, NULL); + run_speed_tests (ttyfd); + close (ttyfd); + close (ptmfd); +} + +static int +do_test (void) +{ + support_become_root (); + run_in_chroot (); + + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h index 17b84c7..06fbae5 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/arch-syscall.h @@ -207,6 +207,7 @@ #define __NR_open 2 #define __NR_open_by_handle_at 304 #define __NR_open_tree 428 +#define __NR_open_tree_attr 467 #define __NR_openat 257 #define __NR_openat2 437 #define __NR_pause 34 diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist index 7ab9073..5648772 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist @@ -2748,6 +2748,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist index 11c5ebc..6719814 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/64/libm.abilist @@ -1302,6 +1302,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 
rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile index fb834a7..6938382 100644 --- a/sysdeps/unix/sysv/linux/x86_64/Makefile +++ b/sysdeps/unix/sysv/linux/x86_64/Makefile @@ -87,10 +87,10 @@ $(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport) $(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport) $(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport) -CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile -CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2 -CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2 -CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile -DTEST_AMX +CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2 +CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -DTEST_AMX -mtls-dialect=gnu2 endif endif # $(subdir) == elf diff --git a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c index 006c532..812e023 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c +++ b/sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c @@ -22,7 +22,7 @@ extern void restore_rt (void) asm ("__restore_rt") attribute_hidden; #define SET_SA_RESTORER(kact, act) \ - (kact)->sa_flags = (act)->sa_flags | SA_RESTORER; \ + (kact)->sa_flags |= SA_RESTORER; \ (kact)->sa_restorer = &restore_rt #define RESET_SA_RESTORER(act, kact) \ diff --git a/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h b/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h new file mode 100644 index 0000000..585ca01 --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86_64/uw-sigframe.h @@ -0,0 +1,76 @@ +/* Signal frame backtracing support for SFrame on AMD, x86-64 and x86. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License + as published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty + of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* This code is inspired from libgcc's MD_FALLBACK_FRAME_STATE_FOR + implementation. See libgcc/config/i386/linux-unwind.h */ + +#include <signal.h> +#include <sys/ucontext.h> + +#ifdef __x86_64__ + +/* SFrame is only supported by x86_64 targets. */ + +#define MD_DECODE_SIGNAL_FRAME x86_64_decode_signal_frame + +#ifdef __LP64__ +#define RT_SIGRETURN_SYSCALL 0x050f0000000fc0c7ULL +#else +#define RT_SIGRETURN_SYSCALL 0x050f40000201c0c7ULL +#endif + +static _Unwind_Reason_Code +x86_64_decode_signal_frame (frame *frame) +{ + unsigned char *pc = (unsigned char *) frame->pc; + mcontext_t *st; + + unsigned char pc0 = *(unsigned char *)(pc + 0); + unsigned long long pc1; + memcpy (&pc1, pc + 1, sizeof (unsigned long long)); + + /* movq $__NR_rt_sigreturn, %rax ; syscall. */ + if ( pc0 == 0x48 + && pc1 == RT_SIGRETURN_SYSCALL) + { + ucontext_t *uc_ = (ucontext_t *)frame->sp; + st = &uc_->uc_mcontext; + } + else + return _URC_END_OF_STACK; + + frame->pc = (_Unwind_Ptr) st->gregs[REG_RIP]; + frame->sp = (_Unwind_Ptr) st->gregs[REG_RSP]; + frame->fp = (_Unwind_Ptr) st->gregs[REG_RBP]; + return _URC_NO_REASON; +} + +#define MD_DETECT_OUTERMOST_FRAME x86_64_detect_outermost_frame + +static _Unwind_Reason_Code +x86_64_detect_outermost_frame (frame *frame) +{ + /* Outermost frame has the frame pointer cleared. 
*/ + if (frame->fp == 0) + return _URC_END_OF_STACK; + + return _URC_NO_REASON; +} + +#endif /* ifdef __x86_64__ */ diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h index 1dcd6ab..135ef3d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/arch-syscall.h @@ -200,6 +200,7 @@ #define __NR_open 1073741826 #define __NR_open_by_handle_at 1073742128 #define __NR_open_tree 1073742252 +#define __NR_open_tree_attr 1073742291 #define __NR_openat 1073742081 #define __NR_openat2 1073742261 #define __NR_pause 1073741858 diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist index e11876f..25a39d0 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist @@ -2767,6 +2767,16 @@ GLIBC_2.41 sched_getattr F GLIBC_2.41 sched_setattr F GLIBC_2.42 __inet_ntop_chk F GLIBC_2.42 __inet_pton_chk F +GLIBC_2.42 cfgetibaud F +GLIBC_2.42 cfgetispeed F +GLIBC_2.42 cfgetobaud F +GLIBC_2.42 cfgetospeed F +GLIBC_2.42 cfsetbaud F +GLIBC_2.42 cfsetibaud F +GLIBC_2.42 cfsetispeed F +GLIBC_2.42 cfsetobaud F +GLIBC_2.42 cfsetospeed F +GLIBC_2.42 cfsetspeed F GLIBC_2.42 pthread_gettid_np F GLIBC_2.42 uabs F GLIBC_2.42 uimaxabs F diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist index 2b1b75e..1a1069a 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/x32/libm.abilist @@ -1302,6 +1302,14 @@ GLIBC_2.42 powrf32x F GLIBC_2.42 powrf64 F GLIBC_2.42 powrf64x F GLIBC_2.42 powrl F +GLIBC_2.42 rootn F +GLIBC_2.42 rootnf F +GLIBC_2.42 rootnf128 F +GLIBC_2.42 rootnf32 F +GLIBC_2.42 rootnf32x F +GLIBC_2.42 rootnf64 F +GLIBC_2.42 rootnf64x F +GLIBC_2.42 rootnl F GLIBC_2.42 rsqrt F GLIBC_2.42 rsqrtf F GLIBC_2.42 rsqrtf128 F diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 
01b0192..4fbd48e 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -4,7 +4,13 @@ endif ifeq ($(subdir),elf) sysdep_routines += get-cpuid-feature-leaf -sysdep-dl-routines += dl-get-cpu-features +sysdep-dl-routines += \ + dl-get-cpu-features \ + dl-tlsdesc \ + tls_get_addr \ + tlsdesc \ +# sysdep-dl-routines + sysdep_headers += \ bits/platform/features.h \ bits/platform/x86.h \ @@ -90,14 +96,22 @@ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV) tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd) -CFLAGS-tst-gnu2-tls2.c += -msse +CFLAGS-tst-gnu2-tls2.c += -msse2 CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell -LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2 += -rdynamic +LDFLAGS-tst-gnu2-tls2mod0.so += -Wl,-z,undefs +LDFLAGS-tst-gnu2-tls2mod1.so += -Wl,-z,undefs +LDFLAGS-tst-gnu2-tls2mod2.so += -Wl,-z,undefs + +CFLAGS-tst-gnu2-tls2-x86-noxsave.c += -msse2 +CFLAGS-tst-gnu2-tls2-x86-noxsavec.c += -msse2 +CFLAGS-tst-gnu2-tls2-x86-noxsavexsavec.c += -msse2 +LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -rdynamic +LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -rdynamic +LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy -rdynamic # Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled # via tunable. 
@@ -113,6 +127,18 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ $(objpfx)tst-gnu2-tls2mod0.so \ $(objpfx)tst-gnu2-tls2mod1.so \ $(objpfx)tst-gnu2-tls2mod2.so + +CFLAGS-tst-tls23.c += -msse2 +CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell + +LDFLAGS-tst-tls23 += -rdynamic +tst-tls23-mod.so-no-z-defs = yes + +$(objpfx)tst-tls23-mod.so: $(libsupport) +endif + +ifeq ($(subdir),gmon) +CFLAGS-mcount.c += -mgeneral-regs-only endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure index c7ea9ac..dff26e9 100644 --- a/sysdeps/x86/configure +++ b/sysdeps/x86/configure @@ -171,8 +171,12 @@ fi config_vars="$config_vars have-x86-isa-level = $libc_cv_have_x86_isa_level" config_vars="$config_vars +x86-isa-level-2-or-above = 2 3 4" +config_vars="$config_vars x86-isa-level-3-or-above = 3 4" config_vars="$config_vars +x86-isa-level-4-or-above = 4" +config_vars="$config_vars enable-x86-isa-level = $libc_cv_include_x86_isa_level" diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac index 031f917..54960a7 100644 --- a/sysdeps/x86/configure.ac +++ b/sysdeps/x86/configure.ac @@ -117,7 +117,9 @@ else AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level) fi LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) +LIBC_CONFIG_VAR([x86-isa-level-2-or-above], [2 3 4]) LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) +LIBC_CONFIG_VAR([x86-isa-level-4-or-above], [4]) LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) dnl Check if TEST_CC supports -mfpmath=387 diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index e50f1d6..b7d1506 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -1256,7 +1256,7 @@ no_cpuid: #endif if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL - || (GLRO(dl_x86_cpu_features).xsave_state_size != 0)) + || cpu_features->xsave_state_size != 0) { if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) { diff --git a/sysdeps/x86/sysdep.h 
b/sysdeps/x86/sysdep.h index c3c73e7..b8e963b 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -183,6 +183,29 @@ #define atom_text_section .section ".text.atom", "ax" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte/4-byte stack alignment. + Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't + assume that stack will be always aligned at 16 bytes. */ +# ifdef __x86_64__ +# define DL_STACK_ALIGNMENT 8 +# define MINIMUM_ALIGNMENT 16 +# else +# define DL_STACK_ALIGNMENT 4 +# endif +#endif + +/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for + STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling + _dl_fixup/__tls_get_addr. */ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + #endif /* __ASSEMBLER__ */ #endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c index de900a4..b3195ff 100644 --- 
a/sysdeps/x86/tst-gnu2-tls2.c +++ b/sysdeps/x86/tst-gnu2-tls2.c @@ -1,20 +1,26 @@ -#ifndef __x86_64__ -#include <sys/platform/x86.h> +#ifndef TEST_AMX +# ifndef __x86_64__ +# include <sys/platform/x86.h> -#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) -#endif +# define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +# endif -/* Clear XMM0...XMM7 */ -#define PREPARE_MALLOC() \ -{ \ - asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \ - asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \ - asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \ - asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \ - asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \ - asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \ - asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \ - asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \ +/* Set XMM0...XMM7 to all 1s. */ +# define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ } +#endif #include <elf/tst-gnu2-tls2.c> + +#ifndef TEST_AMX +v2di v1, v2, v3; +#endif diff --git a/sysdeps/x86/tst-gnu2-tls2.h b/sysdeps/x86/tst-gnu2-tls2.h new file mode 100644 index 0000000..fdbb565 --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2.h @@ -0,0 +1,37 @@ +/* Test TLSDESC relocation, x86 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef TEST_AMX +# include <support/check.h> + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +# define BEFORE_TLSDESC_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +# define AFTER_TLSDESC_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); +#endif + +#include <elf/tst-gnu2-tls2.h> diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c new file mode 100644 index 0000000..6130d91 --- /dev/null +++ b/sysdeps/x86/tst-tls23.c @@ -0,0 +1,22 @@ +#ifndef __x86_64__ +#include <sys/platform/x86.h> + +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +#endif + +/* Set XMM0...XMM7 to all 1s. 
*/ +#define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ +} + +#include <elf/tst-tls23.c> + +v2di v1, v2, v3; diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h new file mode 100644 index 0000000..21cee4c --- /dev/null +++ b/sysdeps/x86/tst-tls23.h @@ -0,0 +1,35 @@ +/* Test that __tls_get_addr preserves XMM registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <support/check.h> + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +#define BEFORE_TLS_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +#define AFTER_TLS_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); + +#include <elf/tst-tls23.h> diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 5723ec1..8cace35 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -11,6 +11,7 @@ endif ifeq ($(subdir),csu) gen-as-const-headers += link-defines.sym +gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif ifeq ($(subdir),gmon) @@ -19,6 +20,10 @@ sysdep_routines += _mcount # recursive calls when ENTRY is used. Just copy the normal static # object. sysdep_noprof += _mcount + +ifeq (yes,$(have-x86-apx)) +CFLAGS-mcount.c += -mno-apxf +endif endif ifeq ($(subdir),string) @@ -41,9 +46,6 @@ ifeq ($(subdir),elf) CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ -mno-mmx) -sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr - -tests += ifuncmain8 modules-names += ifuncmod8 $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so @@ -214,11 +216,25 @@ $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so endif test-internal-extras += tst-gnu2-tls2mod1 -endif # $(subdir) == elf -ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym rtld-offsets.sym -endif +tests-special += $(objpfx)check-rtld.out + +$(objpfx)rtld.reloc: $(objpfx)rtld.os + @rm -f $@T + LC_ALL=C $(READELF) -rW $< > $@T + test -s $@T + mv -f $@T $@ +common-generated += $(objpfx)rtld.reloc + +# Verify that there are no run-time relocations against __ehdr_start nor +# _end. 
+$(objpfx)check-rtld.out: $(objpfx)rtld.reloc + LC_ALL=C; \ + if grep -E "R_X86_64_64.*(__ehdr_start|_end)" $^ > $@; \ + then false; else true; fi; \ + $(evaluate-test) +generated += check-rtld.out +endif # $(subdir) == elf ifeq ($(subdir),wcsmbs) @@ -250,6 +266,15 @@ endif ifneq ($(enable-cet),no) +# Add -fcf-protection to CFLAGS when CET is enabled. +CFLAGS-.o += -fcf-protection +CFLAGS-.os += -fcf-protection +CFLAGS-.op += -fcf-protection +CFLAGS-.oS += -fcf-protection + +# Compile assembly codes with <cet.h> when CET is enabled. +asm-CPPFLAGS += -fcf-protection -include cet.h + ifeq ($(subdir),elf) sysdep-dl-routines += dl-cet @@ -442,18 +467,7 @@ $(objpfx)tst-shstk-legacy-1g.out: \ $(..)/sysdeps/x86_64/tst-shstk-legacy-1g.sh $(objpfx)tst-shstk-legacy-1g $(SHELL) $< $(common-objpfx) '$(test-program-prefix)' 2> $@; \ $(evaluate-test) -endif -# Add -fcf-protection to CFLAGS when CET is enabled. -CFLAGS-.o += -fcf-protection -CFLAGS-.os += -fcf-protection -CFLAGS-.op += -fcf-protection -CFLAGS-.oS += -fcf-protection - -# Compile assembly codes with <cet.h> when CET is enabled. -asm-CPPFLAGS += -fcf-protection -include cet.h - -ifeq ($(subdir),elf) ifeq (yes,$(build-shared)) tests-special += $(objpfx)check-cet.out endif diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index bbf520b..32324f6 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -289,6 +289,8 @@ fi config_vars="$config_vars have-x86-apx = $libc_cv_x86_have_apx" +libc_cv_support_sframe=yes + test -n "$critic_missing" && as_fn_error $? 
" *** $critic_missing" "$LINENO" 5 diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index 4a3f7f4..a00958e 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -104,5 +104,7 @@ if test $libc_cv_x86_have_apx = yes; then fi LIBC_CONFIG_VAR([have-x86-apx], [$libc_cv_x86_have_apx]) +libc_cv_support_sframe=yes + test -n "$critic_missing" && AC_MSG_ERROR([ *** $critic_missing]) diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S index d1bb125..9a55fc5 100644 --- a/sysdeps/x86_64/dl-tlsdesc.S +++ b/sysdeps/x86_64/dl-tlsdesc.S @@ -22,7 +22,6 @@ #include <features-offsets.h> #include <isa-level.h> #include "tlsdesc.h" -#include "dl-trampoline-save.h" /* Area on stack to save and restore registers used for parameter passing when calling _dl_tlsdesc_dynamic. */ diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index a055722..ac85f96 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -22,7 +22,6 @@ #include <features-offsets.h> #include <link-defines.h> #include <isa-level.h> -#include "dl-trampoline-save.h" /* Area on stack to save and restore registers used for parameter passing when calling _dl_fixup. 
*/ diff --git a/sysdeps/x86_64/fpu/math-use-builtins-trunc.h b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h new file mode 100644 index 0000000..c2387eb --- /dev/null +++ b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h @@ -0,0 +1,9 @@ +#ifdef __SSE4_1__ +# define USE_TRUNC_BUILTIN 1 +# define USE_TRUNCF_BUILTIN 1 +#else +# define USE_TRUNC_BUILTIN 0 +# define USE_TRUNCF_BUILTIN 0 +#endif +#define USE_TRUNCL_BUILTIN 0 +#define USE_TRUNCF128_BUILTIN 0 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 3403422..708b142 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -26,6 +26,14 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2 CFLAGS-s_cosf-fma.c = -mfma -mavx2 CFLAGS-s_sincosf-fma.c = -mfma -mavx2 +# Check if ISA level is 2 or above. +ifeq (,$(filter $(have-x86-isa-level),$(x86-isa-level-2-or-above))) +sysdep_calls += \ + s_modf-sse4_1 \ + s_modff-sse4_1 \ +# sysdep_calls +endif + # Check if ISA level is 3 or above. ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above))) libm-sysdep_routines += \ @@ -43,6 +51,10 @@ libm-sysdep_routines += \ s_truncf-avx \ # libm-sysdep_routines else +sysdep_calls += \ + s_modf-avx \ + s_modff-avx \ +# sysdep_calls ifeq (no,$(have-x86-apx)) libm-sysdep_routines += \ e_asin-fma4 \ @@ -121,6 +133,11 @@ libm-sysdep_routines += \ s_truncf-c \ # libm-sysdep_routines endif + +# $(sysdep_calls) functions are built both for libc and libm. While the +# libc objects have the prefix s_, the libm ones are prefixed with m_. 
+sysdep_routines += $(sysdep_calls) +libm-sysdep_routines += $(sysdep_calls:s_%=m_%) endif CFLAGS-e_asin-fma4.c = -mfma4 @@ -140,6 +157,12 @@ CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sincos-avx.c = -msse2avx -DSSE2AVX + +CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x -fno-builtin-modff64 +CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32 + +CFLAGS-s_modf-avx.c = -mavx -fno-builtin-modff32x -fno-builtin-modff64 +CFLAGS-s_modff-avx.c = -mavx -fno-builtin-modff32 endif ifeq ($(subdir),mathvec) diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h new file mode 100644 index 0000000..071595f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1-avx.h @@ -0,0 +1,41 @@ +/* Common definition for ifunc selections optimized with SSE4.1 and AVX. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURE_USABLE_P (cpu_features, AVX)) + return OPTIMIZE (avx); + +#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL + return OPTIMIZE (sse41); +#else + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse41); + + return OPTIMIZE (sse2); +#endif +} diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c new file mode 100644 index 0000000..ab4f03d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c @@ -0,0 +1,3 @@ +#define __modf __modf_avx + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c new file mode 100644 index 0000000..00aa8cd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c @@ -0,0 +1,3 @@ +#define __modf __modf_sse41 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/unix/sysv/linux/kernel_termios.h b/sysdeps/x86_64/fpu/multiarch/s_modf.c index f02a197..e365bfc 100644 --- a/sysdeps/unix/sysv/linux/kernel_termios.h +++ b/sysdeps/x86_64/fpu/multiarch/s_modf.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1997-2025 Free Software Foundation, Inc. +/* Multiple versions of modf + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -15,23 +16,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef _KERNEL_TERMIOS_H -#define _KERNEL_TERMIOS_H 1 -/* The following corresponds to the values from the Linux 2.1.20 kernel. 
*/ - -#define __KERNEL_NCCS 19 - -struct __kernel_termios - { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[__KERNEL_NCCS]; /* control characters */ - }; - -#define _HAVE_C_ISPEED 0 -#define _HAVE_C_OSPEED 0 - -#endif /* kernel_termios.h */ +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include <libm-alias-double.h> + +# define modf __redirect_modf +# define __modf __redirect___modf +# include <math.h> +# undef modf +# undef __modf + +# define SYMBOL_NAME modf +# include "ifunc-sse4_1-avx.h" + +libc_ifunc_redirected (__redirect_modf, __modf, IFUNC_SELECTOR ()); +libm_alias_double (__modf, modf) +# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# define __modf __modf_sse41 +# else +# define __modf __modf_sse2 +# endif +#endif +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c new file mode 100644 index 0000000..07cb9c1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c @@ -0,0 +1,3 @@ +#define __modff __modff_avx + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c new file mode 100644 index 0000000..060c5e3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c @@ -0,0 +1,3 @@ +#define __modff __modff_sse41 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/fpu/multiarch/s_modff.c index 761128d..a4b5429 100644 --- a/sysdeps/x86_64/dl-trampoline-save.h +++ b/sysdeps/x86_64/fpu/multiarch/s_modff.c @@ -1,5 +1,5 @@ -/* x86-64 PLT trampoline register save macros. - Copyright (C) 2024-2025 Free Software Foundation, Inc. 
+/* Multiple versions of modff + Copyright (C) 2025 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,19 +16,26 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: +#include <sysdeps/x86/isa-level.h> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include <libm-alias-float.h> - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +# define modff __redirect_modff +# define __modff __redirect___modff +# include <math.h> +# undef modff +# undef __modff - __tls_get_addr may be called with 8-byte stack alignment. Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. */ -# define DL_STACK_ALIGNMENT 8 -#endif +# define SYMBOL_NAME modff +# include "ifunc-sse4_1-avx.h" -/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align - stack to 16 bytes before calling _dl_fixup. 
*/ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || 16 > DL_STACK_ALIGNMENT) +libc_ifunc_redirected (__redirect_modff, __modff, IFUNC_SELECTOR ()); +libm_alias_float (__modf, modf) +# if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL +# define __modff __modff_sse41 +# else +# define __modff __modff_sse2 +# endif +#endif +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index a834977..c2dcadd 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -922,7 +922,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcsncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, 1, __wcsncpy_generic)) @@ -952,7 +952,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (CPU_FEATURE_USABLE (AVX2) && CPU_FEATURE_USABLE (BMI2)), __wcpncpy_avx2) - X86_IFUNC_IMPL_ADD_V2 (array, i, wcsncpy, + X86_IFUNC_IMPL_ADD_V2 (array, i, wcpncpy, 1, __wcpncpy_generic)) |