author     Joe Ramsay <Joe.Ramsay@arm.com>          2025-01-03 20:12:10 +0000
committer  Wilco Dijkstra <wilco.dijkstra@arm.com>  2025-01-03 21:39:56 +0000
commit     40c3a0629321eb2442a220cd9254a0086f70c422 (patch)
tree       317e6ce1a9b1032a79ec884c5b738a0fe8b77af2 /sysdeps/aarch64
parent     6050b45716ed76b9c5407bea160db53195dcc445 (diff)
AArch64: Add vector cospi routines
Vector variants of the new C23 cospi function, in Advanced SIMD and SVE versions for both single and double precision. New tests pass on AArch64.
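
The new symbols follow the libmvec vector-ABI naming: _ZGVnN2v_cospi and _ZGVnN4v_cospif for Advanced SIMD, _ZGVsMxv_cospi and _ZGVsMxv_cospif for SVE. As a minimal sketch of how a caller reaches one of them — not part of this patch, and assuming glibc 2.41 or later on AArch64, linked with -lmvec -lm — the Advanced SIMD double-precision entry point can be invoked directly:

/* usage-sketch.c: call the vector cospi entry point directly.  Expected
   output is roughly "0.707107 -1.000000", since cospi(0.25) = sqrt(2)/2
   and cospi(1) = -1.  */
#include <arm_neon.h>
#include <stdio.h>

/* Declared in <math.h> via bits/math-vector.h on glibc 2.41+; declared
   by hand here so the sketch is self-contained.  */
__attribute__ ((aarch64_vector_pcs)) float64x2_t _ZGVnN2v_cospi (float64x2_t);

int
main (void)
{
  float64x2_t x = { 0.25, 1.0 };
  float64x2_t y = _ZGVnN2v_cospi (x);
  printf ("%f %f\n", vgetq_lane_f64 (y, 0), vgetq_lane_f64 (y, 1));
  return 0;
}

Alternatively, via the __DECL_SIMD_cospi declarations this patch adds to bits/math-vector.h, a new-enough compiler can auto-vectorize plain loops over scalar cospi calls into these same entry points.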
Diffstat (limited to 'sysdeps/aarch64')
-rw-r--r--  sysdeps/aarch64/fpu/Makefile                          1
-rw-r--r--  sysdeps/aarch64/fpu/Versions                          5
-rw-r--r--  sysdeps/aarch64/fpu/advsimd_f32_protos.h              1
-rw-r--r--  sysdeps/aarch64/fpu/bits/math-vector.h                8
-rw-r--r--  sysdeps/aarch64/fpu/cospi_advsimd.c                  87
-rw-r--r--  sysdeps/aarch64/fpu/cospi_sve.c                      65
-rw-r--r--  sysdeps/aarch64/fpu/cospif_advsimd.c                 87
-rw-r--r--  sysdeps/aarch64/fpu/cospif_sve.c                     61
-rw-r--r--  sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c    1
-rw-r--r--  sysdeps/aarch64/fpu/test-double-sve-wrappers.c        1
-rw-r--r--  sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c     1
-rw-r--r--  sysdeps/aarch64/fpu/test-float-sve-wrappers.c         1
-rw-r--r--  sysdeps/aarch64/libm-test-ulps                        8
13 files changed, 327 insertions(+), 0 deletions(-)
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 915da37..6d1e55c 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -8,6 +8,7 @@ libmvec-supported-funcs = acos \
   cbrt \
   cos \
   cosh \
+  cospi \
   erf \
   erfc \
   exp \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 4cbb906..f8581cf 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -136,6 +136,11 @@ libmvec {
     _ZGVsMxv_tanhf;
   }
   GLIBC_2.41 {
+    _ZGVnN2v_cospi;
+    _ZGVnN2v_cospif;
+    _ZGVnN4v_cospif;
+    _ZGVsMxv_cospi;
+    _ZGVsMxv_cospif;
     _ZGVnN2v_logp1;
     _ZGVnN2v_logp1f;
     _ZGVnN4v_logp1f;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 1b30407..8e9c064 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -26,6 +26,7 @@ libmvec_hidden_proto (V_NAME_F1(atanh));
 libmvec_hidden_proto (V_NAME_F1(cbrt));
 libmvec_hidden_proto (V_NAME_F1(cos));
 libmvec_hidden_proto (V_NAME_F1(cosh));
+libmvec_hidden_proto (V_NAME_F1(cospi));
 libmvec_hidden_proto (V_NAME_F1(erf));
 libmvec_hidden_proto (V_NAME_F1(erfc));
 libmvec_hidden_proto (V_NAME_F1(exp10));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index a4ee9fb..2c39674 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -69,6 +69,10 @@
 # define __DECL_SIMD_cosh __DECL_SIMD_aarch64
 # undef __DECL_SIMD_coshf
 # define __DECL_SIMD_coshf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cospi
+# define __DECL_SIMD_cospi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cospif
+# define __DECL_SIMD_cospif __DECL_SIMD_aarch64
 # undef __DECL_SIMD_erf
 # define __DECL_SIMD_erf __DECL_SIMD_aarch64
 # undef __DECL_SIMD_erff
@@ -177,6 +181,7 @@ __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_cospif (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
@@ -206,6 +211,7 @@ __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cospi (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
@@ -240,6 +246,7 @@ __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_cospif (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_erfcf (__sv_f32_t, __sv_bool_t);
 __sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t);
@@ -269,6 +276,7 @@ __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_cospi (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_erfc (__sv_f64_t, __sv_bool_t);
 __sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cospi_advsimd.c b/sysdeps/aarch64/fpu/cospi_advsimd.c
new file mode 100644
index 0000000..dcd12c8
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospi_advsimd.c
@@ -0,0 +1,87 @@
+/* Double-precision (Advanced SIMD) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+  float64x2_t poly[10];
+  float64x2_t range_val;
+} data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
+            V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
+            V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
+            V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
+            V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
+  .range_val = V2 (0x1p63),
+};
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
+{
+  /* Fall back to scalar code.  */
+  y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+  return v_call_f64 (cospi, x, y, cmp);
+}
+
+/* Approximation for vector double-precision cospi(x).
+   Maximum Error 3.06 ULP:
+   _ZGVnN2v_cospi(0x1.7dd4c0b03cc66p-5) got 0x1.fa854babfb6bep-1
+                                       want 0x1.fa854babfb6c1p-1.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (cospi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  float64x2_t r = vabsq_f64 (x);
+  uint64x2_t cmp = vcaleq_f64 (v_f64 (0x1p64), x);
+
+  /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd
+     to avoid them overflowing and throwing exceptions.  */
+  r = v_zerofy_f64 (r, cmp);
+  uint64x2_t odd = vshlq_n_u64 (vcvtnq_u64_f64 (r), 63);
+
+#else
+  float64x2_t r = x;
+  uint64x2_t cmp = vcageq_f64 (r, d->range_val);
+  uint64x2_t odd
+      = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63);
+
+#endif
+
+  r = vsubq_f64 (r, vrndaq_f64 (r));
+
+  /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2.  */
+  r = vsubq_f64 (v_f64 (0.5), vabsq_f64 (r));
+
+  /* y = sin(r).  */
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t r4 = vmulq_f64 (r2, r2);
+  float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r);
+
+  /* Fallback to scalar.  */
+  if (__glibc_unlikely (v_any_u64 (cmp)))
+    return special_case (x, y, odd, cmp);
+
+  /* Reintroduce the sign bit for inputs which round to odd.  */
+  return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+}
diff --git a/sysdeps/aarch64/fpu/cospi_sve.c b/sysdeps/aarch64/fpu/cospi_sve.c
new file mode 100644
index 0000000..dd98815
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospi_sve.c
@@ -0,0 +1,65 @@
+/* Double-precision (SVE) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+  double poly[10];
+  double range_val;
+} data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
+            -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+            0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
+            0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+  .range_val = 0x1p53,
+};
+
+/* A fast SVE implementation of cospi.
+   Maximum error 3.20 ULP:
+   _ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1
+                                       want 0x1.fdabf595f9766p-1.  */
+svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Using cospi(x) = sinpi(0.5 - x)
+     range reduction and offset into sinpi range -1/2 .. 1/2
+     r = 0.5 - |x - rint(x)|.  */
+  svfloat64_t n = svrinta_x (pg, x);
+  svfloat64_t r = svsub_x (pg, x, n);
+  r = svsub_x (pg, sv_f64 (0.5), svabs_x (pg, r));
+
+  /* Result should be negated based on if n is odd or not.
+     If ax >= 2^53, the result will always be positive.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n));
+  svuint64_t sign = svlsl_z (cmp, intn, 63);
+
+  /* y = sin(r).  */
+  svfloat64_t r2 = svmul_x (pg, r, r);
+  svfloat64_t r4 = svmul_x (pg, r2, r2);
+  svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly);
+  y = svmul_x (pg, y, r);
+
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/cospif_advsimd.c b/sysdeps/aarch64/fpu/cospif_advsimd.c
new file mode 100644
index 0000000..a81471f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospif_advsimd.c
@@ -0,0 +1,87 @@
+/* Single-precision (Advanced SIMD) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+static const struct data
+{
+  float32x4_t poly[6];
+  float32x4_t range_val;
+} data = {
+  /* Taylor series coefficents for sin(pi * x).  */
+  .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
+            V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
+  .range_val = V4 (0x1p31f),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
+{
+  y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+  return v_call_f32 (cospif, x, y, cmp);
+}
+
+/* Approximation for vector single-precision cospi(x)
+   Maximum Error: 3.17 ULP:
+   _ZGVnN4v_cospif(0x1.d341a8p-5) got 0x1.f7cd56p-1
+                                 want 0x1.f7cd5p-1.  */
+float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  float32x4_t r = vabsq_f32 (x);
+  uint32x4_t cmp = vcaleq_f32 (v_f32 (0x1p32f), x);
+
+  /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd
+     to avoid them overflowing and throwing exceptions.  */
+  r = v_zerofy_f32 (r, cmp);
+  uint32x4_t odd = vshlq_n_u32 (vcvtnq_u32_f32 (r), 31);
+
+#else
+  float32x4_t r = x;
+  uint32x4_t cmp = vcageq_f32 (r, d->range_val);
+
+  uint32x4_t odd
+      = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31);
+
+#endif
+
+  /* r = x - rint(x).  */
+  r = vsubq_f32 (r, vrndaq_f32 (r));
+
+  /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2.  */
+  r = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (r));
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t r4 = vmulq_f32 (r2, r2);
+  float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r);
+
+  /* Fallback to scalar.  */
+  if (__glibc_unlikely (v_any_u32 (cmp)))
+    return special_case (x, y, odd, cmp);
+
+  /* Reintroduce the sign bit for inputs which round to odd.  */
+  return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+}
+
+libmvec_hidden_def (V_NAME_F1 (cospi))
+HALF_WIDTH_ALIAS_F1 (cospi)
diff --git a/sysdeps/aarch64/fpu/cospif_sve.c b/sysdeps/aarch64/fpu/cospif_sve.c
new file mode 100644
index 0000000..e8980da
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospif_sve.c
@@ -0,0 +1,61 @@
+/* Single-precision (SVE) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+  float poly[6];
+  float range_val;
+} data = {
+  /* Taylor series coefficents for sin(pi * x).  */
+  .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
+            0x1.50783p-4f, -0x1.e30750p-8f },
+  .range_val = 0x1p31f,
+};
+
+/* A fast SVE implementation of cospif.
+   Maximum error: 2.60 ULP:
+   _ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1
+                                    want 0x1.e09c98p-1.  */
+svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Using cospi(x) = sinpi(0.5 - x)
+     range reduction and offset into sinpi range -1/2 .. 1/2
+     r = 0.5 - |x - rint(x)|.  */
+  svfloat32_t n = svrinta_x (pg, x);
+  svfloat32_t r = svsub_x (pg, x, n);
+  r = svsub_x (pg, sv_f32 (0.5f), svabs_x (pg, r));
+
+  /* Result should be negated based on if n is odd or not.
+     If ax >= 2^31, the result will always be positive.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n));
+  svuint32_t sign = svlsl_z (cmp, intn, 31);
+
+  /* y = sin(r).  */
+  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly);
+  y = svmul_x (pg, y, r);
+
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index b2332e6..0b0cc65 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -33,6 +33,7 @@ VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
 VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt)
 VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
 VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
+VPCS_VECTOR_WRAPPER (cospi_advsimd, _ZGVnN2v_cospi)
 VPCS_VECTOR_WRAPPER (erf_advsimd, _ZGVnN2v_erf)
 VPCS_VECTOR_WRAPPER (erfc_advsimd, _ZGVnN2v_erfc)
 VPCS_VECTOR_WRAPPER (exp_advsimd, _ZGVnN2v_exp)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index e833a840..fb4f6c4 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -52,6 +52,7 @@ SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
 SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt)
 SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
 SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
+SVE_VECTOR_WRAPPER (cospi_sve, _ZGVsMxv_cospi)
 SVE_VECTOR_WRAPPER (erf_sve, _ZGVsMxv_erf)
 SVE_VECTOR_WRAPPER (erfc_sve, _ZGVsMxv_erfc)
 SVE_VECTOR_WRAPPER (exp_sve, _ZGVsMxv_exp)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index bede18b..51b05cb 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -33,6 +33,7 @@ VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
 VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf)
 VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
 VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
+VPCS_VECTOR_WRAPPER (cospif_advsimd, _ZGVnN4v_cospif)
 VPCS_VECTOR_WRAPPER (erff_advsimd, _ZGVnN4v_erff)
 VPCS_VECTOR_WRAPPER (erfcf_advsimd, _ZGVnN4v_erfcf)
 VPCS_VECTOR_WRAPPER (expf_advsimd, _ZGVnN4v_expf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index d5497b2..a10de3f 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -52,6 +52,7 @@ SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
 SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf)
 SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
 SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
+SVE_VECTOR_WRAPPER (cospif_sve, _ZGVsMxv_cospif)
 SVE_VECTOR_WRAPPER (erff_sve, _ZGVsMxv_erff)
 SVE_VECTOR_WRAPPER (erfcf_sve, _ZGVsMxv_erfcf)
 SVE_VECTOR_WRAPPER (expf_sve, _ZGVsMxv_expf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index c095c72..12f1b77 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -803,11 +803,19 @@ double: 2
 float: 2
 ldouble: 2
 
+Function: "cospi_advsimd":
+double: 2
+float: 1
+
 Function: "cospi_downward":
 double: 1
 float: 2
 ldouble: 2
 
+Function: "cospi_sve":
+double: 2
+float: 1
+
 Function: "cospi_towardzero":
 double: 1
 float: 1
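
For reference, the algorithm shared by all four routines can be summarized outside the intrinsics. The following scalar sketch is illustrative only — it is not part of the patch and not glibc code. It uses the same reduction cospi(x) = (-1)^n * sinpi(0.5 - |r|) with n = round-to-nearest-ties-away(x) and r = x - n, the same degree-10 sinpi(t)/t polynomial (the first coefficient is pi), and the same trick of XORing the parity of n into the sign bit. The special-case handling here is a simplifying assumption: the real Advanced SIMD routines fall back to scalar cospi above their range_val, while the SVE routines simply force a positive sign.

/* Scalar sketch of the vector cospi algorithm (assumption: illustrative
   simplification, not the glibc implementation).  */
#include <math.h>
#include <stdint.h>
#include <string.h>

static double
cospi_scalar_sketch (double x)
{
  /* NaN or Inf: return NaN (x - x also raises invalid for Inf).  */
  if (isnan (x) || isinf (x))
    return x - x;
  /* |x| >= 2^53: every representable double is an even integer, so
     cospi(x) == 1 (this mirrors the SVE routines' range_val cutoff).  */
  if (fabs (x) >= 0x1p53)
    return 1.0;

  double n = round (x);        /* Round to nearest, ties away from zero.  */
  double r = x - n;            /* r in [-0.5, 0.5].  */
  double t = 0.5 - fabs (r);   /* cos(pi*r) = sin(pi*t), t in [0, 0.5].  */

  /* sinpi(t) ~= t * P(t^2), same coefficients as the patch.  */
  static const double c[] =
    { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
      -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
      0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
      0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 };
  double t2 = t * t;
  double p = c[9];
  for (int i = 8; i >= 0; i--)
    p = p * t2 + c[i];
  double y = t * p;

  /* Negate when n is odd: shift the parity of n into the sign bit and
     XOR it in, just like the 'odd'/'sign' masks in the vector code.  */
  uint64_t odd = (uint64_t) (int64_t) n << 63;
  uint64_t bits;
  memcpy (&bits, &y, sizeof bits);
  bits ^= odd;
  memcpy (&y, &bits, sizeof y);
  return y;
}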