diff options
Diffstat (limited to 'sysdeps')
51 files changed, 1635 insertions, 935 deletions
diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c index 0889f79..ff6b713 100644 --- a/sysdeps/aarch64/fpu/asinh_sve.c +++ b/sysdeps/aarch64/fpu/asinh_sve.c @@ -18,36 +18,49 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" #define SignMask (0x8000000000000000) #define One (0x3ff0000000000000) #define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). */ +#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1) static const struct data { - double poly[18]; - double ln2, p3, p1, p4, p0, p2; - uint64_t n; - uint64_t off; + double even_coeffs[9]; + double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17; + uint64_t off, mask; } data = { - /* Polynomial generated using Remez on [2^-26, 1]. */ - .poly - = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, - 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, - -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, - 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, - -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, - 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18 }, + /* Polynomial generated using Remez on [2^-26, 1]. 
*/ + .even_coeffs ={ + -0x1.55555555554a7p-3, + -0x1.6db6db68332e6p-5, + -0x1.6e8b8b654a621p-6, + -0x1.c9871d10885afp-7, + -0x1.3ddca533e9f54p-7, + -0x1.b90c7099dd397p-8, + -0x1.d217026a669ecp-9, + -0x1.e0f37daef9127p-11, + -0x1.021a48685e287p-14, }, + + .c1 = 0x1.3333333326c7p-4, + .c3 = 0x1.f1c71b26fb40dp-6, + .c5 = 0x1.1c4daa9e67871p-6, + .c7 = 0x1.7a16e8d9d2ecfp-7, + .c9 = 0x1.0becef748dafcp-7, + .c11 = 0x1.541f2bb1ffe51p-8, + .c13 = 0x1.0b5c7977aaf7p-9, + .c15 = 0x1.388b5fe542a6p-12, + .c17 = 0x1.93d4ba83d34dap-18, + .ln2 = 0x1.62e42fefa39efp-1, .p0 = -0x1.ffffffffffff7p-2, .p1 = 0x1.55555555170d4p-2, .p2 = -0x1.0000000399c27p-2, .p3 = 0x1.999b2e90e94cap-3, .p4 = -0x1.554e550bd501ep-3, - .n = 1 << V_LOG_TABLE_BITS, - .off = 0x3fe6900900000000 + .off = 0x3fe6900900000000, + .mask = 0xfffULL << 52, }; static svfloat64_t NOINLINE @@ -64,11 +77,10 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) of the algorithm used. */ svuint64_t ix = svreinterpret_u64 (x); - svuint64_t tmp = svsub_x (pg, ix, d->off); - svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), - (d->n - 1) << 1); - svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); + svuint64_t i_off = svsub_x (pg, ix, d->off); + svuint64_t i + = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask); + svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); @@ -78,14 +90,14 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg) svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1); svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z); - svfloat64_t kd = svcvt_f64_x (pg, k); + svfloat64_t kd + = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52)); svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0); - svfloat64_t r2 
= svmul_x (pg, r, r); - + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1); - svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0); + y = svmla_lane (y, r2, p1_p4, 1); y = svmla_x (pg, p, r2, y); y = svmla_x (pg, hi, r2, y); @@ -111,7 +123,6 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svuint64_t iax = svbic_x (pg, ix, SignMask); svuint64_t sign = svand_x (pg, ix, SignMask); svfloat64_t ax = svreinterpret_f64 (iax); - svbool_t ge1 = svcmpge (pg, iax, One); svbool_t special = svcmpge (pg, iax, Thres); @@ -120,7 +131,7 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) svfloat64_t option_1 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, ge1))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); option_1 = __sv_log_inline ( svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg); } @@ -130,21 +141,53 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg) The largest observed error in this region is 1.51 ULPs: _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1 want 0x1.c1e649ee2681dp-1. */ + svfloat64_t option_2 = sv_f64 (0); if (__glibc_likely (svptest_any (pg, svnot_z (pg, ge1)))) { - svfloat64_t x2 = svmul_x (pg, ax, ax); - svfloat64_t x4 = svmul_x (pg, x2, x2); - svfloat64_t p = sv_pw_horner_17_f64_x (pg, x2, x4, d->poly); - option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax)); + svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax); + svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2); + /* Order-17 Pairwise Horner scheme. 
*/ + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); + svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); + svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); + svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); + + svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1); + svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0); + svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1); + svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0); + svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1); + svfloat64_t p1213 + = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0); + svfloat64_t p1415 + = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1); + svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17); + + svfloat64_t p = svmla_x (pg, p1415, x4, p1617); + p = svmla_x (pg, p1213, x4, p); + p = svmla_x (pg, p1011, x4, p); + p = svmla_x (pg, p89, x4, p); + + p = svmla_x (pg, p67, x4, p); + p = svmla_x (pg, p45, x4, p); + + p = svmla_x (pg, p23, x4, p); + + p = svmla_x (pg, p01, x4, p); + + option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax)); } - /* Choose the right option for each lane. */ - svfloat64_t y = svsel (ge1, option_1, option_2); - if (__glibc_unlikely (svptest_any (pg, special))) return special_case ( - x, svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)), + x, + svreinterpret_f64 (sveor_x ( + pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)), special); + + /* Choose the right option for each lane. 
*/ + svfloat64_t y = svsel (ge1, option_1, option_2); return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); } diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c index ca44053..77e58e1 100644 --- a/sysdeps/aarch64/fpu/cosh_sve.c +++ b/sysdeps/aarch64/fpu/cosh_sve.c @@ -23,7 +23,7 @@ static const struct data { float64_t poly[3]; float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres; - uint64_t index_mask, special_bound; + uint64_t special_bound; } data = { .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3, 0x1.5555576a59599p-5, }, @@ -35,14 +35,16 @@ static const struct data .shift = 0x1.8p+52, .thres = 704.0, - .index_mask = 0xff, /* 0x1.6p9, above which exp overflows. */ .special_bound = 0x4086000000000000, }; static svfloat64_t NOINLINE -special_case (svfloat64_t x, svfloat64_t y, svbool_t special) +special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special) { + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); + svfloat64_t y = svadd_x (pg, half_t, half_over_t); return sv_call_f64 (cosh, x, y, special); } @@ -60,12 +62,12 @@ exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d) svuint64_t u = svreinterpret_u64 (z); svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS); - svuint64_t i = svand_x (pg, u, d->index_mask); + svuint64_t i = svand_x (svptrue_b64 (), u, 0xff); svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]); y = svmla_x (pg, sv_f64 (d->poly[0]), r, y); y = svmla_x (pg, sv_f64 (1.0), r, y); - y = svmul_x (pg, r, y); + y = svmul_x (svptrue_b64 (), r, y); /* s = 2^(n/N). */ u = svld1_gather_index (pg, __v_exp_tail_data, i); @@ -94,12 +96,12 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg) /* Up to the point that exp overflows, we can use it to calculate cosh by exp(|x|) / 2 + 1 / (2 * exp(|x|)). 
*/ svfloat64_t t = exp_inline (ax, pg, d); - svfloat64_t half_t = svmul_x (pg, t, 0.5); - svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); /* Fall back to scalar for any special cases. */ if (__glibc_unlikely (svptest_any (pg, special))) - return special_case (x, svadd_x (pg, half_t, half_over_t), special); + return special_case (x, pg, t, special); + svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5); + svfloat64_t half_over_t = svdivr_x (pg, t, 0.5); return svadd_x (pg, half_t, half_over_t); } diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c index 2743f9d..b57ab51 100644 --- a/sysdeps/aarch64/fpu/erfcf_sve.c +++ b/sysdeps/aarch64/fpu/erfcf_sve.c @@ -76,7 +76,7 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx); /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */ - i = svmul_x (pg, i, 2); + i = svlsl_x (svptrue_b32 (), i, 1); const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr; svfloat32_t erfcr = svld1_gather_index (pg, p, i); svfloat32_t scale = svld1_gather_index (pg, p + 1, i); @@ -84,15 +84,15 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg) /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). 
*/ svfloat32_t r = svsub_x (pg, z, shift); svfloat32_t d = svsub_x (pg, a, r); - svfloat32_t d2 = svmul_x (pg, d, d); - svfloat32_t r2 = svmul_x (pg, r, r); + svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d); + svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third); - svfloat32_t third = svdup_lane (coeffs, 0); svfloat32_t p1 = r; - svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1); - svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); + svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1); + svfloat32_t p3 + = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0)); svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2); p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4); diff --git a/sysdeps/aarch64/fpu/exp10_sve.c b/sysdeps/aarch64/fpu/exp10_sve.c index f71bafd..53b2893 100644 --- a/sysdeps/aarch64/fpu/exp10_sve.c +++ b/sysdeps/aarch64/fpu/exp10_sve.c @@ -18,21 +18,23 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" #define SpecialBound 307.0 /* floor (log10 (2^1023)). */ static const struct data { - double poly[5]; + double c1, c3, c2, c4, c0; double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound; } data = { /* Coefficients generated using Remez algorithm. rel error: 0x1.9fcb9b3p-60 abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ] max ulp err 0.52 +0.5. */ - .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1, - 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 }, + .c0 = 0x1.26bb1bbb55516p1, + .c1 = 0x1.53524c73cd32ap1, + .c2 = 0x1.0470591daeafbp1, + .c3 = 0x1.2bd77b1361ef6p0, + .c4 = 0x1.142b5d54e9621p-1, /* 1.5*2^46+1023. This value is further explained below. */ .shift = 0x1.800000000ffc0p+46, .log10_2 = 0x1.a934f0979a371p1, /* 1/log2(10). 
*/ @@ -70,9 +72,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->scale_thres); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,11 +105,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg) comes at significant performance cost. */ svuint64_t u = svreinterpret_u64 (z); svfloat64_t scale = svexpa (u); - + svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2); /* Approximate exp10(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2, - sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1)); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0); + svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1); + svfloat64_t p14 = svmla_x (pg, p12, p34, r2); + + svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14); /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound multiplication may overflow, so use special case routine. */ diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c index a37c330..6db8526 100644 --- a/sysdeps/aarch64/fpu/exp2_sve.c +++ b/sysdeps/aarch64/fpu/exp2_sve.c @@ -18,7 +18,6 @@ <https://www.gnu.org/licenses/>. */ #include "sv_math.h" -#include "poly_sve_f64.h" #define N (1 << V_EXP_TABLE_BITS) @@ -27,15 +26,15 @@ static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double shift, big_bound, uoflow_bound; } data = { /* Coefficients are computed using Remez algorithm with minimisation of the absolute error. 
*/ - .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5, - 0x1.3b2abf5571ad8p-7 }, - .shift = 0x1.8p52 / N, - .uoflow_bound = UOFlowBound, + .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3, + .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7, + .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound, .big_bound = BigBound, }; @@ -67,9 +66,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n, /* |n| > 1280 => 2^(n) overflows. */ svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -99,11 +98,14 @@ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg) svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS); svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top)); + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* Approximate exp2(r) using polynomial. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly); - svfloat64_t y = svmul_x (pg, r, p); - + /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); + svfloat64_t p = svmla_x (pg, p01, p23, r2); + svfloat64_t y = svmul_x (svptrue_b64 (), r, p); /* Assemble exp2(x) = exp2(r) * scale. 
*/ if (__glibc_unlikely (svptest_any (pg, special))) return special_case (pg, scale, y, kd, d); diff --git a/sysdeps/aarch64/fpu/exp_sve.c b/sysdeps/aarch64/fpu/exp_sve.c index 37de751..dc04948 100644 --- a/sysdeps/aarch64/fpu/exp_sve.c +++ b/sysdeps/aarch64/fpu/exp_sve.c @@ -21,12 +21,15 @@ static const struct data { - double poly[4]; + double c0, c2; + double c1, c3; double ln2_hi, ln2_lo, inv_ln2, shift, thres; + } data = { - .poly = { /* ulp error: 0.53. */ - 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5, - 0x1.1111266d28935p-7 }, + .c0 = 0x1.fffffffffdbcdp-2, + .c1 = 0x1.555555555444cp-3, + .c2 = 0x1.555573c6a9f7dp-5, + .c3 = 0x1.1111266d28935p-7, .ln2_hi = 0x1.62e42fefa3800p-1, .ln2_lo = 0x1.ef35793c76730p-45, /* 1/ln2. */ @@ -36,7 +39,6 @@ static const struct data .thres = 704.0, }; -#define C(i) sv_f64 (d->poly[i]) #define SpecialOffset 0x6000000000000000 /* 0x1p513. */ /* SpecialBias1 + SpecialBias1 = asuint(1.0). */ #define SpecialBias1 0x7000000000000000 /* 0x1p769. */ @@ -56,20 +58,20 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n) svuint64_t b = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */ - /* Set s1 to generate overflow depending on sign of exponent n. */ - svfloat64_t s1 = svreinterpret_f64 ( - svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */ - /* Offset s to avoid overflow in final result if n is below threshold. */ + /* Set s1 to generate overflow depending on sign of exponent n, + ie. s1 = 0x70...0 - b. */ + svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1)); + /* Offset s to avoid overflow in final result if n is below threshold. + ie. s2 = as_u64 (s) - 0x3010...0 + b. */ svfloat64_t s2 = svreinterpret_f64 ( - svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), - b)); /* as_u64 (s) - 0x3010...0 + b. */ + svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b)); /* |n| > 1280 => 2^(n) overflows. 
*/ svbool_t p_cmp = svacgt (pg, n, 1280.0); - svfloat64_t r1 = svmul_x (pg, s1, s1); + svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1); svfloat64_t r2 = svmla_x (pg, s2, s2, y); - svfloat64_t r0 = svmul_x (pg, r2, s1); + svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1); return svsel (p_cmp, r1, r0); } @@ -103,16 +105,16 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg) svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2); svuint64_t u = svreinterpret_u64 (z); svfloat64_t n = svsub_x (pg, z, d->shift); - + svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. */ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi); svfloat64_t r = svmls_lane (x, n, ln2, 0); r = svmls_lane (r, n, ln2, 1); /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t p01 = svmla_x (pg, C (0), C (1), r); - svfloat64_t p23 = svmla_x (pg, C (2), C (3), r); + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0); + svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1); svfloat64_t p04 = svmla_x (pg, p01, p23, r2); svfloat64_t y = svmla_x (pg, r, p04, r2); diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c index 42d551c..b8c1b39 100644 --- a/sysdeps/aarch64/fpu/pow_sve.c +++ b/sysdeps/aarch64/fpu/pow_sve.c @@ -44,19 +44,18 @@ /* Data is defined in v_pow_log_data.c. */ #define N_LOG (1 << V_POW_LOG_TABLE_BITS) -#define A __v_pow_log_data.poly #define Off 0x3fe6955500000000 /* Data is defined in v_pow_exp_data.c. */ #define N_EXP (1 << V_POW_EXP_TABLE_BITS) #define SignBias (0x800 << V_POW_EXP_TABLE_BITS) -#define C __v_pow_exp_data.poly #define SmallExp 0x3c9 /* top12(0x1p-54). */ #define BigExp 0x408 /* top12(512.). */ #define ThresExp 0x03f /* BigExp - SmallExp. */ #define HugeExp 0x409 /* top12(1024.). */ /* Constants associated with pow. 
*/ +#define SmallBoundX 0x1p-126 #define SmallPowX 0x001 /* top12(0x1p-126). */ #define BigPowX 0x7ff /* top12(INFINITY). */ #define ThresPowX 0x7fe /* BigPowX - SmallPowX. */ @@ -64,6 +63,31 @@ #define BigPowY 0x43e /* top12(0x1.749p62). */ #define ThresPowY 0x080 /* BigPowY - SmallPowY. */ +static const struct data +{ + double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo; + double log_c1, log_c3, log_c5, off; + double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo; + double exp_c0, exp_c1; +} data = { + .log_c0 = -0x1p-1, + .log_c1 = -0x1.555555555556p-1, + .log_c2 = 0x1.0000000000006p-1, + .log_c3 = 0x1.999999959554ep-1, + .log_c4 = -0x1.555555529a47ap-1, + .log_c5 = -0x1.2495b9b4845e9p0, + .log_c6 = 0x1.0002b8b263fc3p0, + .off = Off, + .exp_c0 = 0x1.fffffffffffd4p-2, + .exp_c1 = 0x1.5555571d6ef9p-3, + .exp_c2 = 0x1.5555576a5adcep-5, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP, + .ln2_over_n_hi = 0x1.62e42fefc0000p-9, + .ln2_over_n_lo = -0x1.c610ca86c3899p-45, +}; + /* Check if x is an integer. */ static inline svbool_t sv_isint (svbool_t pg, svfloat64_t x) @@ -82,7 +106,7 @@ sv_isnotint (svbool_t pg, svfloat64_t x) static inline svbool_t sv_isodd (svbool_t pg, svfloat64_t x) { - svfloat64_t y = svmul_x (pg, x, 0.5); + svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5); return sv_isnotint (pg, y); } @@ -121,7 +145,7 @@ zeroinfnan (uint64_t i) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint64_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2 * asuint64 (INFINITY) - 1); } @@ -174,16 +198,17 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2, additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. 
*/ static inline svfloat64_t -sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) +sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail, + const struct data *d) { /* x = 2^k z; where z is in range [Off,2*Off) and exact. The range is split into N subintervals. The ith subinterval contains z and c is near its center. */ - svuint64_t tmp = svsub_x (pg, ix, Off); + svuint64_t tmp = svsub_x (pg, ix, d->off); svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS), sv_u64 (N_LOG - 1)); svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52); - svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52))); + svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52)); svfloat64_t z = svreinterpret_f64 (iz); svfloat64_t kd = svcvt_f64_x (pg, k); @@ -199,40 +224,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail) |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */ svfloat64_t r = svmad_x (pg, z, invc, -1.0); /* k*Ln2 + log(c) + r. */ - svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi); + + svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi); + svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0); svfloat64_t t2 = svadd_x (pg, t1, r); - svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo); + svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1); svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r); /* Evaluation is optimized assuming superscalar pipelined execution. */ - svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */ - svfloat64_t ar2 = svmul_x (pg, r, ar); - svfloat64_t ar3 = svmul_x (pg, r, ar2); + + svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0); + svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0); + svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar); + svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2); /* k*Ln2 + log(c) + r + A[0]*r*r. 
*/ svfloat64_t hi = svadd_x (pg, t2, ar2); - svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r); + svfloat64_t lo3 = svmls_x (pg, ar2, ar, r); svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2); /* p = log1p(r) - r - A[0]*r*r. */ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))). */ - svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]); - svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]); - svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]); + + svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4); + svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1); + svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0); + svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1); svfloat64_t p = svmla_x (pg, a34, ar2, a56); p = svmla_x (pg, a12, ar2, p); - p = svmul_x (pg, ar3, p); + p = svmul_x (svptrue_b64 (), ar3, p); svfloat64_t lo = svadd_x ( - pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); + pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p); svfloat64_t y = svadd_x (pg, hi, lo); *tail = svadd_x (pg, svsub_x (pg, hi, y), lo); return y; } +static inline svfloat64_t +sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail, + svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits, + svuint64_t *ki, const struct data *d) +{ + /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ + /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ + svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2); + svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0); + /* z - kd is in [-1, 1] in non-nearest rounding modes. 
*/ + svfloat64_t kd = svrinta_x (pg, z); + *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd)); + + svfloat64_t ln2_over_n_hilo + = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi); + svfloat64_t r = x; + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0); + r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1); + /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ + r = svadd_x (pg, r, xtail); + /* 2^(k/N) ~= scale. */ + svuint64_t idx = svand_x (pg, *ki, N_EXP - 1); + svuint64_t top + = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); + /* This is only a valid scale when -1023*N < k < 1024*N. */ + *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); + *sbits = svadd_x (pg, *sbits, top); + /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ + svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); + *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1); + *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp); + *tmp = svmla_x (pg, r, r2, *tmp); + svfloat64_t scale = svreinterpret_f64 (*sbits); + /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there + is no spurious underflow here even without fma. */ + z = svmla_x (pg, scale, scale, *tmp); + return z; +} + /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */ static inline svfloat64_t sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, - svuint64_t sign_bias) + svuint64_t sign_bias, const struct data *d) { /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow) and other cases of large values of x (scale * (1 + TMP) oflow). */ @@ -240,73 +310,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail, /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). 
*/ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp); - /* Conditions special, uflow and oflow are all expressed as uoflow && - something, hence do not bother computing anything if no lane in uoflow is - true. */ - svbool_t special = svpfalse_b (); - svbool_t uflow = svpfalse_b (); - svbool_t oflow = svpfalse_b (); + svfloat64_t tmp; + svuint64_t sbits, ki; if (__glibc_unlikely (svptest_any (pg, uoflow))) { + svfloat64_t z + = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); + /* |x| is tiny (|x| <= 0x1p-54). */ - uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); + svbool_t uflow + = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000); uflow = svand_z (pg, uoflow, uflow); /* |x| is huge (|x| >= 1024). */ - oflow = svcmpge (pg, abstop, HugeExp); + svbool_t oflow = svcmpge (pg, abstop, HugeExp); oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow)); + /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow - or underflow. */ - special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + or underflow. */ + svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow)); + + /* Update result with special and large cases. */ + z = sv_call_specialcase (tmp, sbits, ki, z, special); + + /* Handle underflow and overflow. */ + svbool_t x_is_neg = svcmplt (pg, x, 0); + svuint64_t sign_mask + = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); + svfloat64_t res_uoflow + = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); + res_uoflow = svreinterpret_f64 ( + svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); + /* Avoid spurious underflow for tiny x. */ + svfloat64_t res_spurious_uflow + = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); + + z = svsel (oflow, res_uoflow, z); + z = svsel (uflow, res_spurious_uflow, z); + return z; } - /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ - /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. 
*/ - svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2); - /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift); - svfloat64_t kd = svadd_x (pg, z, shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, shift); - svfloat64_t r = x; - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi); - r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo); - /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ - r = svadd_x (pg, r, xtail); - /* 2^(k/N) ~= scale. */ - svuint64_t idx = svand_x (pg, ki, N_EXP - 1); - svuint64_t top - = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS); - /* This is only a valid scale when -1023*N < k < 1024*N. */ - svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx); - sbits = svadd_x (pg, sbits, top); - /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */ - svfloat64_t r2 = svmul_x (pg, r, r); - svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]); - tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp); - tmp = svmla_x (pg, r, r2, tmp); - svfloat64_t scale = svreinterpret_f64 (sbits); - /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there - is no spurious underflow here even without fma. */ - z = svmla_x (pg, scale, scale, tmp); - - /* Update result with special and large cases. */ - if (__glibc_unlikely (svptest_any (pg, special))) - z = sv_call_specialcase (tmp, sbits, ki, z, special); - - /* Handle underflow and overflow. */ - svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63); - svbool_t x_is_neg = svcmpne (pg, sign_bit, 0); - svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS); - svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY)); - res_uoflow = svreinterpret_f64 ( - svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask)); - z = svsel (oflow, res_uoflow, z); - /* Avoid spurious underflow for tiny x. 
*/ - svfloat64_t res_spurious_uflow - = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000)); - z = svsel (uflow, res_spurious_uflow, z); - - return z; + return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d); } static inline double @@ -341,47 +384,39 @@ pow_sc (double x, double y) svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) { + const struct data *d = ptr_barrier (&data); + /* This preamble handles special case conditions used in the final scalar fallbacks. It also updates ix and sign_bias, that are used in the core computation too, i.e., exp( y * log (x) ). */ svuint64_t vix0 = svreinterpret_u64 (x); svuint64_t viy0 = svreinterpret_u64 (y); - svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52); /* Negative x cases. */ - svuint64_t sign_bit = svlsr_m (pg, vix0, 63); - svbool_t xisneg = svcmpeq (pg, sign_bit, 1); + svbool_t xisneg = svcmplt (pg, x, 0); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint64_t sign_bias = sv_u64 (0); svuint64_t vix = vix0; - svuint64_t vtopx1 = vtopx0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = sv_isnotint (xisneg, y); - svbool_t yisint_xisneg = sv_isint (xisneg, y); + yint_or_xpos = sv_isint (xisneg, y); svbool_t yisodd_xisneg = sv_isodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff); - vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0)); } - /* Special cases of x or y: zero, inf and nan. */ - svbool_t xspecial = sv_zeroinfnan (pg, vix0); - svbool_t yspecial = sv_zeroinfnan (pg, viy0); - svbool_t special = svorr_z (pg, xspecial, yspecial); - /* Small cases of x: |x| < 0x1p-126. 
*/ - svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff); - svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ - svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0); + svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52); + svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0); svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52)); vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff); @@ -391,20 +426,24 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) /* y_hi = log(ix, &y_lo). */ svfloat64_t vlo; - svfloat64_t vhi = sv_log_inline (pg, vix, &vlo); + svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d); /* z = exp(y_hi, y_lo, sign_bias). */ - svfloat64_t vehi = svmul_x (pg, y, vhi); - svfloat64_t velo = svmul_x (pg, y, vlo); - svfloat64_t vemi = svmls_x (pg, vehi, y, vhi); - velo = svsub_x (pg, velo, vemi); - svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias); + svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi); + svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi); + svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo); + svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d); /* Cases of finite y and finite negative x. */ - vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz); + vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan (""))); + + /* Special cases of x or y: zero, inf and nan. */ + svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0); + svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0); + svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial); /* Cases of zero/inf/nan x or y. 
*/ - if (__glibc_unlikely (svptest_any (pg, special))) + if (__glibc_unlikely (svptest_any (svptrue_b64 (), special))) vz = sv_call2_f64 (pow_sc, x, y, vz, special); return vz; diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c index 29e9acb..7046990 100644 --- a/sysdeps/aarch64/fpu/powf_sve.c +++ b/sysdeps/aarch64/fpu/powf_sve.c @@ -26,7 +26,6 @@ #define Tlogc __v_powf_data.logc #define Texp __v_powf_data.scale #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) -#define Shift 0x1.8p52 #define Norm 0x1p23f /* 0x4b000000. */ /* Overall ULP error bound for pow is 2.6 ulp @@ -36,7 +35,7 @@ static const struct data double log_poly[4]; double exp_poly[3]; float uflow_bound, oflow_bound, small_bound; - uint32_t sign_bias, sign_mask, subnormal_bias, off; + uint32_t sign_bias, subnormal_bias, off; } data = { /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of V_POWF_EXP2_N. */ @@ -53,7 +52,6 @@ static const struct data .small_bound = 0x1p-126f, .off = 0x3f35d000, .sign_bias = SignBias, - .sign_mask = 0x80000000, .subnormal_bias = 0x0b800000, /* 23 << 23. */ }; @@ -86,7 +84,7 @@ svisodd (svbool_t pg, svfloat32_t x) static inline svbool_t sv_zeroinfnan (svbool_t pg, svuint32_t i) { - return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1), + return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1), 2u * 0x7f800000 - 1); } @@ -150,9 +148,14 @@ powf_specialcase (float x, float y, float z) } /* Scalar fallback for special case routines with custom signature. */ -static inline svfloat32_t -sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp) +static svfloat32_t NOINLINE +sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y) { + /* Special cases of x or y: zero, inf and nan. 
*/ + svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1)); + svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2)); + svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial); + svbool_t p = svpfirst (cmp, svpfalse ()); while (svptest_any (cmp, p)) { @@ -182,30 +185,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k, /* Polynomial to approximate log1p(r)/ln2. */ svfloat64_t logx = A (0); - logx = svmla_x (pg, A (1), r, logx); - logx = svmla_x (pg, A (2), r, logx); - logx = svmla_x (pg, A (3), r, logx); - logx = svmla_x (pg, y0, r, logx); + logx = svmad_x (pg, r, logx, A (1)); + logx = svmad_x (pg, r, logx, A (2)); + logx = svmad_x (pg, r, logx, A (3)); + logx = svmad_x (pg, r, logx, y0); *pylogx = svmul_x (pg, y, logx); /* z - kd is in [-1, 1] in non-nearest rounding modes. */ - svfloat64_t kd = svadd_x (pg, *pylogx, Shift); - svuint64_t ki = svreinterpret_u64 (kd); - kd = svsub_x (pg, kd, Shift); + svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx); + svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd)); r = svsub_x (pg, *pylogx, kd); /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). 
*/ - svuint64_t t - = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1)); - svuint64_t ski = svadd_x (pg, ki, sign_bias); - t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS)); + svuint64_t t = svld1_gather_index ( + svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1)); + svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias); + t = svadd_x (svptrue_b64 (), t, + svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS)); svfloat64_t s = svreinterpret_f64 (t); svfloat64_t p = C (0); p = svmla_x (pg, C (1), p, r); p = svmla_x (pg, C (2), p, r); - p = svmla_x (pg, s, p, svmul_x (pg, s, r)); + p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r)); return p; } @@ -219,19 +222,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, { const svbool_t ptrue = svptrue_b64 (); - /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in - order to perform core computation in double precision. */ + /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two + * in order to perform core computation in double precision. 
*/ const svbool_t pg_lo = svunpklo (pg); const svbool_t pg_hi = svunpkhi (pg); - svfloat64_t y_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); - svfloat64_t y_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); - svfloat32_t z = svreinterpret_f32 (iz); - svfloat64_t z_lo = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z)))); - svfloat64_t z_hi = svcvt_f64_x ( - ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z)))); + svfloat64_t y_lo + = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y)))); + svfloat64_t y_hi + = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y)))); + svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz))); + svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz))); svuint64_t i_lo = svunpklo (i); svuint64_t i_hi = svunpkhi (i); svint64_t k_lo = svunpklo (k); @@ -258,9 +258,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k, /* Implementation of SVE powf. Provides the same accuracy as AdvSIMD powf, since it relies on the same algorithm. The theoretical maximum error is under 2.60 ULPs. - Maximum measured error is 2.56 ULPs: - SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127 - want 0x1.fd4b06p+127. */ + Maximum measured error is 2.57 ULPs: + SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127 + want 0x1.fff862p+127. */ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) { const struct data *d = ptr_barrier (&data); @@ -269,21 +269,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svuint32_t viy0 = svreinterpret_u32 (y); /* Negative x cases. 
*/ - svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask); - svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask); + svbool_t xisneg = svcmplt (pg, x, sv_f32 (0)); /* Set sign_bias and ix depending on sign of x and nature of y. */ - svbool_t yisnotint_xisneg = svpfalse_b (); + svbool_t yint_or_xpos = pg; svuint32_t sign_bias = sv_u32 (0); svuint32_t vix = vix0; if (__glibc_unlikely (svptest_any (pg, xisneg))) { /* Determine nature of y. */ - yisnotint_xisneg = svisnotint (xisneg, y); - svbool_t yisint_xisneg = svisint (xisneg, y); + yint_or_xpos = svisint (xisneg, y); svbool_t yisodd_xisneg = svisodd (xisneg, y); /* ix set to abs(ix) if y is integer. */ - vix = svand_m (yisint_xisneg, vix0, 0x7fffffff); + vix = svand_m (yint_or_xpos, vix0, 0x7fffffff); /* Set to SignBias if x is negative and y is odd. */ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0)); } @@ -294,8 +292,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) svbool_t cmp = svorr_z (pg, xspecial, yspecial); /* Small cases of x: |x| < 0x1p-126. */ - svbool_t xsmall = svaclt (pg, x, d->small_bound); - if (__glibc_unlikely (svptest_any (pg, xsmall))) + svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound); + if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall))) { /* Normalize subnormal x so exponent becomes negative. */ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm)); @@ -304,32 +302,35 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg) vix = svsel (xsmall, vix_norm, vix); } /* Part of core computation carried in working precision. 
*/ - svuint32_t tmp = svsub_x (pg, vix, d->off); - svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), - V_POWF_LOG2_N - 1); - svuint32_t top = svand_x (pg, tmp, 0xff800000); - svuint32_t iz = svsub_x (pg, vix, top); - svint32_t k - = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS)); - - /* Compute core in extended precision and return intermediate ylogx results to - handle cases of underflow and underflow in exp. */ + svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off); + svuint32_t i = svand_x ( + yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)), + V_POWF_LOG2_N - 1); + svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000); + svuint32_t iz = svsub_x (yint_or_xpos, vix, top); + svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top), + (23 - V_POWF_EXP2_TABLE_BITS)); + + /* Compute core in extended precision and return intermediate ylogx results + * to handle cases of underflow and overflow in exp. */ svfloat32_t ylogx; - svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d); + svfloat32_t ret + = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d); /* Handle exp special cases of underflow and overflow. */ - svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); + svuint32_t sign + = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS); svfloat32_t ret_oflow - = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY))); + = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY))); svfloat32_t ret_uflow = svreinterpret_f32 (sign); - ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret); - ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret); + ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret); + ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret); /* Cases of finite y and finite negative x. 
*/ - ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret); + ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf (""))); - if (__glibc_unlikely (svptest_any (pg, cmp))) - return sv_call_powf_sc (x, y, ret, cmp); + if (__glibc_unlikely (svptest_any (cmp, cmp))) + return sv_call_powf_sc (x, y, ret); return ret; } diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h index f208d33..16b81fc 100644 --- a/sysdeps/aarch64/fpu/sv_expf_inline.h +++ b/sysdeps/aarch64/fpu/sv_expf_inline.h @@ -61,7 +61,7 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) /* scale = 2^(n/N). */ svfloat32_t scale = svexpa (svreinterpret_u32 (z)); - /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */ + /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. */ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2); svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3); svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); @@ -71,5 +71,4 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) return svmla_x (pg, scale, scale, poly); } - #endif diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 59ec929..3bcd0e5 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -51,22 +51,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -119,22 +115,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -166,22 +158,18 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 
ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_advsimd": @@ -230,22 +218,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -798,7 +782,6 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_advsimd": @@ -807,7 +790,6 @@ float: 1 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_sve": @@ -816,12 +798,10 @@ float: 1 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1630,7 +1610,6 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_advsimd": @@ -1639,7 +1618,6 @@ float: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_sve": @@ -1648,12 +1626,10 @@ float: 2 Function: "sinpi_towardzero": double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1705,7 +1681,6 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tanpi_advsimd": @@ -1714,7 +1689,6 @@ float: 2 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_sve": @@ -1723,12 +1697,10 @@ float: 2 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps index 82bc156..24e6036 100644 --- a/sysdeps/arc/fpu/libm-test-ulps +++ b/sysdeps/arc/fpu/libm-test-ulps @@ -27,19 +27,15 @@ double: 3 Function: "acospi": double: 2 -float: 1 Function: "acospi_downward": double: 1 -float: 2 Function: 
"acospi_towardzero": double: 1 -float: 2 Function: "acospi_upward": double: 2 -float: 1 Function: "asin": double: 1 @@ -67,19 +63,15 @@ double: 3 Function: "asinpi": double: 1 -float: 1 Function: "asinpi_downward": double: 1 -float: 1 Function: "asinpi_towardzero": double: 1 -float: 2 Function: "asinpi_upward": double: 2 -float: 2 Function: "atan": double: 1 @@ -98,19 +90,15 @@ double: 8 Function: "atan2pi": double: 1 -float: 1 Function: "atan2pi_downward": double: 1 -float: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 Function: "atan2pi_upward": double: 1 -float: 2 Function: "atan_downward": double: 1 @@ -135,19 +123,15 @@ double: 3 Function: "atanpi": double: 2 -float: 1 Function: "atanpi_downward": double: 1 -float: 2 Function: "atanpi_towardzero": double: 1 -float: 2 Function: "atanpi_upward": double: 1 -float: 1 Function: "cabs": double: 1 @@ -569,19 +553,15 @@ double: 3 Function: "cospi": double: 2 -float: 2 Function: "cospi_downward": double: 1 -float: 2 Function: "cospi_towardzero": double: 1 -float: 1 Function: "cospi_upward": double: 1 -float: 2 Function: Real part of "cpow": double: 9 @@ -1121,19 +1101,15 @@ double: 3 Function: "sinpi": double: 2 -float: 2 Function: "sinpi_downward": double: 2 -float: 2 Function: "sinpi_towardzero": double: 2 -float: 1 Function: "sinpi_upward": double: 2 -float: 3 Function: "tan": double: 1 @@ -1161,19 +1137,15 @@ double: 3 Function: "tanpi": double: 3 -float: 3 Function: "tanpi_downward": double: 2 -float: 3 Function: "tanpi_towardzero": double: 2 -float: 3 Function: "tanpi_upward": double: 2 -float: 4 Function: "tgamma": double: 9 diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps index aa93d71..6359d6e 100644 --- a/sysdeps/arc/nofpu/libm-test-ulps +++ b/sysdeps/arc/nofpu/libm-test-ulps @@ -9,7 +9,6 @@ double: 2 Function: "acospi": double: 2 -float: 1 Function: "asin": double: 1 @@ -19,21 +18,18 @@ double: 2 Function: "asinpi": double: 1 -float: 1 Function: "atan": double: 1 
Function: "atan2pi": double: 1 -float: 1 Function: "atanh": double: 2 Function: "atanpi": double: 2 -float: 1 Function: "cabs": double: 1 @@ -138,7 +134,6 @@ double: 2 Function: "cospi": double: 2 -float: 2 Function: Real part of "cpow": double: 2 @@ -270,14 +265,12 @@ double: 2 Function: "sinpi": double: 2 -float: 2 Function: "tanh": double: 2 Function: "tanpi": double: 3 -float: 3 Function: "tgamma": double: 9 diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps index 218ffa8..273c54d 100644 --- a/sysdeps/arm/libm-test-ulps +++ b/sysdeps/arm/libm-test-ulps @@ -27,19 +27,15 @@ double: 2 Function: "acospi": double: 2 -float: 1 Function: "acospi_downward": double: 1 -float: 2 Function: "acospi_towardzero": double: 1 -float: 2 Function: "acospi_upward": double: 2 -float: 1 Function: "asin": double: 1 @@ -67,19 +63,15 @@ double: 3 Function: "asinpi": double: 1 -float: 1 Function: "asinpi_downward": double: 1 -float: 1 Function: "asinpi_towardzero": double: 1 -float: 2 Function: "asinpi_upward": double: 2 -float: 2 Function: "atan": double: 1 @@ -95,19 +87,15 @@ double: 1 Function: "atan2pi": double: 1 -float: 1 Function: "atan2pi_downward": double: 1 -float: 3 Function: "atan2pi_towardzero": double: 1 -float: 2 Function: "atan2pi_upward": double: 1 -float: 3 Function: "atan_downward": double: 1 @@ -132,19 +120,15 @@ double: 3 Function: "atanpi": double: 2 -float: 1 Function: "atanpi_downward": double: 1 -float: 2 Function: "atanpi_towardzero": double: 1 -float: 2 Function: "atanpi_upward": double: 1 -float: 1 Function: "cabs": double: 1 @@ -561,19 +545,15 @@ double: 2 Function: "cospi": double: 2 -float: 2 Function: "cospi_downward": double: 1 -float: 2 Function: "cospi_towardzero": double: 1 -float: 1 Function: "cospi_upward": double: 1 -float: 2 Function: Real part of "cpow": double: 2 @@ -1117,19 +1097,15 @@ double: 3 Function: "sinpi": double: 2 -float: 2 Function: "sinpi_downward": double: 2 -float: 2 Function: "sinpi_towardzero": double: 2 
-float: 1 Function: "sinpi_upward": double: 2 -float: 3 Function: "tan_downward": double: 1 @@ -1154,19 +1130,15 @@ double: 3 Function: "tanpi": double: 3 -float: 3 Function: "tanpi_downward": double: 2 -float: 3 Function: "tanpi_towardzero": double: 2 -float: 3 Function: "tanpi_upward": double: 2 -float: 4 Function: "tgamma": double: 9 diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps index 2b8fa35..723cb79 100644 --- a/sysdeps/hppa/fpu/libm-test-ulps +++ b/sysdeps/hppa/fpu/libm-test-ulps @@ -27,19 +27,15 @@ double: 2 Function: "acospi": double: 2 -float: 1 Function: "acospi_downward": double: 1 -float: 2 Function: "acospi_towardzero": double: 1 -float: 2 Function: "acospi_upward": double: 2 -float: 1 Function: "asin": double: 1 @@ -67,19 +63,15 @@ double: 3 Function: "asinpi": double: 1 -float: 1 Function: "asinpi_downward": double: 1 -float: 1 Function: "asinpi_towardzero": double: 1 -float: 2 Function: "asinpi_upward": double: 2 -float: 2 Function: "atan": double: 1 @@ -95,19 +87,15 @@ double: 1 Function: "atan2pi": double: 1 -float: 1 Function: "atan2pi_downward": double: 1 -float: 3 Function: "atan2pi_towardzero": double: 1 -float: 2 Function: "atan2pi_upward": double: 1 -float: 3 Function: "atan_downward": double: 1 @@ -132,19 +120,15 @@ double: 3 Function: "atanpi": double: 2 -float: 1 Function: "atanpi_downward": double: 1 -float: 2 Function: "atanpi_towardzero": double: 1 -float: 2 Function: "atanpi_upward": double: 1 -float: 1 Function: "cabs": double: 1 @@ -571,19 +555,15 @@ double: 2 Function: "cospi": double: 2 -float: 2 Function: "cospi_downward": double: 1 -float: 2 Function: "cospi_towardzero": double: 1 -float: 1 Function: "cospi_upward": double: 1 -float: 2 Function: Real part of "cpow": double: 2 @@ -1143,19 +1123,15 @@ double: 3 Function: "sinpi": double: 2 -float: 2 Function: "sinpi_downward": double: 2 -float: 2 Function: "sinpi_towardzero": double: 2 -float: 1 Function: "sinpi_upward": double: 2 -float: 3 
Function: "tan": double: 1 @@ -1184,19 +1160,15 @@ double: 3 Function: "tanpi": double: 3 -float: 3 Function: "tanpi_downward": double: 2 -float: 3 Function: "tanpi_towardzero": double: 2 -float: 3 Function: "tanpi_upward": double: 2 -float: 4 Function: "tgamma": double: 9 diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps index b898344..8107d2f 100644 --- a/sysdeps/i386/fpu/libm-test-ulps +++ b/sysdeps/i386/fpu/libm-test-ulps @@ -41,25 +41,21 @@ ldouble: 3 Function: "acospi": double: 1 -float: 1 float128: 2 ldouble: 1 Function: "acospi_downward": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "acospi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "acospi_upward": double: 2 -float: 1 float128: 2 ldouble: 2 @@ -105,25 +101,21 @@ ldouble: 5 Function: "asinpi": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_downward": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "asinpi_upward": double: 2 -float: 2 float128: 2 ldouble: 2 @@ -154,25 +146,21 @@ ldouble: 1 Function: "atan2pi": double: 1 -float: 1 float128: 3 ldouble: 1 Function: "atan2pi_downward": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "atan2pi_upward": double: 2 -float: 2 float128: 2 ldouble: 2 @@ -213,25 +201,21 @@ ldouble: 5 Function: "atanpi": double: 1 -float: 1 float128: 2 ldouble: 1 Function: "atanpi_downward": double: 2 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_upward": double: 2 -float: 1 float128: 2 ldouble: 1 @@ -870,25 +854,21 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 
float128: 2 ldouble: 2 @@ -1711,25 +1691,21 @@ ldouble: 5 Function: "sinpi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "sinpi_towardzero": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 float128: 2 ldouble: 2 @@ -1774,25 +1750,21 @@ ldouble: 4 Function: "tanpi": double: 3 -float: 3 float128: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 float128: 4 ldouble: 4 diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps index 750d519..b99c502 100644 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps @@ -41,25 +41,21 @@ ldouble: 3 Function: "acospi": double: 1 -float: 1 float128: 2 ldouble: 3 Function: "acospi_downward": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "acospi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "acospi_upward": double: 2 -float: 1 float128: 2 ldouble: 2 @@ -105,25 +101,21 @@ ldouble: 5 Function: "asinpi": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_downward": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "asinpi_upward": double: 2 -float: 2 float128: 2 ldouble: 2 @@ -154,25 +146,21 @@ ldouble: 1 Function: "atan2pi": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "atan2pi_downward": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "atan2pi_upward": double: 2 -float: 2 float128: 2 ldouble: 2 @@ -213,25 +201,21 @@ ldouble: 5 Function: "atanpi": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "atanpi_downward": double: 2 -float: 2 float128: 1 ldouble: 2 
Function: "atanpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_upward": double: 2 -float: 1 float128: 2 ldouble: 1 @@ -870,25 +854,21 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 float128: 2 ldouble: 2 @@ -1716,25 +1696,21 @@ ldouble: 5 Function: "sinpi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "sinpi_towardzero": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 float128: 2 ldouble: 2 @@ -1779,25 +1755,21 @@ ldouble: 4 Function: "tanpi": double: 3 -float: 3 float128: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 float128: 4 ldouble: 4 diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h index 299a2ff..3382e38 100644 --- a/sysdeps/ieee754/dbl-64/math_config.h +++ b/sysdeps/ieee754/dbl-64/math_config.h @@ -195,16 +195,18 @@ check_uflow (double x) extern const struct exp_data { double invln2N; - double shift; double negln2hiN; double negln2loN; double poly[4]; /* Last four coefficients. */ + double shift; + double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; - double invlog10_2N; + double neglog10_2hiN; double neglog10_2loN; double exp10_poly[5]; + double invlog10_2N; uint64_t tab[2*(1 << EXP_TABLE_BITS)]; } __exp_data attribute_hidden; diff --git a/sysdeps/ieee754/flt-32/e_acoshf.c b/sysdeps/ieee754/flt-32/e_acoshf.c index a0a0abc..b2a9c6b 100644 --- a/sysdeps/ieee754/flt-32/e_acoshf.c +++ b/sysdeps/ieee754/flt-32/e_acoshf.c @@ -28,6 +28,7 @@ SOFTWARE. 
#include <stdint.h> #include <libm-alias-finite.h> #include "math_config.h" +#include "s_asincoshf_data.h" static __attribute__ ((noinline)) float as_special (float x) @@ -45,90 +46,6 @@ as_special (float x) float __ieee754_acoshf (float x) { - static const double ix[] = - { - 0x1p+0, 0x1.fc07f01fcp-1, 0x1.f81f81f82p-1, 0x1.f44659e4ap-1, - 0x1.f07c1f07cp-1, 0x1.ecc07b302p-1, 0x1.e9131abfp-1, 0x1.e573ac902p-1, - 0x1.e1e1e1e1ep-1, 0x1.de5d6e3f8p-1, 0x1.dae6076bap-1, 0x1.d77b654b8p-1, - 0x1.d41d41d42p-1, 0x1.d0cb58f6ep-1, 0x1.cd8568904p-1, 0x1.ca4b3055ep-1, - 0x1.c71c71c72p-1, 0x1.c3f8f01c4p-1, 0x1.c0e070382p-1, 0x1.bdd2b8994p-1, - 0x1.bacf914c2p-1, 0x1.b7d6c3ddap-1, 0x1.b4e81b4e8p-1, 0x1.b2036406cp-1, - 0x1.af286bca2p-1, 0x1.ac5701ac6p-1, 0x1.a98ef606ap-1, 0x1.a6d01a6dp-1, - 0x1.a41a41a42p-1, 0x1.a16d3f97ap-1, 0x1.9ec8e951p-1, 0x1.9c2d14ee4p-1, - 0x1.99999999ap-1, 0x1.970e4f80cp-1, 0x1.948b0fcd6p-1, 0x1.920fb49dp-1, - 0x1.8f9c18f9cp-1, 0x1.8d3018d3p-1, 0x1.8acb90f6cp-1, 0x1.886e5f0acp-1, - 0x1.861861862p-1, 0x1.83c977ab2p-1, 0x1.818181818p-1, 0x1.7f405fd02p-1, - 0x1.7d05f417ep-1, 0x1.7ad2208ep-1, 0x1.78a4c8178p-1, 0x1.767dce434p-1, - 0x1.745d1745ep-1, 0x1.724287f46p-1, 0x1.702e05c0cp-1, 0x1.6e1f76b44p-1, - 0x1.6c16c16c2p-1, 0x1.6a13cd154p-1, 0x1.681681682p-1, 0x1.661ec6a52p-1, - 0x1.642c8590cp-1, 0x1.623fa7702p-1, 0x1.605816058p-1, 0x1.5e75bb8dp-1, - 0x1.5c9882b94p-1, 0x1.5ac056b02p-1, 0x1.58ed23082p-1, 0x1.571ed3c5p-1, - 0x1.555555556p-1, 0x1.5390948f4p-1, 0x1.51d07eae2p-1, 0x1.501501502p-1, - 0x1.4e5e0a73p-1, 0x1.4cab88726p-1, 0x1.4afd6a052p-1, 0x1.49539e3b2p-1, - 0x1.47ae147aep-1, 0x1.460cbc7f6p-1, 0x1.446f86562p-1, 0x1.42d6625d6p-1, - 0x1.414141414p-1, 0x1.3fb013fbp-1, 0x1.3e22cbce4p-1, 0x1.3c995a47cp-1, - 0x1.3b13b13b2p-1, 0x1.3991c2c18p-1, 0x1.381381382p-1, 0x1.3698df3dep-1, - 0x1.3521cfb2cp-1, 0x1.33ae45b58p-1, 0x1.323e34a2cp-1, 0x1.30d19013p-1, - 0x1.2f684bda2p-1, 0x1.2e025c04cp-1, 0x1.2c9fb4d82p-1, 0x1.2b404ad02p-1, - 0x1.29e4129e4p-1, 0x1.288b01288p-1, 
0x1.27350b882p-1, 0x1.25e22708p-1, - 0x1.24924924ap-1, 0x1.23456789ap-1, 0x1.21fb78122p-1, 0x1.20b470c68p-1, - 0x1.1f7047dc2p-1, 0x1.1e2ef3b4p-1, 0x1.1cf06ada2p-1, 0x1.1bb4a4046p-1, - 0x1.1a7b9611ap-1, 0x1.19453808cp-1, 0x1.181181182p-1, 0x1.16e068942p-1, - 0x1.15b1e5f76p-1, 0x1.1485f0e0ap-1, 0x1.135c81136p-1, 0x1.12358e75ep-1, - 0x1.111111112p-1, 0x1.0fef010fep-1, 0x1.0ecf56be6p-1, 0x1.0db20a89p-1, - 0x1.0c9714fbcp-1, 0x1.0b7e6ec26p-1, 0x1.0a6810a68p-1, 0x1.0953f3902p-1, - 0x1.084210842p-1, 0x1.073260a48p-1, 0x1.0624dd2f2p-1, 0x1.05197f7d8p-1, - 0x1.041041042p-1, 0x1.03091b52p-1, 0x1.020408102p-1, 0x1.01010101p-1, - 0x1p-1 - }; - - static const double lix[] = - { - 0x0p+0, 0x1.fe02a6b146789p-8, 0x1.fc0a8b0fa03e4p-7, - 0x1.7b91b07de311bp-6, 0x1.f829b0e7c33p-6, 0x1.39e87b9fd7d6p-5, - 0x1.77458f63edcfcp-5, 0x1.b42dd7117b1bfp-5, 0x1.f0a30c01362a6p-5, - 0x1.16536eea7fae1p-4, 0x1.341d7961791d1p-4, 0x1.51b073f07983fp-4, - 0x1.6f0d28ae3eb4cp-4, 0x1.8c345d6383b21p-4, 0x1.a926d3a475563p-4, - 0x1.c5e548f63a743p-4, 0x1.e27076e28f2e6p-4, 0x1.fec9131dbaabbp-4, - 0x1.0d77e7ccf6e59p-3, 0x1.1b72ad52f87ap-3, 0x1.29552f81eb523p-3, - 0x1.371fc201f7f74p-3, 0x1.44d2b6ccbfd1ep-3, 0x1.526e5e3a41438p-3, - 0x1.5ff3070a613d4p-3, 0x1.6d60fe717221dp-3, 0x1.7ab890212b909p-3, - 0x1.87fa065214911p-3, 0x1.9525a9cf296b4p-3, 0x1.a23bc1fe42563p-3, - 0x1.af3c94e81bff3p-3, 0x1.bc2867430acd6p-3, 0x1.c8ff7c7989a22p-3, - 0x1.d5c216b535b91p-3, 0x1.e27076e2f92e6p-3, 0x1.ef0adcbe0d936p-3, - 0x1.fb9186d5ebe2bp-3, 0x1.0402594b51041p-2, 0x1.0a324e27370e3p-2, - 0x1.1058bf9ad7ad5p-2, 0x1.1675cabaa660ep-2, 0x1.1c898c16b91fbp-2, - 0x1.22941fbcfb966p-2, 0x1.2895a13dd2ea3p-2, 0x1.2e8e2bade7d31p-2, - 0x1.347dd9a9afd55p-2, 0x1.3a64c556b05eap-2, 0x1.40430868877e4p-2, - 0x1.4618bc219dec2p-2, 0x1.4be5f9579e0a1p-2, 0x1.51aad872c982dp-2, - 0x1.5767717432a6cp-2, 0x1.5d1bdbf5669cap-2, 0x1.62c82f2b83795p-2, - 0x1.686c81e9964afp-2, 0x1.6e08eaa2929e4p-2, 0x1.739d7f6b95007p-2, - 0x1.792a55fdb7fa2p-2, 0x1.7eaf83b82efc3p-2, 
0x1.842d1da1ecb17p-2, - 0x1.89a3386be825bp-2, 0x1.8f11e87347ac7p-2, 0x1.947941c1f26fbp-2, - 0x1.99d958119208bp-2, 0x1.9f323ecbd984cp-2, 0x1.a484090e5eb0ap-2, - 0x1.a9cec9a9cf84ap-2, 0x1.af1293245606bp-2, 0x1.b44f77bc98f63p-2, - 0x1.b9858969218fbp-2, 0x1.beb4d9da96b7cp-2, 0x1.c3dd7a7d0354dp-2, - 0x1.c8ff7c79ada22p-2, 0x1.ce1af0b855bebp-2, 0x1.d32fe7e039bd5p-2, - 0x1.d83e72587673ep-2, 0x1.dd46a04c204a1p-2, 0x1.e24881a7cac26p-2, - 0x1.e744261d8a788p-2, 0x1.ec399d2457ccp-2, 0x1.f128f5fac86edp-2, - 0x1.f6123fa71c8acp-2, 0x1.faf588f76631fp-2, 0x1.ffd2e08580c98p-2, - 0x1.02552a5a4f0ffp-1, 0x1.04bdf9da8b6d2p-1, 0x1.0723e5c1b4f4p-1, - 0x1.0986f4f589521p-1, 0x1.0be72e423ca83p-1, 0x1.0e44985d0f48cp-1, - 0x1.109f39e2be497p-1, 0x1.12f71959283bcp-1, 0x1.154c3d2f4f5eap-1, - 0x1.179eabbd9c9a1p-1, 0x1.19ee6b466516fp-1, 0x1.1c3b81f723c25p-1, - 0x1.1e85f5e6ec0dp-1, 0x1.20cdcd193f76ep-1, 0x1.23130d7beb743p-1, - 0x1.2555bce9887cbp-1, 0x1.2795e1288211bp-1, 0x1.29d37fec2308bp-1, - 0x1.2c0e9ed45768cp-1, 0x1.2e47436e5ae68p-1, 0x1.307d7334ff0bep-1, - 0x1.32b1339134571p-1, 0x1.34e289d9b39d3p-1, 0x1.37117b5481bb6p-1, - 0x1.393e0d3549a1ap-1, 0x1.3b6844a017823p-1, 0x1.3d9026a70eefbp-1, - 0x1.3fb5b84cfeb42p-1, 0x1.41d8fe844b2aep-1, 0x1.43f9fe2fb9267p-1, - 0x1.4618bc21d86c2p-1, 0x1.48353d1e928dfp-1, 0x1.4a4f85db1debbp-1, - 0x1.4c679afcc323ap-1, 0x1.4e7d811b77bb1p-1, 0x1.50913cbff8c6bp-1, - 0x1.52a2d265be5abp-1, 0x1.54b2467998498p-1, 0x1.56bf9d5b34b99p-1, - 0x1.58cadb5cbe989p-1, 0x1.5ad404c33af2dp-1, 0x1.5cdb1dc6ad765p-1, - 0x1.5ee02a9241e75p-1, 0x1.60e32f447a8d9p-1, 0x1.62e42fefa39efp-1 - }; - uint32_t t = asuint (x); if (__glibc_unlikely (t <= 0x3f800000u)) return as_special (x); @@ -160,36 +77,26 @@ __ieee754_acoshf (float x) int j = (m + (UINT64_C(1) << (52 - 8))) >> (52 - 7); int e = (tp >> 52) - 0x3ff; double w = asdouble (m | UINT64_C(0x3ff) << 52); - double z = w * ix[j] - 1.0; - static const double c[] = - { - 0x1.0000000066947p+0, -0x1.00007f053d8cbp-1, 0x1.555280111d914p-2 - }; + double 
z = w * IX[j] - 1.0; double z2 = z * z; - double r = ((lix[128] * e + lix[j]) + z * c[0]) - + z2 * (c[1] + z * c[2]); + double r = ((LIX[128] * e + LIX[j]) + z * C[0]) + + z2 * (C[1] + z * C[2]); if (__glibc_unlikely (((asuint64 (r) + 259000) & INT64_C(0xfffffff)) < 260000)) /* accurate path */ { - static const double cp[] = - { - 0x1p+0, -0x1p-1, - 0x1.55555555030bcp-2, -0x1.ffffffff2b4e5p-3, - 0x1.999b5076a42f2p-3, -0x1.55570c45a647dp-3 - }; z2 = z * z; - double c0 = cp[0] + z * cp[1]; - double c2 = cp[2] + z * cp[3]; - double c4 = cp[4] + z * cp[5]; + double c0 = CP[0] + z * CP[1]; + double c2 = CP[2] + z * CP[3]; + double c4 = CP[4] + z * CP[5]; c0 += z2 * (c2 + z2 * c4); const double ln2l = 0x1.7f7d1cf79abcap-20; const double ln2h = 0x1.62e4p-1; double Lh = ln2h * e; double Ll = ln2l * e; - r = fma (z, c0, Ll + lix[j]) + Lh; + r = fma (z, c0, Ll + LIX[j]) + Lh; if (__glibc_unlikely ((asuint64 (r) & UINT64_C(0xfffffff)) == 0)) { - double h = fma (z, c0, Ll + lix[j]) + (Lh - r); + double h = fma (z, c0, Ll + LIX[j]) + (Lh - r); r = r + 64.0 * h; } } diff --git a/sysdeps/ieee754/flt-32/e_coshf.c b/sysdeps/ieee754/flt-32/e_coshf.c index 602d2b4..5f6ff8c 100644 --- a/sysdeps/ieee754/flt-32/e_coshf.c +++ b/sysdeps/ieee754/flt-32/e_coshf.c @@ -28,34 +28,11 @@ SOFTWARE. 
#include <math.h> #include <libm-alias-finite.h> #include "math_config.h" +#include "e_sincoshf_data.h" float __ieee754_coshf (float x) { - static const double c[] = - { - 1, 0x1.62e42fef4c4e7p-6, 0x1.ebfd1b232f475p-13, 0x1.c6b19384ecd93p-20 - }; - static const double ch[] = - { - 1, 0x1.62e42fefa39efp-6, 0x1.ebfbdff82c58fp-13, - 0x1.c6b08d702e0edp-20, 0x1.3b2ab6fb92e5ep-27, 0x1.5d886e6d54203p-35, - 0x1.430976b8ce6efp-43 - }; - static const uint64_t tb[] = - { - 0x3fe0000000000000, 0x3fe059b0d3158574, 0x3fe0b5586cf9890f, - 0x3fe11301d0125b51, 0x3fe172b83c7d517b, 0x3fe1d4873168b9aa, - 0x3fe2387a6e756238, 0x3fe29e9df51fdee1, 0x3fe306fe0a31b715, - 0x3fe371a7373aa9cb, 0x3fe3dea64c123422, 0x3fe44e086061892d, - 0x3fe4bfdad5362a27, 0x3fe5342b569d4f82, 0x3fe5ab07dd485429, - 0x3fe6247eb03a5585, 0x3fe6a09e667f3bcd, 0x3fe71f75e8ec5f74, - 0x3fe7a11473eb0187, 0x3fe82589994cce13, 0x3fe8ace5422aa0db, - 0x3fe93737b0cdc5e5, 0x3fe9c49182a3f090, 0x3fea5503b23e255d, - 0x3feae89f995ad3ad, 0x3feb7f76f2fb5e47, 0x3fec199bdd85529c, - 0x3fecb720dcef9069, 0x3fed5818dcfba487, 0x3fedfc97337b9b5f, - 0x3feea4afa2a490da, 0x3fef50765b6e4540 - }; const double iln2 = 0x1.71547652b82fep+5; double z = x; uint32_t ax = asuint (x) << 1; @@ -92,10 +69,10 @@ __ieee754_coshf (float x) double h2 = h * h; int64_t jp = asuint64 (ia + 0x1.8p52); int64_t jm = -jp; - double sp = asdouble (tb[jp & 31] + ((jp >> 5) << 52)); - double sm = asdouble (tb[jm & 31] + ((jm >> 5) << 52)); - double te = c[0] + h2 * c[2]; - double to = (c[1] + h2 * c[3]); + double sp = asdouble (TB[jp & 31] + ((jp >> 5) << 52)); + double sm = asdouble (TB[jm & 31] + ((jm >> 5) << 52)); + double te = C[0] + h2 * C[2]; + double to = (C[1] + h2 * C[3]); double rp = sp * (te + h * to); double rm = sm * (te - h * to); double r = rp + rm; @@ -107,8 +84,9 @@ __ieee754_coshf (float x) const double iln2l = 0x1.5c17f0bbbe88p-26; h = (iln2h * z - ia) + iln2l * z; h2 = h * h; - te = ch[0] + h2 * ch[2] + (h2 * h2) * (ch[4] + h2 * ch[6]); - to = ch[1] + 
h2 * (ch[3] + h2 * ch[5]); + te = CH[0] + h2 * CH[2] + (h2 * h2) * (CH[4] + h2 * CH[6]); + to = CH[1] + h2 * (CH[3] + h2 + * CH[5]); r = sp * (te + h * to) + sm * (te - h * to); ub = r; } diff --git a/sysdeps/ieee754/flt-32/e_sincoshf_data.c b/sysdeps/ieee754/flt-32/e_sincoshf_data.c new file mode 100644 index 0000000..c1a34d1 --- /dev/null +++ b/sysdeps/ieee754/flt-32/e_sincoshf_data.c @@ -0,0 +1,49 @@ +/* Common data for sinhf/coshf implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "e_sincoshf_data.h" + +const double __sincoshf_c[] = + { + 1, 0x1.62e42fef4c4e7p-6, 0x1.ebfd1b232f475p-13, 0x1.c6b19384ecd93p-20 + }; +const double __sincoshf_ch[] = + { + 1, 0x1.62e42fefa39efp-6, 0x1.ebfbdff82c58fp-13, + 0x1.c6b08d702e0edp-20, 0x1.3b2ab6fb92e5ep-27, 0x1.5d886e6d54203p-35, + 0x1.430976b8ce6efp-43 + }; +const uint64_t __sincoshf_tb[] = + { + 0x3fe0000000000000, 0x3fe059b0d3158574, 0x3fe0b5586cf9890f, + 0x3fe11301d0125b51, 0x3fe172b83c7d517b, 0x3fe1d4873168b9aa, + 0x3fe2387a6e756238, 0x3fe29e9df51fdee1, 0x3fe306fe0a31b715, + 0x3fe371a7373aa9cb, 0x3fe3dea64c123422, 0x3fe44e086061892d, + 0x3fe4bfdad5362a27, 0x3fe5342b569d4f82, 0x3fe5ab07dd485429, + 0x3fe6247eb03a5585, 0x3fe6a09e667f3bcd, 0x3fe71f75e8ec5f74, + 0x3fe7a11473eb0187, 0x3fe82589994cce13, 0x3fe8ace5422aa0db, + 0x3fe93737b0cdc5e5, 0x3fe9c49182a3f090, 0x3fea5503b23e255d, + 0x3feae89f995ad3ad, 0x3feb7f76f2fb5e47, 0x3fec199bdd85529c, + 0x3fecb720dcef9069, 0x3fed5818dcfba487, 0x3fedfc97337b9b5f, + 0x3feea4afa2a490da, 0x3fef50765b6e4540 + }; diff --git a/sysdeps/ieee754/flt-32/e_sincoshf_data.h b/sysdeps/ieee754/flt-32/e_sincoshf_data.h new file mode 100644 index 0000000..ea4b380 --- /dev/null +++ b/sysdeps/ieee754/flt-32/e_sincoshf_data.h @@ -0,0 +1,36 @@ +/* Common data for sinhf/coshf implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef _SINCOSHF_H +#define _SINCOSHF_H + +#include <stdint.h> + +extern const double __sincoshf_c[] attribute_hidden; +#define C __sincoshf_c +extern const double __sincoshf_ch[] attribute_hidden; +#define CH __sincoshf_ch +extern const uint64_t __sincoshf_tb[] attribute_hidden; +#define TB __sincoshf_tb + +#endif diff --git a/sysdeps/ieee754/flt-32/e_sinhf.c b/sysdeps/ieee754/flt-32/e_sinhf.c index dee96fc..754b84a 100644 --- a/sysdeps/ieee754/flt-32/e_sinhf.c +++ b/sysdeps/ieee754/flt-32/e_sinhf.c @@ -29,34 +29,11 @@ SOFTWARE. 
#include <libm-alias-finite.h> #include <math-narrow-eval.h> #include "math_config.h" +#include "e_sincoshf_data.h" float __ieee754_sinhf (float x) { - static const double c[] = - { - 1, 0x1.62e42fef4c4e7p-6, 0x1.ebfd1b232f475p-13, 0x1.c6b19384ecd93p-20 - }; - static const double ch[] = - { - 1, 0x1.62e42fefa39efp-6, 0x1.ebfbdff82c58fp-13, - 0x1.c6b08d702e0edp-20, 0x1.3b2ab6fb92e5ep-27, 0x1.5d886e6d54203p-35, - 0x1.430976b8ce6efp-43 - }; - static const uint64_t tb[] = - { - 0x3fe0000000000000, 0x3fe059b0d3158574, 0x3fe0b5586cf9890f, - 0x3fe11301d0125b51, 0x3fe172b83c7d517b, 0x3fe1d4873168b9aa, - 0x3fe2387a6e756238, 0x3fe29e9df51fdee1, 0x3fe306fe0a31b715, - 0x3fe371a7373aa9cb, 0x3fe3dea64c123422, 0x3fe44e086061892d, - 0x3fe4bfdad5362a27, 0x3fe5342b569d4f82, 0x3fe5ab07dd485429, - 0x3fe6247eb03a5585, 0x3fe6a09e667f3bcd, 0x3fe71f75e8ec5f74, - 0x3fe7a11473eb0187, 0x3fe82589994cce13, 0x3fe8ace5422aa0db, - 0x3fe93737b0cdc5e5, 0x3fe9c49182a3f090, 0x3fea5503b23e255d, - 0x3feae89f995ad3ad, 0x3feb7f76f2fb5e47, 0x3fec199bdd85529c, - 0x3fecb720dcef9069, 0x3fed5818dcfba487, 0x3fedfc97337b9b5f, - 0x3feea4afa2a490da, 0x3fef50765b6e4540 - }; static const struct { uint32_t uarg; @@ -106,10 +83,10 @@ __ieee754_sinhf (float x) double h2 = h * h; int64_t jp = asuint64 (ia + 0x1.8p52); int64_t jm = -jp; - double sp = asdouble (tb[jp & 31] + ((jp >> 5) << 52)); - double sm = asdouble (tb[jm & 31] + ((jm >> 5) << 52)); - double te = c[0] + h2 * c[2]; - double to = (c[1] + h2 * c[3]); + double sp = asdouble (TB[jp & 31] + ((jp >> 5) << 52)); + double sm = asdouble (TB[jm & 31] + ((jm >> 5) << 52)); + double te = C[0] + h2 * C[2]; + double to = (C[1] + h2 * C[3]); double rp = sp * (te + h * to); double rm = sm * (te - h * to); double r = rp - rm; @@ -121,8 +98,8 @@ __ieee754_sinhf (float x) const double iln2l = 0x1.5c17f0bbbe88p-26; h = (iln2h * z - ia) + iln2l * z; h2 = h * h; - te = ch[0] + h2 * ch[2] + (h2 * h2) * (ch[4] + h2 * ch[6]); - to = ch[1] + h2 * (ch[3] + h2 * ch[5]); + te = 
CH[0] + h2 * CH[2] + (h2 * h2) * (CH[4] + h2 * CH[6]); + to = CH[1] + h2 * (CH[3] + h2 * CH[5]); r = sp * (te + h * to) - sm * (te - h * to); ub = r; } diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index 0354611..8d9c8ee 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -84,6 +84,31 @@ roundeven_finite (double x) #endif } +#ifndef ROUNDEVENF_INTRINSICS +/* When set, roundevenf_finite will route to the internal roundevenf function. */ +# define ROUNDEVENF_INTRINSICS 1 +#endif + +static inline float +roundevenf_finite (float x) +{ + if (!isfinite (x)) + __builtin_unreachable (); +#if ROUNDEVENF_INTRINSICS + return roundevenf (x); +#else + float y = roundf (x); + if (fabs (x - y) == 0.5) + { + union { float f; uint32_t i; } u = {y}; + union { float f; uint32_t i; } v = {y - copysignf (1.0, x)}; + if (__builtin_ctzl (v.i) > __builtin_ctzl (u.i)) + y = v.f; + } + return y; +#endif +} + static inline uint32_t asuint (float f) { diff --git a/sysdeps/ieee754/flt-32/s_acospif.c b/sysdeps/ieee754/flt-32/s_acospif.c new file mode 100644 index 0000000..03d63a7 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_acospif.c @@ -0,0 +1,137 @@ +/* Correctly-rounded half-revolution arc-cosine function for binary32 value. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/acospi/acospif.c, revision 1a6a9ab). 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ + +#include <math.h> +#include <stdint.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__acospif (float x) +{ + float ax = fabsf (x); + double az = ax; + double z = x; + uint32_t t = asuint (x); + int e = (t >> 23) & 0xff; + if (__glibc_unlikely (e >= 127)) + { + if (x == 1.0f) + return 0.0f; + if (x == -1.0f) + return 1.0f; + if (e == 0xff && (t << 9)) + return x + x; /* nan */ + return __math_edomf ((x - x) / (x - x)); /* nan */ + } + int s = 146 - e; + int i = 0; + if (__glibc_likely (s < 32)) + i = ((t & (~0u >> 9)) | 1 << 23) >> s; + static const double ch[][8] = { + { 0x1.45f306dc9c882p-2, 0x1.b2995e7b7dc2fp-5, 0x1.8723a1cf50c7ep-6, + 0x1.d1a4591d16a29p-7, 0x1.3ce3aa68ddaeep-7, 0x1.d3182ab0cc1bfp-8, + 0x1.62b379a8b88e3p-8, 0x1.6811411fcfec2p-8 }, + { 0x1.ffffffffd3cd9p-2, -0x1.17cc1b3355fd5p-4, 0x1.d067a1e8d5a99p-6, + -0x1.08e16fb09314ap-6, 0x1.5eed43d42dcb2p-7, -0x1.f58baca7acc71p-8, + 0x1.5dab64e2dcf15p-8, -0x1.59270e30797acp-9 }, + { 0x1.fffffff7c4622p-2, -0x1.17cc149ded3a2p-4, 0x1.d0654d4cb2c1ap-6, + -0x1.08c3ba713d33ap-6, 0x1.5d2053481079cp-7, -0x1.e485ebc545e7ep-8, + 0x1.303baca167dddp-8, -0x1.dee8d16d06b38p-10 }, + { 0x1.ffffffa749848p-2, -0x1.17cbe7155935p-4, 0x1.d05a312269adfp-6, + -0x1.0862b3ee617d7p-6, 0x1.5920708db2a73p-7, -0x1.cb0463b3862c3p-8, + 0x1.02b82478f95d7p-8, -0x1.52a7b8579e729p-10 }, + { 0x1.fffffe1f92bb5p-2, -0x1.17cb3e74c64e3p-4, 0x1.d03af67311cbfp-6, + -0x1.079441cbfc7ap-6, 0x1.52b4287805a61p-7, -0x1.ac3286d604a98p-8, + 0x1.b2f1210d9701bp-9, -0x1.e740ddc25afd6p-11 }, + { 0x1.fffff92beb6e2p-2, -0x1.17c986fe9518bp-4, 0x1.cff98167c9a5ep-6, + -0x1.0638b591eae52p-6, 0x1.4a0803828959ep-7, -0x1.8adeca229f11dp-8, + 0x1.6b9a7ba05dfcep-9, -0x1.640521a43b2dp-11 }, + { 0x1.ffffeccee5bfcp-2, -0x1.17c5f1753f5eap-4, 0x1.cf874e4fe258fp-6, + -0x1.043e6cf77b256p-6, 0x1.3f7db42227d92p-7, -0x1.691a6fa2a2882p-8, + 0x1.2f6543162bc61p-9, -0x1.07d5da05822b6p-11 }, + { 0x1.ffffd2f64431dp-2, -0x1.17bf8208c10c1p-4, 
0x1.ced7487cdb124p-6, + -0x1.01a0d30932905p-6, 0x1.3388f99b254dap-7, -0x1.4844e245c65bdp-8, + 0x1.fa777150197c6p-10, -0x1.8c1ecf16a05c8p-12 }, + { 0x1.ffffa36d1712ep-2, -0x1.17b523971bd4ep-4, 0x1.cddee26de2deep-6, + -0x1.fccb00abaaabcp-7, 0x1.269afc3622342p-7, -0x1.2933152686752p-8, + 0x1.a76d4956cc9a3p-10, -0x1.2ce7d6dc651cep-12 }, + { 0x1.ffff5402ab3a1p-2, -0x1.17a5ba85da77ap-4, 0x1.cc96894e05c02p-6, + -0x1.f532143cb832ep-7, 0x1.19180b660ff09p-7, -0x1.0c57417a78b3cp-8, + 0x1.62e26cbd7bb1ep-10, -0x1.ce28d33fe1df3p-13 }, + { 0x1.fffed8d639751p-2, -0x1.1790349f3ae76p-4, 0x1.caf9a4fd1b398p-6, + -0x1.ec986b111342ep-7, 0x1.0b53c3ad4baa4p-7, -0x1.e3c2282eeace4p-9, + 0x1.2a55369f55bbep-10, -0x1.667fe48c396e8p-13 }, + { 0x1.fffe24b714161p-2, -0x1.177394fbcb719p-4, 0x1.c90652d920ebdp-6, + -0x1.e3239197bddf1p-7, 0x1.fb2188525b025p-8, -0x1.b3aadd451afc7p-9, + 0x1.f74020f31fdabp-11, -0x1.18b0cb246768dp-13 }, + { 0x1.fffd298bec9e2p-2, -0x1.174efbfd34648p-4, 0x1.c6bcfe48ea92bp-6, + -0x1.d8f9f2a16157cp-7, 0x1.e0044f56c8864p-8, -0x1.883e2347fe76cp-9, + 0x1.a9f0e3c1b7af5p-11, -0x1.bb5acc0e60825p-14 }, + { 0x1.fffbd8b784c4dp-2, -0x1.1721abdd3722ep-4, 0x1.c41fee756d4bp-6, + -0x1.ce40bccf8065fp-7, 0x1.c59b684b70ef9p-8, -0x1.6133d027996b3p-9, + 0x1.69cad01106397p-11, -0x1.60f8e45494156p-14 }, + { 0x1.fffa23749cf88p-2, -0x1.16eb0a8285c06p-4, 0x1.c132d762e1b0dp-6, + -0x1.c31a959398f4ep-7, 0x1.ac1c5b46bc8ap-8, -0x1.3e34f1abe51dcp-9, + 0x1.346738737c0b9p-11, -0x1.1b227a3f5c75p-14 }, + { 0x1.fff7fb25bb407p-2, -0x1.16aaa14d7564p-4, 0x1.bdfa75fca5ff2p-6, + -0x1.b7a6e260d079cp-7, 0x1.93ab06911033cp-8, -0x1.1ee5560967fd5p-9, + 0x1.07d31060838bfp-11, -0x1.c96f33a283115p-15 }, + }; + const double *c = ch[i]; + double z2 = z * z; + double z4 = z2 * z2; + if (__glibc_unlikely (i == 0)) + { + double c0 = c[0] + z2 * c[1]; + double c2 = c[2] + z2 * c[3]; + double c4 = c[4] + z2 * c[5]; + double c6 = c[6] + z2 * c[7]; + c0 += c2 * z4; + c4 += c6 * z4; + /* For |x| <= 0x1.0fd288p-127, c0 += c4*(z4*z4) 
would raise a spurious + underflow exception, we use an FMA instead, where c4 * z4 does not + underflow. */ + c0 = fma (c4 * z4, z4, c0); + return 0.5 - z * c0; + } + else + { + double f = sqrt (1 - az); + double c0 = c[0] + az * c[1]; + double c2 = c[2] + az * c[3]; + double c4 = c[4] + az * c[5]; + double c6 = c[6] + az * c[7]; + c0 += c2 * z2; + c4 += c6 * z2; + c0 += c4 * z4; + static const double o[] = { 0, 1 }; + double r = o[t >> 31] + c0 * copysign (f, x); + return r; + } +} +libm_alias_float (__acospi, acospi) diff --git a/sysdeps/ieee754/flt-32/s_asincoshf_data.c b/sysdeps/ieee754/flt-32/s_asincoshf_data.c new file mode 100644 index 0000000..e852122 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_asincoshf_data.c @@ -0,0 +1,121 @@ +/* Common data for asinhf/acoshf implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "s_asincoshf_data.h" + +const double __asincoshf_ix[] = + { + 0x1p+0, 0x1.fc07f01fcp-1, 0x1.f81f81f82p-1, 0x1.f44659e4ap-1, + 0x1.f07c1f07cp-1, 0x1.ecc07b302p-1, 0x1.e9131abfp-1, 0x1.e573ac902p-1, + 0x1.e1e1e1e1ep-1, 0x1.de5d6e3f8p-1, 0x1.dae6076bap-1, 0x1.d77b654b8p-1, + 0x1.d41d41d42p-1, 0x1.d0cb58f6ep-1, 0x1.cd8568904p-1, 0x1.ca4b3055ep-1, + 0x1.c71c71c72p-1, 0x1.c3f8f01c4p-1, 0x1.c0e070382p-1, 0x1.bdd2b8994p-1, + 0x1.bacf914c2p-1, 0x1.b7d6c3ddap-1, 0x1.b4e81b4e8p-1, 0x1.b2036406cp-1, + 0x1.af286bca2p-1, 0x1.ac5701ac6p-1, 0x1.a98ef606ap-1, 0x1.a6d01a6dp-1, + 0x1.a41a41a42p-1, 0x1.a16d3f97ap-1, 0x1.9ec8e951p-1, 0x1.9c2d14ee4p-1, + 0x1.99999999ap-1, 0x1.970e4f80cp-1, 0x1.948b0fcd6p-1, 0x1.920fb49dp-1, + 0x1.8f9c18f9cp-1, 0x1.8d3018d3p-1, 0x1.8acb90f6cp-1, 0x1.886e5f0acp-1, + 0x1.861861862p-1, 0x1.83c977ab2p-1, 0x1.818181818p-1, 0x1.7f405fd02p-1, + 0x1.7d05f417ep-1, 0x1.7ad2208ep-1, 0x1.78a4c8178p-1, 0x1.767dce434p-1, + 0x1.745d1745ep-1, 0x1.724287f46p-1, 0x1.702e05c0cp-1, 0x1.6e1f76b44p-1, + 0x1.6c16c16c2p-1, 0x1.6a13cd154p-1, 0x1.681681682p-1, 0x1.661ec6a52p-1, + 0x1.642c8590cp-1, 0x1.623fa7702p-1, 0x1.605816058p-1, 0x1.5e75bb8dp-1, + 0x1.5c9882b94p-1, 0x1.5ac056b02p-1, 0x1.58ed23082p-1, 0x1.571ed3c5p-1, + 0x1.555555556p-1, 0x1.5390948f4p-1, 0x1.51d07eae2p-1, 0x1.501501502p-1, + 0x1.4e5e0a73p-1, 0x1.4cab88726p-1, 0x1.4afd6a052p-1, 0x1.49539e3b2p-1, + 0x1.47ae147aep-1, 0x1.460cbc7f6p-1, 0x1.446f86562p-1, 0x1.42d6625d6p-1, + 0x1.414141414p-1, 0x1.3fb013fbp-1, 0x1.3e22cbce4p-1, 0x1.3c995a47cp-1, + 0x1.3b13b13b2p-1, 0x1.3991c2c18p-1, 0x1.381381382p-1, 0x1.3698df3dep-1, + 0x1.3521cfb2cp-1, 0x1.33ae45b58p-1, 0x1.323e34a2cp-1, 0x1.30d19013p-1, + 0x1.2f684bda2p-1, 0x1.2e025c04cp-1, 0x1.2c9fb4d82p-1, 0x1.2b404ad02p-1, + 0x1.29e4129e4p-1, 0x1.288b01288p-1, 0x1.27350b882p-1, 0x1.25e22708p-1, + 0x1.24924924ap-1, 0x1.23456789ap-1, 0x1.21fb78122p-1, 0x1.20b470c68p-1, + 0x1.1f7047dc2p-1, 0x1.1e2ef3b4p-1, 0x1.1cf06ada2p-1, 0x1.1bb4a4046p-1, + 0x1.1a7b9611ap-1, 
0x1.19453808cp-1, 0x1.181181182p-1, 0x1.16e068942p-1, + 0x1.15b1e5f76p-1, 0x1.1485f0e0ap-1, 0x1.135c81136p-1, 0x1.12358e75ep-1, + 0x1.111111112p-1, 0x1.0fef010fep-1, 0x1.0ecf56be6p-1, 0x1.0db20a89p-1, + 0x1.0c9714fbcp-1, 0x1.0b7e6ec26p-1, 0x1.0a6810a68p-1, 0x1.0953f3902p-1, + 0x1.084210842p-1, 0x1.073260a48p-1, 0x1.0624dd2f2p-1, 0x1.05197f7d8p-1, + 0x1.041041042p-1, 0x1.03091b52p-1, 0x1.020408102p-1, 0x1.01010101p-1, + 0x1p-1 + }; + +const double __asincoshf_lix[] = + { + 0x0p+0, 0x1.fe02a6b146789p-8, 0x1.fc0a8b0fa03e4p-7, + 0x1.7b91b07de311bp-6, 0x1.f829b0e7c33p-6, 0x1.39e87b9fd7d6p-5, + 0x1.77458f63edcfcp-5, 0x1.b42dd7117b1bfp-5, 0x1.f0a30c01362a6p-5, + 0x1.16536eea7fae1p-4, 0x1.341d7961791d1p-4, 0x1.51b073f07983fp-4, + 0x1.6f0d28ae3eb4cp-4, 0x1.8c345d6383b21p-4, 0x1.a926d3a475563p-4, + 0x1.c5e548f63a743p-4, 0x1.e27076e28f2e6p-4, 0x1.fec9131dbaabbp-4, + 0x1.0d77e7ccf6e59p-3, 0x1.1b72ad52f87ap-3, 0x1.29552f81eb523p-3, + 0x1.371fc201f7f74p-3, 0x1.44d2b6ccbfd1ep-3, 0x1.526e5e3a41438p-3, + 0x1.5ff3070a613d4p-3, 0x1.6d60fe717221dp-3, 0x1.7ab890212b909p-3, + 0x1.87fa065214911p-3, 0x1.9525a9cf296b4p-3, 0x1.a23bc1fe42563p-3, + 0x1.af3c94e81bff3p-3, 0x1.bc2867430acd6p-3, 0x1.c8ff7c7989a22p-3, + 0x1.d5c216b535b91p-3, 0x1.e27076e2f92e6p-3, 0x1.ef0adcbe0d936p-3, + 0x1.fb9186d5ebe2bp-3, 0x1.0402594b51041p-2, 0x1.0a324e27370e3p-2, + 0x1.1058bf9ad7ad5p-2, 0x1.1675cabaa660ep-2, 0x1.1c898c16b91fbp-2, + 0x1.22941fbcfb966p-2, 0x1.2895a13dd2ea3p-2, 0x1.2e8e2bade7d31p-2, + 0x1.347dd9a9afd55p-2, 0x1.3a64c556b05eap-2, 0x1.40430868877e4p-2, + 0x1.4618bc219dec2p-2, 0x1.4be5f9579e0a1p-2, 0x1.51aad872c982dp-2, + 0x1.5767717432a6cp-2, 0x1.5d1bdbf5669cap-2, 0x1.62c82f2b83795p-2, + 0x1.686c81e9964afp-2, 0x1.6e08eaa2929e4p-2, 0x1.739d7f6b95007p-2, + 0x1.792a55fdb7fa2p-2, 0x1.7eaf83b82efc3p-2, 0x1.842d1da1ecb17p-2, + 0x1.89a3386be825bp-2, 0x1.8f11e87347ac7p-2, 0x1.947941c1f26fbp-2, + 0x1.99d958119208bp-2, 0x1.9f323ecbd984cp-2, 0x1.a484090e5eb0ap-2, + 0x1.a9cec9a9cf84ap-2, 0x1.af1293245606bp-2, 
0x1.b44f77bc98f63p-2, + 0x1.b9858969218fbp-2, 0x1.beb4d9da96b7cp-2, 0x1.c3dd7a7d0354dp-2, + 0x1.c8ff7c79ada22p-2, 0x1.ce1af0b855bebp-2, 0x1.d32fe7e039bd5p-2, + 0x1.d83e72587673ep-2, 0x1.dd46a04c204a1p-2, 0x1.e24881a7cac26p-2, + 0x1.e744261d8a788p-2, 0x1.ec399d2457ccp-2, 0x1.f128f5fac86edp-2, + 0x1.f6123fa71c8acp-2, 0x1.faf588f76631fp-2, 0x1.ffd2e08580c98p-2, + 0x1.02552a5a4f0ffp-1, 0x1.04bdf9da8b6d2p-1, 0x1.0723e5c1b4f4p-1, + 0x1.0986f4f589521p-1, 0x1.0be72e423ca83p-1, 0x1.0e44985d0f48cp-1, + 0x1.109f39e2be497p-1, 0x1.12f71959283bcp-1, 0x1.154c3d2f4f5eap-1, + 0x1.179eabbd9c9a1p-1, 0x1.19ee6b466516fp-1, 0x1.1c3b81f723c25p-1, + 0x1.1e85f5e6ec0dp-1, 0x1.20cdcd193f76ep-1, 0x1.23130d7beb743p-1, + 0x1.2555bce9887cbp-1, 0x1.2795e1288211bp-1, 0x1.29d37fec2308bp-1, + 0x1.2c0e9ed45768cp-1, 0x1.2e47436e5ae68p-1, 0x1.307d7334ff0bep-1, + 0x1.32b1339134571p-1, 0x1.34e289d9b39d3p-1, 0x1.37117b5481bb6p-1, + 0x1.393e0d3549a1ap-1, 0x1.3b6844a017823p-1, 0x1.3d9026a70eefbp-1, + 0x1.3fb5b84cfeb42p-1, 0x1.41d8fe844b2aep-1, 0x1.43f9fe2fb9267p-1, + 0x1.4618bc21d86c2p-1, 0x1.48353d1e928dfp-1, 0x1.4a4f85db1debbp-1, + 0x1.4c679afcc323ap-1, 0x1.4e7d811b77bb1p-1, 0x1.50913cbff8c6bp-1, + 0x1.52a2d265be5abp-1, 0x1.54b2467998498p-1, 0x1.56bf9d5b34b99p-1, + 0x1.58cadb5cbe989p-1, 0x1.5ad404c33af2dp-1, 0x1.5cdb1dc6ad765p-1, + 0x1.5ee02a9241e75p-1, 0x1.60e32f447a8d9p-1, 0x1.62e42fefa39efp-1 + }; + +const double __asincoshf_c[] = + { + 0x1.0000000066947p+0, -0x1.00007f053d8cbp-1, 0x1.555280111d914p-2 + }; + + +const double __asincoshf_cp[] = + { + 0x1p+0, -0x1p-1, + 0x1.55555555030bcp-2, -0x1.ffffffff2b4e5p-3, + 0x1.999b5076a42f2p-3, -0x1.55570c45a647dp-3 + }; diff --git a/sysdeps/ieee754/flt-32/s_asincoshf_data.h b/sysdeps/ieee754/flt-32/s_asincoshf_data.h new file mode 100644 index 0000000..2a7947a --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_asincoshf_data.h @@ -0,0 +1,36 @@ +/* Common data for asinhf/acoshf implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef _ASINCOSHF_H +#define _ASINCOSHF_H + +extern const double __asincoshf_ix[] attribute_hidden; +#define IX __asincoshf_ix +extern const double __asincoshf_lix[] attribute_hidden; +#define LIX __asincoshf_lix +extern const double __asincoshf_c[] attribute_hidden; +#define C __asincoshf_c +extern const double __asincoshf_cp[] attribute_hidden; +#define CP __asincoshf_cp + +#endif diff --git a/sysdeps/ieee754/flt-32/s_asinhf.c b/sysdeps/ieee754/flt-32/s_asinhf.c index 93750f5..9353b42 100644 --- a/sysdeps/ieee754/flt-32/s_asinhf.c +++ b/sysdeps/ieee754/flt-32/s_asinhf.c @@ -28,94 +28,11 @@ SOFTWARE. 
#include <stdint.h> #include <libm-alias-float.h> #include "math_config.h" +#include "s_asincoshf_data.h" float __asinhf (float x) { - static const double ix[] = - { - 0x1p+0, 0x1.fc07f01fcp-1, 0x1.f81f81f82p-1, 0x1.f44659e4ap-1, - 0x1.f07c1f07cp-1, 0x1.ecc07b302p-1, 0x1.e9131abfp-1, 0x1.e573ac902p-1, - 0x1.e1e1e1e1ep-1, 0x1.de5d6e3f8p-1, 0x1.dae6076bap-1, 0x1.d77b654b8p-1, - 0x1.d41d41d42p-1, 0x1.d0cb58f6ep-1, 0x1.cd8568904p-1, 0x1.ca4b3055ep-1, - 0x1.c71c71c72p-1, 0x1.c3f8f01c4p-1, 0x1.c0e070382p-1, 0x1.bdd2b8994p-1, - 0x1.bacf914c2p-1, 0x1.b7d6c3ddap-1, 0x1.b4e81b4e8p-1, 0x1.b2036406cp-1, - 0x1.af286bca2p-1, 0x1.ac5701ac6p-1, 0x1.a98ef606ap-1, 0x1.a6d01a6dp-1, - 0x1.a41a41a42p-1, 0x1.a16d3f97ap-1, 0x1.9ec8e951p-1, 0x1.9c2d14ee4p-1, - 0x1.99999999ap-1, 0x1.970e4f80cp-1, 0x1.948b0fcd6p-1, 0x1.920fb49dp-1, - 0x1.8f9c18f9cp-1, 0x1.8d3018d3p-1, 0x1.8acb90f6cp-1, 0x1.886e5f0acp-1, - 0x1.861861862p-1, 0x1.83c977ab2p-1, 0x1.818181818p-1, 0x1.7f405fd02p-1, - 0x1.7d05f417ep-1, 0x1.7ad2208ep-1, 0x1.78a4c8178p-1, 0x1.767dce434p-1, - 0x1.745d1745ep-1, 0x1.724287f46p-1, 0x1.702e05c0cp-1, 0x1.6e1f76b44p-1, - 0x1.6c16c16c2p-1, 0x1.6a13cd154p-1, 0x1.681681682p-1, 0x1.661ec6a52p-1, - 0x1.642c8590cp-1, 0x1.623fa7702p-1, 0x1.605816058p-1, 0x1.5e75bb8dp-1, - 0x1.5c9882b94p-1, 0x1.5ac056b02p-1, 0x1.58ed23082p-1, 0x1.571ed3c5p-1, - 0x1.555555556p-1, 0x1.5390948f4p-1, 0x1.51d07eae2p-1, 0x1.501501502p-1, - 0x1.4e5e0a73p-1, 0x1.4cab88726p-1, 0x1.4afd6a052p-1, 0x1.49539e3b2p-1, - 0x1.47ae147aep-1, 0x1.460cbc7f6p-1, 0x1.446f86562p-1, 0x1.42d6625d6p-1, - 0x1.414141414p-1, 0x1.3fb013fbp-1, 0x1.3e22cbce4p-1, 0x1.3c995a47cp-1, - 0x1.3b13b13b2p-1, 0x1.3991c2c18p-1, 0x1.381381382p-1, 0x1.3698df3dep-1, - 0x1.3521cfb2cp-1, 0x1.33ae45b58p-1, 0x1.323e34a2cp-1, 0x1.30d19013p-1, - 0x1.2f684bda2p-1, 0x1.2e025c04cp-1, 0x1.2c9fb4d82p-1, 0x1.2b404ad02p-1, - 0x1.29e4129e4p-1, 0x1.288b01288p-1, 0x1.27350b882p-1, 0x1.25e22708p-1, - 0x1.24924924ap-1, 0x1.23456789ap-1, 0x1.21fb78122p-1, 0x1.20b470c68p-1, - 
0x1.1f7047dc2p-1, 0x1.1e2ef3b4p-1, 0x1.1cf06ada2p-1, 0x1.1bb4a4046p-1, - 0x1.1a7b9611ap-1, 0x1.19453808cp-1, 0x1.181181182p-1, 0x1.16e068942p-1, - 0x1.15b1e5f76p-1, 0x1.1485f0e0ap-1, 0x1.135c81136p-1, 0x1.12358e75ep-1, - 0x1.111111112p-1, 0x1.0fef010fep-1, 0x1.0ecf56be6p-1, 0x1.0db20a89p-1, - 0x1.0c9714fbcp-1, 0x1.0b7e6ec26p-1, 0x1.0a6810a68p-1, 0x1.0953f3902p-1, - 0x1.084210842p-1, 0x1.073260a48p-1, 0x1.0624dd2f2p-1, 0x1.05197f7d8p-1, - 0x1.041041042p-1, 0x1.03091b52p-1, 0x1.020408102p-1, 0x1.01010101p-1, - 0x1p-1 - }; - - static const double lix[] = - { - 0x0p+0, 0x1.fe02a6b146789p-8, 0x1.fc0a8b0fa03e4p-7, - 0x1.7b91b07de311bp-6, 0x1.f829b0e7c33p-6, 0x1.39e87b9fd7d6p-5, - 0x1.77458f63edcfcp-5, 0x1.b42dd7117b1bfp-5, 0x1.f0a30c01362a6p-5, - 0x1.16536eea7fae1p-4, 0x1.341d7961791d1p-4, 0x1.51b073f07983fp-4, - 0x1.6f0d28ae3eb4cp-4, 0x1.8c345d6383b21p-4, 0x1.a926d3a475563p-4, - 0x1.c5e548f63a743p-4, 0x1.e27076e28f2e6p-4, 0x1.fec9131dbaabbp-4, - 0x1.0d77e7ccf6e59p-3, 0x1.1b72ad52f87ap-3, 0x1.29552f81eb523p-3, - 0x1.371fc201f7f74p-3, 0x1.44d2b6ccbfd1ep-3, 0x1.526e5e3a41438p-3, - 0x1.5ff3070a613d4p-3, 0x1.6d60fe717221dp-3, 0x1.7ab890212b909p-3, - 0x1.87fa065214911p-3, 0x1.9525a9cf296b4p-3, 0x1.a23bc1fe42563p-3, - 0x1.af3c94e81bff3p-3, 0x1.bc2867430acd6p-3, 0x1.c8ff7c7989a22p-3, - 0x1.d5c216b535b91p-3, 0x1.e27076e2f92e6p-3, 0x1.ef0adcbe0d936p-3, - 0x1.fb9186d5ebe2bp-3, 0x1.0402594b51041p-2, 0x1.0a324e27370e3p-2, - 0x1.1058bf9ad7ad5p-2, 0x1.1675cabaa660ep-2, 0x1.1c898c16b91fbp-2, - 0x1.22941fbcfb966p-2, 0x1.2895a13dd2ea3p-2, 0x1.2e8e2bade7d31p-2, - 0x1.347dd9a9afd55p-2, 0x1.3a64c556b05eap-2, 0x1.40430868877e4p-2, - 0x1.4618bc219dec2p-2, 0x1.4be5f9579e0a1p-2, 0x1.51aad872c982dp-2, - 0x1.5767717432a6cp-2, 0x1.5d1bdbf5669cap-2, 0x1.62c82f2b83795p-2, - 0x1.686c81e9964afp-2, 0x1.6e08eaa2929e4p-2, 0x1.739d7f6b95007p-2, - 0x1.792a55fdb7fa2p-2, 0x1.7eaf83b82efc3p-2, 0x1.842d1da1ecb17p-2, - 0x1.89a3386be825bp-2, 0x1.8f11e87347ac7p-2, 0x1.947941c1f26fbp-2, - 0x1.99d958119208bp-2, 
0x1.9f323ecbd984cp-2, 0x1.a484090e5eb0ap-2, - 0x1.a9cec9a9cf84ap-2, 0x1.af1293245606bp-2, 0x1.b44f77bc98f63p-2, - 0x1.b9858969218fbp-2, 0x1.beb4d9da96b7cp-2, 0x1.c3dd7a7d0354dp-2, - 0x1.c8ff7c79ada22p-2, 0x1.ce1af0b855bebp-2, 0x1.d32fe7e039bd5p-2, - 0x1.d83e72587673ep-2, 0x1.dd46a04c204a1p-2, 0x1.e24881a7cac26p-2, - 0x1.e744261d8a788p-2, 0x1.ec399d2457ccp-2, 0x1.f128f5fac86edp-2, - 0x1.f6123fa71c8acp-2, 0x1.faf588f76631fp-2, 0x1.ffd2e08580c98p-2, - 0x1.02552a5a4f0ffp-1, 0x1.04bdf9da8b6d2p-1, 0x1.0723e5c1b4f4p-1, - 0x1.0986f4f589521p-1, 0x1.0be72e423ca83p-1, 0x1.0e44985d0f48cp-1, - 0x1.109f39e2be497p-1, 0x1.12f71959283bcp-1, 0x1.154c3d2f4f5eap-1, - 0x1.179eabbd9c9a1p-1, 0x1.19ee6b466516fp-1, 0x1.1c3b81f723c25p-1, - 0x1.1e85f5e6ec0dp-1, 0x1.20cdcd193f76ep-1, 0x1.23130d7beb743p-1, - 0x1.2555bce9887cbp-1, 0x1.2795e1288211bp-1, 0x1.29d37fec2308bp-1, - 0x1.2c0e9ed45768cp-1, 0x1.2e47436e5ae68p-1, 0x1.307d7334ff0bep-1, - 0x1.32b1339134571p-1, 0x1.34e289d9b39d3p-1, 0x1.37117b5481bb6p-1, - 0x1.393e0d3549a1ap-1, 0x1.3b6844a017823p-1, 0x1.3d9026a70eefbp-1, - 0x1.3fb5b84cfeb42p-1, 0x1.41d8fe844b2aep-1, 0x1.43f9fe2fb9267p-1, - 0x1.4618bc21d86c2p-1, 0x1.48353d1e928dfp-1, 0x1.4a4f85db1debbp-1, - 0x1.4c679afcc323ap-1, 0x1.4e7d811b77bb1p-1, 0x1.50913cbff8c6bp-1, - 0x1.52a2d265be5abp-1, 0x1.54b2467998498p-1, 0x1.56bf9d5b34b99p-1, - 0x1.58cadb5cbe989p-1, 0x1.5ad404c33af2dp-1, 0x1.5cdb1dc6ad765p-1, - 0x1.5ee02a9241e75p-1, 0x1.60e32f447a8d9p-1, 0x1.62e42fefa39efp-1 - }; - uint32_t t = asuint (x); t &= ~0u >> 1; double xs = x; @@ -152,35 +69,25 @@ __asinhf (float x) int j = (m + (UINT64_C(1) << (52 - 8))) >> (52 - 7); int e = (tp >> 52) - 0x3ff; double w = asdouble (m | UINT64_C(0x3ff) << 52); - double z = w * ix[j] - 1.0; - static const double c[] = - { - 0x1.0000000066947p+0, -0x1.00007f053d8cbp-1, 0x1.555280111d914p-2 - }; + double z = w * IX[j] - 1.0; double z2 = z * z; - double r = ((lix[128] * e + lix[j]) + z * c[0]) + z2 * (c[1] + z * c[2]); + double r = ((LIX[128] * e + LIX[j]) + 
z * C[0]) + z2 * (C[1] + z * C[2]); if (__glibc_unlikely (((asuint64 (r) + 259000) & INT64_C(0xfffffff)) < 260000)) /* accurate path */ { - static const double cp[] = - { - 0x1p+0, -0x1p-1, - 0x1.55555555030bcp-2, -0x1.ffffffff2b4e5p-3, - 0x1.999b5076a42f2p-3, -0x1.55570c45a647dp-3 - }; z2 = z * z; - double c0 = cp[0] + z * cp[1]; - double c2 = cp[2] + z * cp[3]; - double c4 = cp[4] + z * cp[5]; + double c0 = CP[0] + z * CP[1]; + double c2 = CP[2] + z * CP[3]; + double c4 = CP[4] + z * CP[5]; c0 += z2 * (c2 + z2 * c4); const double ln2l = 0x1.7f7d1cf79abcap-20; const double ln2h = 0x1.62e4p-1; double Lh = ln2h * e; double Ll = ln2l * e; - r = fma (z, c0, Ll + lix[j]) + Lh; + r = fma (z, c0, Ll + LIX[j]) + Lh; if (__glibc_unlikely ((asuint64 (r) & INT64_C(0xfffffff)) == 0)) { - double h = fma (z, c0, Ll + lix[j]) + (Lh - r); + double h = fma (z, c0, Ll + LIX[j]) + (Lh - r); r = r + 64 * h; } } diff --git a/sysdeps/ieee754/flt-32/s_asinpif.c b/sysdeps/ieee754/flt-32/s_asinpif.c new file mode 100644 index 0000000..5e97a34 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_asinpif.c @@ -0,0 +1,137 @@ +/* Correctly-rounded half-revolution arc-sine function for binary32 value. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/asinpi/asinpif.c, revision 6ee58266). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <errno.h> +#include <math.h> +#include <stdint.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__asinpif (float x) +{ + float ax = fabsf (x); + double az = ax; + double z = x; + uint32_t t = asuint (x); + int32_t e = (t >> 23) & 0xff; + if (__glibc_unlikely (e >= 127)) + { + if (ax == 1.0f) + return copysignf (0.5f, x); + if (e == 0xff && (t << 9)) + return x + x; /* nan */ + return __math_edomf ((x - x) / (x - x)); /* nan */ + } + int32_t s = 146 - e; + int32_t i = 0; + /* s<32 corresponds to |x| >= 2^-12 */ + if (__glibc_likely (s < 32)) + i = ((t & (~0u >> 9)) | 1 << 23) >> s; + static const double ch[][8] = + { + { 0x1.45f306dc9c882p-2, 0x1.b2995e7b7dc2fp-5, 0x1.8723a1cf50c7ep-6, + 0x1.d1a4591d16a29p-7, 0x1.3ce3aa68ddaeep-7, 0x1.d3182ab0cc1bfp-8, + 0x1.62b379a8b88e3p-8, 0x1.6811411fcfec2p-8 }, + { 0x1.ffffffffd3cdap-2, -0x1.17cc1b3355fddp-4, 0x1.d067a1e8d5a99p-6, + -0x1.08e16fb09314ap-6, 0x1.5eed43d42dcb2p-7, -0x1.f58baca7acc71p-8, + 0x1.5dab64e2dcf15p-8, -0x1.59270e30797acp-9 }, + { 0x1.fffffff7c4617p-2, -0x1.17cc149ded3a2p-4, 0x1.d0654d4cb2c1ap-6, + -0x1.08c3ba713d33ap-6, 0x1.5d2053481079cp-7, -0x1.e485ebc545e7ep-8, + 0x1.303baca167dddp-8, -0x1.dee8d16d06b38p-10 }, + { 0x1.ffffffa749848p-2, -0x1.17cbe7155935p-4, 0x1.d05a312269adfp-6, + -0x1.0862b3ee617d7p-6, 0x1.5920708db2a73p-7, -0x1.cb0463b3862c3p-8, + 0x1.02b82478f95d7p-8, -0x1.52a7b8579e729p-10 }, + { 0x1.fffffe1f92bb5p-2, -0x1.17cb3e74c64e3p-4, 0x1.d03af67311cbfp-6, + -0x1.079441cbfc7ap-6, 
0x1.52b4287805a61p-7, -0x1.ac3286d604a98p-8, + 0x1.b2f1210d9701bp-9, -0x1.e740ddc25afd6p-11 }, + { 0x1.fffff92beb6e2p-2, -0x1.17c986fe9518bp-4, 0x1.cff98167c9a5ep-6, + -0x1.0638b591eae52p-6, 0x1.4a0803828959ep-7, -0x1.8adeca229f11dp-8, + 0x1.6b9a7ba05dfcep-9, -0x1.640521a43b2dp-11 }, + { 0x1.ffffeccee5bfcp-2, -0x1.17c5f1753f5eap-4, 0x1.cf874e4fe258fp-6, + -0x1.043e6cf77b256p-6, 0x1.3f7db42227d92p-7, -0x1.691a6fa2a2882p-8, + 0x1.2f6543162bc61p-9, -0x1.07d5da05822b6p-11 }, + { 0x1.ffffd2f64431dp-2, -0x1.17bf8208c10c1p-4, 0x1.ced7487cdb124p-6, + -0x1.01a0d30932905p-6, 0x1.3388f99b254dap-7, -0x1.4844e245c65bdp-8, + 0x1.fa777150197c6p-10, -0x1.8c1ecf16a05c8p-12 }, + { 0x1.ffffa36d1712ep-2, -0x1.17b523971bd4ep-4, 0x1.cddee26de2deep-6, + -0x1.fccb00abaaabcp-7, 0x1.269afc3622342p-7, -0x1.2933152686752p-8, + 0x1.a76d4956cc9a3p-10, -0x1.2ce7d6dc651cep-12 }, + { 0x1.ffff5402ab3a1p-2, -0x1.17a5ba85da77ap-4, 0x1.cc96894e05c02p-6, + -0x1.f532143cb832ep-7, 0x1.19180b660ff09p-7, -0x1.0c57417a78b3cp-8, + 0x1.62e26cbd7bb1ep-10, -0x1.ce28d33fe1df3p-13 }, + { 0x1.fffed8d639751p-2, -0x1.1790349f3ae76p-4, 0x1.caf9a4fd1b398p-6, + -0x1.ec986b111342ep-7, 0x1.0b53c3ad4baa4p-7, -0x1.e3c2282eeace4p-9, + 0x1.2a55369f55bbep-10, -0x1.667fe48c396e8p-13 }, + { 0x1.fffe24b714161p-2, -0x1.177394fbcb719p-4, 0x1.c90652d920ebdp-6, + -0x1.e3239197bddf1p-7, 0x1.fb2188525b025p-8, -0x1.b3aadd451afc7p-9, + 0x1.f74020f31fdabp-11, -0x1.18b0cb246768dp-13 }, + { 0x1.fffd298bec9e2p-2, -0x1.174efbfd34648p-4, 0x1.c6bcfe48ea92bp-6, + -0x1.d8f9f2a16157cp-7, 0x1.e0044f56c8864p-8, -0x1.883e2347fe76cp-9, + 0x1.a9f0e3c1b7af5p-11, -0x1.bb5acc0e60825p-14 }, + { 0x1.fffbd8b784c4dp-2, -0x1.1721abdd3722ep-4, 0x1.c41fee756d4bp-6, + -0x1.ce40bccf8065fp-7, 0x1.c59b684b70ef9p-8, -0x1.6133d027996b3p-9, + 0x1.69cad01106397p-11, -0x1.60f8e45494156p-14 }, + { 0x1.fffa23749cf88p-2, -0x1.16eb0a8285c06p-4, 0x1.c132d762e1b0dp-6, + -0x1.c31a959398f4ep-7, 0x1.ac1c5b46bc8ap-8, -0x1.3e34f1abe51dcp-9, + 0x1.346738737c0b9p-11, 
-0x1.1b227a3f5c75p-14 }, + { 0x1.fff7fb25bb407p-2, -0x1.16aaa14d7564p-4, 0x1.bdfa75fca5ff2p-6, + -0x1.b7a6e260d079cp-7, 0x1.93ab06911033cp-8, -0x1.1ee5560967fd5p-9, + 0x1.07d31060838bfp-11, -0x1.c96f33a283115p-15 + } + }; + const double *c = ch[i]; + double z2 = z * z; + double z4 = z2 * z2; + if (__glibc_unlikely (i == 0)) /* |x| < 2^-4 */ + { + double c0 = c[0] + z2 * c[1]; + double c2 = c[2] + z2 * c[3]; + double c4 = c[4] + z2 * c[5]; + double c6 = c[6] + z2 * c[7]; + c0 += c2 * z4; + c4 += c6 * z4; + c0 += c4 * (z4 * z4); + if (__glibc_unlikely (ax != 0.0 && ax <= 0x1.921fb4p-126f)) + __set_errno (ERANGE); + return z * c0; + } + else /* |x| >= 2^-4 */ + { + double f = sqrt (1 - az); + double c0 = c[0] + az * c[1]; + double c2 = c[2] + az * c[3]; + double c4 = c[4] + az * c[5]; + double c6 = c[6] + az * c[7]; + c0 += c2 * z2; + c4 += c6 * z2; + c0 += c4 * z4; + double r = fma (-c0, copysign (f, x), copysign (0.5, x)); + return r; + } +} +libm_alias_float (__asinpi, asinpi) diff --git a/sysdeps/ieee754/flt-32/s_atan2pif.c b/sysdeps/ieee754/flt-32/s_atan2pif.c new file mode 100644 index 0000000..8c9cbc13 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_atan2pif.c @@ -0,0 +1,238 @@ +/* Correctly-rounded half revolution arctangent function of two binary32 values. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/atan2pi/atan2pif.c, revision dbebee1). 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ + +#include <math.h> +#include <stdint.h> +#include <errno.h> +#include <libm-alias-float.h> +#include "math_config.h" + +static inline double +muldd (double xh, double xl, double ch, double cl, double *l) +{ + double ahlh = ch * xl; + double alhh = cl * xh; + double ahhh = ch * xh; + double ahhl = fma (ch, xh, -ahhh); + ahhl += alhh + ahlh; + ch = ahhh + ahhl; + *l = (ahhh - ch) + ahhl; + return ch; +} + +static double +polydd (double xh, double xl, int n, const double c[][2], double *l) +{ + int i = n - 1; + double ch = c[i][0], cl = c[i][1]; + while (--i >= 0) + { + ch = muldd (xh, xl, ch, cl, &cl); + double th = ch + c[i][0], tl = (c[i][0] - th) + ch; + ch = th; + cl += tl + c[i][1]; + } + *l = cl; + return ch; +} + +float +__atan2pif (float y, float x) +{ + static const double cn[] = + { + 0x1.45f306dc9c883p-2, 0x1.988d83a142adap-1, 0x1.747bebf492057p-1, + 0x1.2cc5645094ff3p-2, 0x1.a0521c711ab66p-5, 0x1.881b8058b9a0dp-9, + 0x1.b16ff514a0afp-16 + }; + static const double cd[] = + { + 0x1p+0, 0x1.6b8b143a3f6dap+1, 0x1.8421201d18ed5p+1, + 0x1.8221d086914ebp+0, 0x1.670657e3a07bap-2, 0x1.0f4951fd1e72dp-5, + 0x1.b3874b8798286p-11 + }; + static const double m[] = { 0, 1 }; + static const double off[] + = { 0.0f, 0.5f, 1.0f, 0.5f, -0.0f, -0.5f, -1.0f, -0.5f }; + static const float sgnf[] = { 1, -1 }; + static const double sgn[] = { 1, -1 }; + uint32_t ux = asuint (x); + uint32_t uy = asuint (y); + uint32_t ax = ux & (~0u >> 1); + uint32_t ay = uy & (~0u >> 1); + if (__glibc_unlikely (ay >= (0xff << 23) || ax >= (0xff << 23))) + { + if (ay > (0xff << 23)) + return x + y; /* nan */ + if (ax > (0xff << 23)) + return x + y; /* nan */ + uint32_t yinf = ay == (0xff << 23); + uint32_t xinf = ax == (0xff << 23); + if (yinf & xinf) + { + if (ux >> 31) + return 0.75f * sgnf[uy >> 31]; + else + return 0.25f * sgnf[uy >> 31]; + } + if (xinf) + { + if (ux >> 31) + return sgnf[uy >> 31]; + else + return 0.0f * sgnf[uy >> 31]; + } + if (yinf) + return 0.5f * sgnf[uy >> 31]; + 
} + if (__glibc_unlikely (ay == 0)) + { + if (__glibc_unlikely (!(ay | ax))) + { + uint32_t i = (uy >> 31) * 4 + (ux >> 31) * 2; + return off[i]; + } + if (!(ux >> 31)) + return 0.0f * sgnf[uy >> 31]; + } + if (__glibc_unlikely (ax == ay)) + { + static const float s[] = { 0.25, 0.75, -0.25, -0.75 }; + uint32_t i = (uy >> 31) * 2 + (ux >> 31); + return s[i]; + } + uint32_t gt = ay > ax, i = (uy >> 31) * 4 + (ux >> 31) * 2 + gt; + + double zx = x, zy = y; + double z = (m[gt] * zx + m[1 - gt] * zy) / (m[gt] * zy + m[1 - gt] * zx); + double r = cn[0], z2 = z*z; + z *= sgn[gt]; + /* avoid spurious underflow in the polynomial evaluation excluding extremely + small arguments */ + if (__glibc_likely (z2 > 0x1p-54)) + { + double z4 = z2*z2, z8 = z4*z4; + double cn0 = r + z2*cn[1]; + double cn2 = cn[2] + z2*cn[3]; + double cn4 = cn[4] + z2*cn[5]; + double cn6 = cn[6]; + cn0 += z4*cn2; + cn4 += z4*cn6; + cn0 += z8*cn4; + double cd0 = cd[0] + z2*cd[1]; + double cd2 = cd[2] + z2*cd[3]; + double cd4 = cd[4] + z2*cd[5]; + double cd6 = cd[6]; + cd0 += z4*cd2; + cd4 += z4*cd6; + cd0 += z8*cd4; + r = cn0/cd0; + } + r = z * r + off[i]; + uint64_t res = asuint64 (r); + if (__glibc_unlikely ((res << 1) > 0x6d40000000000000 + && ((res + 8) & 0xfffffff) <= 16)) + { + if (ax == ay) + { + static const double off2[] = { 0.25, 0.75, -0.25, -0.75 }; + r = off2[(uy >> 31) * 2 + (ux >> 31)]; + } + else + { + double zh, zl; + if (!gt) + { + zh = zy / zx; + zl = fma (zh, -zx, zy) / zx; + } + else + { + zh = zx / zy; + zl = fma (zh, -zy, zx) / zy; + } + double z2l, z2h = muldd (zh, zl, zh, zl, &z2l); + static const double c[][2] = + { + { 0x1.45f306dc9c883p-2, -0x1.6b01ec5513324p-56 }, + { -0x1.b2995e7b7b604p-4, 0x1.e402b0c13eedcp-58 }, + { 0x1.04c26be3b06cfp-4, -0x1.571d178a53efp-60 }, + { -0x1.7483758e69c03p-5, 0x1.819a6ed7aaf38p-63 }, + { 0x1.21bb9452523ffp-5, -0x1.234d866fb9807p-60 }, + { -0x1.da1bace3cc54ep-6, -0x1.c84f6ada49294p-64 }, + { 0x1.912b1c23345ddp-6, -0x1.534890fbc165p-60 }, + { 
-0x1.5bade52f5f52ap-6, 0x1.f783bafc832f6p-60 }, + { 0x1.32c69d084c5cp-6, 0x1.042d155953025p-60 }, + { -0x1.127bcfb3e8c7dp-6, -0x1.85aae199a7b6bp-60 }, + { 0x1.f0af43b11a731p-7, 0x1.8f0356356663p-61 }, + { -0x1.c57e86801029ep-7, 0x1.dcdf3e3b38eb4p-61 }, + { 0x1.a136408617ea1p-7, 0x1.a71affb36c6c4p-63 }, + { -0x1.824ac7814ba37p-7, 0x1.8928b295c0898p-61 }, + { 0x1.6794e32ea5471p-7, 0x1.0b4334fb41e63p-61 }, + { -0x1.501d57f643d97p-7, 0x1.516785bf1376ep-61 }, + { 0x1.3adf02ff2400ap-7, -0x1.b0e30bb8c8076p-62 }, + { -0x1.267702f94faap-7, -0x1.7a4d3a1850cc6p-62 }, + { 0x1.10dce97099686p-7, 0x1.fcc208eee2571p-61 }, + { -0x1.eee49cdad8002p-8, -0x1.9109b3f1bab82p-64 }, + { 0x1.af93bc191a929p-8, 0x1.069fd3b47d7bp-62 }, + { -0x1.6240751b54675p-8, -0x1.72dc8cfd03b6fp-62 }, + { 0x1.0b61e84080884p-8, 0x1.825824c80941bp-63 }, + { -0x1.6a72a8a74e3a5p-9, 0x1.8786a82fd117ep-63 }, + { 0x1.aede3217d939dp-10, -0x1.93b626982e1fep-68 }, + { -0x1.b66568f09ebeep-11, -0x1.704a39121d0a5p-66 }, + { 0x1.73af3977fa973p-12, -0x1.aa050e2244ea3p-68 }, + { -0x1.fc69d85ed28c9p-14, 0x1.867f17b764cap-68 }, + { 0x1.0c883a9270162p-15, -0x1.6842833896dd9p-70 }, + { -0x1.9a0b27b6dfe15p-18, 0x1.427fc2f4e1327p-73 }, + { 0x1.91e15e7ab5bdcp-21, -0x1.730dbc6279d0dp-77 }, + { -0x1.7b1119c1ff867p-25, 0x1.145f9980759c4p-79 } + }; + double pl, ph = polydd (z2h, z2l, 32, c, &pl); + zh *= sgn[gt]; + zl *= sgn[gt]; + ph = muldd (zh, zl, ph, pl, &pl); + double sh = ph + off[i], sl = ((off[i] - sh) + ph) + pl; + float rf = sh; + double th = rf, dh = sh - th, tm = dh + sl; + r = th + tm; + double d = r - th; + if (!(asuint64 (d) << 12)) + { + double ad = fabs (d), am = fabs (tm); + if (ad > am) + r -= d * 0x1p-10; + if (ad < am) + r += d * 0x1p-10; + } + } + } + float rf = r; + if (__glibc_unlikely (rf == 0.0f && y != 0.0f)) + __set_errno (ERANGE); + return rf; +} +libm_alias_float (__atan2pi, atan2pi) diff --git a/sysdeps/ieee754/flt-32/s_atanpif.c b/sysdeps/ieee754/flt-32/s_atanpif.c new file mode 100644 index 
0000000..8d78163 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_atanpif.c @@ -0,0 +1,109 @@ +/* Correctly-rounded half-revolution arc-tangent of binary32 value. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (file src/binary32/atanpi/atanpif.c, revision e02000e). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ + +#include <errno.h> +#include <math.h> +#include <stdint.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__atanpif (float x) +{ + uint32_t t = asuint (x); + int32_t e = (t >> 23) & 0xff; + bool gt = e >= 127; + if (__glibc_unlikely (e > 127 + 24)) + { + float f = copysignf (0.5f, x); + if (__glibc_unlikely (e == 0xff)) + { + if (t << 9) + return x + x; /* nan */ + return f; /* inf */ + } + /* Warning: 0x1.45f306p-2f / x underflows for |x| >= 0x1.45f306p+124 */ + if (fabsf (x) >= 0x1.45f306p+124f) + return f - copysign (0x1p-26f, x); + else + return f - 0x1.45f306p-2f / x; + } + double z = x; + if (__glibc_unlikely (e < 127 - 13)) + { + double sx = z * 0x1.45f306dc9c883p-2; + if (__glibc_unlikely (e < 127 - 25)) + { + float rsx = sx; + if (x != 0 && rsx == 0) + __set_errno (ERANGE); + return rsx; + } + return sx - (0x1.5555555555555p-2 * sx) * (x * x); + } + uint32_t ax = t & (~0u >> 1); + if (__glibc_unlikely (ax == 0x3fa267ddu)) + return copysignf (0x1.267004p-2f, x) - copysignf (0x1p-55f, x); + if (__glibc_unlikely (ax == 0x3f693531u)) + return copysignf (0x1.e1a662p-3f, x) + copysignf (0x1p-28f, x); + if (__glibc_unlikely (ax == 0x3f800000u)) + return copysignf (0x1p-2f, x); + if (gt) + z = 1 / z; + double z2 = z * z; + double z4 = z2 * z2; + double z8 = z4 * z4; + static const double cn[] = + { + 0x1.45f306dc9c882p-2, 0x1.733b561bc23d5p-1, 0x1.28d9805bdfbf2p-1, + 0x1.8c3ba966ae287p-3, 0x1.94a7f81ee634bp-6, 0x1.a6bbf6127a6dfp-11 + }; + static const double cd[] = + { + 0x1p+0, 0x1.4e3b3ecc2518fp+1, 0x1.3ef4a360ff063p+1, + 0x1.0f1dc55bad551p+0, 0x1.8da0fecc018a4p-3, 0x1.8fa87803776bfp-7, + 0x1.dadf2ca0acb43p-14 + }; + double cn0 = cn[0] + z2 * cn[1]; + double cn2 = cn[2] + z2 * cn[3]; + double cn4 = cn[4] + z2 * cn[5]; + cn0 += z4 * cn2; + cn0 += z8 * cn4; + cn0 *= z; + double cd0 = cd[0] + z2 * cd[1]; + double cd2 = cd[2] + z2 * cd[3]; + double cd4 = cd[4] + z2 * cd[5]; + double cd6 = cd[6]; + cd0 += z4 * cd2; + cd4 += z4 * cd6; + 
cd0 += z8 * cd4; + double r = cn0 / cd0; + if (gt) + r = copysign (0.5, z) - r; + return r; +} +libm_alias_float (__atanpi, atanpi) diff --git a/sysdeps/ieee754/flt-32/s_cospif.c b/sysdeps/ieee754/flt-32/s_cospif.c new file mode 100644 index 0000000..d4c652f --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_cospif.c @@ -0,0 +1,136 @@ +/* Correctly-rounded cosine of binary32 value for angles in half-revolutions + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (src/binary32/cospi/cospif.c, revision f786e13). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +*/ + +#include <math.h> +#include <stdint.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__cospif (float x) +{ + static const double sn[] = + { + 0x1.921fb54442d0fp-37, -0x1.4abbce6102b94p-112, 0x1.4669fa3c58463p-189 + }; + static const double cn[] = + { + -0x1.3bd3cc9be45cfp-74, 0x1.03c1f08088742p-150, -0x1.55d1e5eff55a5p-228 + }; + /* S[i] approximates sin(i*pi/2^6) */ + static const double S[] = + { + 0x0p+0, 0x1.91f65f10dd814p-5, 0x1.917a6bc29b42cp-4, + 0x1.2c8106e8e613ap-3, 0x1.8f8b83c69a60bp-3, 0x1.f19f97b215f1bp-3, + 0x1.294062ed59f06p-2, 0x1.58f9a75ab1fddp-2, 0x1.87de2a6aea963p-2, + 0x1.b5d1009e15ccp-2, 0x1.e2b5d3806f63bp-2, 0x1.073879922ffeep-1, + 0x1.1c73b39ae68c8p-1, 0x1.30ff7fce17035p-1, 0x1.44cf325091dd6p-1, + 0x1.57d69348cecap-1, 0x1.6a09e667f3bcdp-1, 0x1.7b5df226aafafp-1, + 0x1.8bc806b151741p-1, 0x1.9b3e047f38741p-1, 0x1.a9b66290ea1a3p-1, + 0x1.b728345196e3ep-1, 0x1.c38b2f180bdb1p-1, 0x1.ced7af43cc773p-1, + 0x1.d906bcf328d46p-1, 0x1.e212104f686e5p-1, 0x1.e9f4156c62ddap-1, + 0x1.f0a7efb9230d7p-1, 0x1.f6297cff75cbp-1, 0x1.fa7557f08a517p-1, + 0x1.fd88da3d12526p-1, 0x1.ff621e3796d7ep-1, 0x1p+0, + 0x1.ff621e3796d7ep-1, 0x1.fd88da3d12526p-1, 0x1.fa7557f08a517p-1, + 0x1.f6297cff75cbp-1, 0x1.f0a7efb9230d7p-1, 0x1.e9f4156c62ddap-1, + 0x1.e212104f686e5p-1, 0x1.d906bcf328d46p-1, 0x1.ced7af43cc773p-1, + 0x1.c38b2f180bdb1p-1, 0x1.b728345196e3ep-1, 0x1.a9b66290ea1a3p-1, + 0x1.9b3e047f38741p-1, 0x1.8bc806b151741p-1, 0x1.7b5df226aafafp-1, + 0x1.6a09e667f3bcdp-1, 0x1.57d69348cecap-1, 0x1.44cf325091dd6p-1, + 0x1.30ff7fce17035p-1, 0x1.1c73b39ae68c8p-1, 0x1.073879922ffeep-1, + 0x1.e2b5d3806f63bp-2, 0x1.b5d1009e15ccp-2, 0x1.87de2a6aea963p-2, + 0x1.58f9a75ab1fddp-2, 0x1.294062ed59f06p-2, 0x1.f19f97b215f1bp-3, + 0x1.8f8b83c69a60bp-3, 0x1.2c8106e8e613ap-3, 0x1.917a6bc29b42cp-4, + 0x1.91f65f10dd814p-5, 0x0p+0, -0x1.91f65f10dd814p-5, + -0x1.917a6bc29b42cp-4, -0x1.2c8106e8e613ap-3, -0x1.8f8b83c69a60bp-3, + -0x1.f19f97b215f1bp-3, -0x1.294062ed59f06p-2, 
-0x1.58f9a75ab1fddp-2, + -0x1.87de2a6aea963p-2, -0x1.b5d1009e15ccp-2, -0x1.e2b5d3806f63bp-2, + -0x1.073879922ffeep-1, -0x1.1c73b39ae68c8p-1, -0x1.30ff7fce17035p-1, + -0x1.44cf325091dd6p-1, -0x1.57d69348cecap-1, -0x1.6a09e667f3bcdp-1, + -0x1.7b5df226aafafp-1, -0x1.8bc806b151741p-1, -0x1.9b3e047f38741p-1, + -0x1.a9b66290ea1a3p-1, -0x1.b728345196e3ep-1, -0x1.c38b2f180bdb1p-1, + -0x1.ced7af43cc773p-1, -0x1.d906bcf328d46p-1, -0x1.e212104f686e5p-1, + -0x1.e9f4156c62ddap-1, -0x1.f0a7efb9230d7p-1, -0x1.f6297cff75cbp-1, + -0x1.fa7557f08a517p-1, -0x1.fd88da3d12526p-1, -0x1.ff621e3796d7ep-1, + -0x1p+0, -0x1.ff621e3796d7ep-1, -0x1.fd88da3d12526p-1, + -0x1.fa7557f08a517p-1, -0x1.f6297cff75cbp-1, -0x1.f0a7efb9230d7p-1, + -0x1.e9f4156c62ddap-1, -0x1.e212104f686e5p-1, -0x1.d906bcf328d46p-1, + -0x1.ced7af43cc773p-1, -0x1.c38b2f180bdb1p-1, -0x1.b728345196e3ep-1, + -0x1.a9b66290ea1a3p-1, -0x1.9b3e047f38741p-1, -0x1.8bc806b151741p-1, + -0x1.7b5df226aafafp-1, -0x1.6a09e667f3bcdp-1, -0x1.57d69348cecap-1, + -0x1.44cf325091dd6p-1, -0x1.30ff7fce17035p-1, -0x1.1c73b39ae68c8p-1, + -0x1.073879922ffeep-1, -0x1.e2b5d3806f63bp-2, -0x1.b5d1009e15ccp-2, + -0x1.87de2a6aea963p-2, -0x1.58f9a75ab1fddp-2, -0x1.294062ed59f06p-2, + -0x1.f19f97b215f1bp-3, -0x1.8f8b83c69a60bp-3, -0x1.2c8106e8e613ap-3, + -0x1.917a6bc29b42cp-4, -0x1.91f65f10dd814p-5 + }; + + uint32_t ix = asuint (x); + int32_t e = (ix >> 23) & 0xff; + if (__glibc_unlikely (e == 0xff)) + { + if (!(ix << 9)) + return __math_invalidf (x); + return x + x; /* nan */ + } + int32_t m = (ix & ~0u >> 9) | 1 << 23; + int32_t s = 143 - e; + int32_t p = e - 112; + if (__glibc_unlikely (p < 0)) /* |x| < 2^-15 */ + { + uint32_t ax = ix & (~0u>>1); + /* Warning: -0x1.3bd3ccp+2f * x underflows for |x| < 0x1.9f03p-129 */ + if (ax >= 0x19f030u) + return fmaf (-0x1.3bd3ccp+2f * x, x, 1.0f); + else /* |x| < 0x1.9f03p-129 */ + return fmaf (-x, x, 1.0f); + } + if (__glibc_unlikely (p > 31)) + { + if (__glibc_unlikely (p > 63)) + return 1.0f; + int32_t iq = m << 
(p - 32); + return S[(iq + 32) & 127]; + } + int32_t k = m << p; + if (__glibc_unlikely (k == 0)) + { + int32_t iq = m >> (32 - p); + return S[(iq + 32) & 127]; + } + double z = k; + double z2 = z * z; + double fs = sn[0] + z2 * (sn[1] + z2 * sn[2]); + double fc = cn[0] + z2 * (cn[1] + z2 * cn[2]); + uint32_t iq = m >> s; + iq = (iq + 1) >> 1; + uint32_t is = iq & 127, ic = (iq + 32) & 127; + double ts = S[ic], tc = S[is]; + double r = ts + (ts * z2) * fc - (tc * z) * fs; + return r; +} +libm_alias_float (__cospi, cospi) diff --git a/sysdeps/ieee754/flt-32/s_sinpif.c b/sysdeps/ieee754/flt-32/s_sinpif.c new file mode 100644 index 0000000..772ea42 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_sinpif.c @@ -0,0 +1,134 @@ +/* Correctly-rounded sine of binary32 value for angles in half-revolutions + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (src/binary32/sinpi/sinpif.c, revision f786e13). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include <math.h> +#include <stdint.h> +#include <errno.h> +#include <fenv.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__sinpif (float x) +{ + static const double sn[] = + { + 0x1.921fb54442d0fp-37, -0x1.4abbce6102b94p-112, 0x1.4669fa3c58463p-189 + }; + static const double cn[] = + { + -0x1.3bd3cc9be45cfp-74, 0x1.03c1f08088742p-150, -0x1.55d1e5eff55a5p-228 + }; + static const double S[] = + { + 0x0p+0, 0x1.91f65f10dd814p-5, 0x1.917a6bc29b42cp-4, + 0x1.2c8106e8e613ap-3, 0x1.8f8b83c69a60bp-3, 0x1.f19f97b215f1bp-3, + 0x1.294062ed59f06p-2, 0x1.58f9a75ab1fddp-2, 0x1.87de2a6aea963p-2, + 0x1.b5d1009e15ccp-2, 0x1.e2b5d3806f63bp-2, 0x1.073879922ffeep-1, + 0x1.1c73b39ae68c8p-1, 0x1.30ff7fce17035p-1, 0x1.44cf325091dd6p-1, + 0x1.57d69348cecap-1, 0x1.6a09e667f3bcdp-1, 0x1.7b5df226aafafp-1, + 0x1.8bc806b151741p-1, 0x1.9b3e047f38741p-1, 0x1.a9b66290ea1a3p-1, + 0x1.b728345196e3ep-1, 0x1.c38b2f180bdb1p-1, 0x1.ced7af43cc773p-1, + 0x1.d906bcf328d46p-1, 0x1.e212104f686e5p-1, 0x1.e9f4156c62ddap-1, + 0x1.f0a7efb9230d7p-1, 0x1.f6297cff75cbp-1, 0x1.fa7557f08a517p-1, + 0x1.fd88da3d12526p-1, 0x1.ff621e3796d7ep-1, 0x1p+0, + 0x1.ff621e3796d7ep-1, 0x1.fd88da3d12526p-1, 0x1.fa7557f08a517p-1, + 0x1.f6297cff75cbp-1, 0x1.f0a7efb9230d7p-1, 0x1.e9f4156c62ddap-1, + 0x1.e212104f686e5p-1, 0x1.d906bcf328d46p-1, 0x1.ced7af43cc773p-1, + 0x1.c38b2f180bdb1p-1, 0x1.b728345196e3ep-1, 0x1.a9b66290ea1a3p-1, + 0x1.9b3e047f38741p-1, 0x1.8bc806b151741p-1, 0x1.7b5df226aafafp-1, + 0x1.6a09e667f3bcdp-1, 0x1.57d69348cecap-1, 0x1.44cf325091dd6p-1, + 0x1.30ff7fce17035p-1, 0x1.1c73b39ae68c8p-1, 0x1.073879922ffeep-1, + 0x1.e2b5d3806f63bp-2, 0x1.b5d1009e15ccp-2, 0x1.87de2a6aea963p-2, + 0x1.58f9a75ab1fddp-2, 0x1.294062ed59f06p-2, 
0x1.f19f97b215f1bp-3, + 0x1.8f8b83c69a60bp-3, 0x1.2c8106e8e613ap-3, 0x1.917a6bc29b42cp-4, + 0x1.91f65f10dd814p-5, 0x0p+0, -0x1.91f65f10dd814p-5, + -0x1.917a6bc29b42cp-4, -0x1.2c8106e8e613ap-3, -0x1.8f8b83c69a60bp-3, + -0x1.f19f97b215f1bp-3, -0x1.294062ed59f06p-2, -0x1.58f9a75ab1fddp-2, + -0x1.87de2a6aea963p-2, -0x1.b5d1009e15ccp-2, -0x1.e2b5d3806f63bp-2, + -0x1.073879922ffeep-1, -0x1.1c73b39ae68c8p-1, -0x1.30ff7fce17035p-1, + -0x1.44cf325091dd6p-1, -0x1.57d69348cecap-1, -0x1.6a09e667f3bcdp-1, + -0x1.7b5df226aafafp-1, -0x1.8bc806b151741p-1, -0x1.9b3e047f38741p-1, + -0x1.a9b66290ea1a3p-1, -0x1.b728345196e3ep-1, -0x1.c38b2f180bdb1p-1, + -0x1.ced7af43cc773p-1, -0x1.d906bcf328d46p-1, -0x1.e212104f686e5p-1, + -0x1.e9f4156c62ddap-1, -0x1.f0a7efb9230d7p-1, -0x1.f6297cff75cbp-1, + -0x1.fa7557f08a517p-1, -0x1.fd88da3d12526p-1, -0x1.ff621e3796d7ep-1, + -0x1p+0, -0x1.ff621e3796d7ep-1, -0x1.fd88da3d12526p-1, + -0x1.fa7557f08a517p-1, -0x1.f6297cff75cbp-1, -0x1.f0a7efb9230d7p-1, + -0x1.e9f4156c62ddap-1, -0x1.e212104f686e5p-1, -0x1.d906bcf328d46p-1, + -0x1.ced7af43cc773p-1, -0x1.c38b2f180bdb1p-1, -0x1.b728345196e3ep-1, + -0x1.a9b66290ea1a3p-1, -0x1.9b3e047f38741p-1, -0x1.8bc806b151741p-1, + -0x1.7b5df226aafafp-1, -0x1.6a09e667f3bcdp-1, -0x1.57d69348cecap-1, + -0x1.44cf325091dd6p-1, -0x1.30ff7fce17035p-1, -0x1.1c73b39ae68c8p-1, + -0x1.073879922ffeep-1, -0x1.e2b5d3806f63bp-2, -0x1.b5d1009e15ccp-2, + -0x1.87de2a6aea963p-2, -0x1.58f9a75ab1fddp-2, -0x1.294062ed59f06p-2, + -0x1.f19f97b215f1bp-3, -0x1.8f8b83c69a60bp-3, -0x1.2c8106e8e613ap-3, + -0x1.917a6bc29b42cp-4, -0x1.91f65f10dd814p-5 + }; + + uint32_t ix = asuint (x); + int32_t e = (ix >> 23) & 0xff; + if (__glibc_unlikely (e == 0xff)) + { + if (!(ix << 9)) + return __math_invalidf (x); + return x + x; /* nan */ + } + int32_t m = (ix & ~0u >> 9) | 1 << 23, sgn = ix; + sgn >>= 31; + m = (m ^ sgn) - sgn; + int32_t s = 143 - e; + if (__glibc_unlikely (s < 0)) + { + if (__glibc_unlikely (s < -6)) + return copysignf (0.0f, x); + int32_t 
iq = m << (-s - 1); + iq &= 127; + if (iq == 0 || iq == 64) + return copysignf (0.0f, x); + return S[iq]; + } + else if (__glibc_unlikely (s > 30)) + { + double z = x, z2 = z * z; + return z * (0x1.921fb54442d18p+1 + z2 * (-0x1.4abbce625be53p+2)); + } + int32_t si = 25 - s; + if (__glibc_unlikely (si >= 0 && (m << si) == 0)) + return copysignf (0.0f, x); + + int32_t k = m << (31 - s); + double z = k, z2 = z * z; + double fs = sn[0] + z2 * (sn[1] + z2 * sn[2]); + double fc = cn[0] + z2 * (cn[1] + z2 * cn[2]); + uint32_t iq = m >> s; + iq = (iq + 1) >> 1; + uint32_t is = iq & 127, ic = (iq + 32) & 127; + double ts = S[is], tc = S[ic]; + double r = ts + (ts * z2) * fc + (tc * z) * fs; + return r; +} +libm_alias_float (__sinpi, sinpi) diff --git a/sysdeps/ieee754/flt-32/s_tanpif.c b/sysdeps/ieee754/flt-32/s_tanpif.c new file mode 100644 index 0000000..a268549 --- /dev/null +++ b/sysdeps/ieee754/flt-32/s_tanpif.c @@ -0,0 +1,90 @@ +/* Correctly-rounded tangent of binary32 value for angles in half-revolutions + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (src/binary32/tanpi/tanpif.c, revision 3bbf907). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +#include <stdint.h> +#include <errno.h> +#include <libm-alias-float.h> +#include "math_config.h" + +float +__tanpif (float x) +{ + uint32_t ix = asuint (x); + uint32_t e = ix & (0xff << 23); + if (__glibc_unlikely (e > (150 << 23))) /* |x| > 2^23 */ + { + if (e == (0xff << 23)) /* x = nan or inf */ + { + if (!(ix << 9)) /* x = inf */ + return __math_invalidf (x); + return x + x; /* x = nan */ + } + return copysign (0.0f, x); + } + float x4 = 4.0f * x; + float nx4 = roundevenf_finite (x4); + float dx4 = x4 - nx4; + float ni = roundevenf_finite (x); + float zf = x - ni; + if (__glibc_unlikely (dx4 == 0.0f)) /* 4*x integer */ + { + int k = x4; + if (k & 1) + return copysignf (1.0f, zf); /* x = 1/4 mod 1/2 */ + k &= 7; + if (k == 0) + return copysignf (0.0f, x); /* x = 0 mod 2 */ + if (k == 4) + return -copysignf (0.0f, x); /* x = 1 mod 2 */ + __set_errno (ERANGE); + if (k == 2) + return 1.0f / 0.0f; /* x = 1/2 mod 2 */ + if (k == 6) + return -1.0f / 0.0f; /* x = -1/2 mod 2 */ + } + ix = asuint (zf); + uint32_t a = ix & (~0u >> 1); + /* x=0x1.267004p-2 is not correctly rounded for RNDZ/RNDD by the code below */ + if (__glibc_unlikely (a == 0x3e933802u)) + return copysignf (0x1.44cfbap+0f, zf) + copysignf (0x1p-25f, zf); + /* x=-0x1.e4cd0ap-14 is not correctly rounded for RNDU by the code below */ + if (__glibc_unlikely (a == 0x38f26685u)) + return copysignf (0x1.7cc304p-12, zf) + copysignf (0x1p-37f, zf); + + double z = zf, z2 = z * z; + + static const double cn[] = { 0x1.921fb54442d19p-1, -0x1.1f458b3e1f8d6p-2, + 0x1.68a34bd0b8f6ap-6, -0x1.e4866f7a25f99p-13 }; + static const double cd[] = { 0x1p+0, -0x1.4b4b98d2df3a7p-1, + 0x1.8e9926d2bb901p-4, -0x1.a6f77fd847eep-9 }; + double z4 + = z2 * z2, + r = (z 
- z * z2) * ((cn[0] + z2 * cn[1]) + z4 * (cn[2] + z2 * cn[3])) + / (((cd[0] + z2 * cd[1]) + z4 * (cd[2] + z2 * cd[3])) * (0.25 - z2)); + return r; +} +libm_alias_float (__tanpi, tanpi) diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps index f8bf089..b4a6a3b 100644 --- a/sysdeps/loongarch/lp64/libm-test-ulps +++ b/sysdeps/loongarch/lp64/libm-test-ulps @@ -35,22 +35,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -87,22 +83,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -126,22 +118,18 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_downward": @@ -174,22 +162,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -717,22 +701,18 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1409,22 +1389,18 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_towardzero": 
double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1461,22 +1437,18 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/mach/hurd/bind.c b/sysdeps/mach/hurd/bind.c index bb408af..cdcdd30 100644 --- a/sysdeps/mach/hurd/bind.c +++ b/sysdeps/mach/hurd/bind.c @@ -66,7 +66,7 @@ __bind (int fd, __CONST_SOCKADDR_ARG addrarg, socklen_t len) if (! err) { enum retry_type doretry; - char retryname[1024]; + string_t retryname; /* Get a port to the ifsock translator. */ err = __dir_lookup (node, "", 0, 0, &doretry, retryname, &ifsock); if (! err && (doretry != FS_RETRY_NORMAL || retryname[0] != '\0')) diff --git a/sysdeps/mach/hurd/dl-sysdep.c b/sysdeps/mach/hurd/dl-sysdep.c index 0ddb220..173499c 100644 --- a/sysdeps/mach/hurd/dl-sysdep.c +++ b/sysdeps/mach/hurd/dl-sysdep.c @@ -267,7 +267,7 @@ open_file (const char *file_name, int flags, mach_port_t *port, struct stat64 *stat) { enum retry_type doretry; - char retryname[1024]; /* XXX string_t LOSES! 
*/ + string_t retryname; file_t startdir; error_t err; diff --git a/sysdeps/mach/hurd/fexecve.c b/sysdeps/mach/hurd/fexecve.c index 906cd86..cd3bd2b 100644 --- a/sysdeps/mach/hurd/fexecve.c +++ b/sysdeps/mach/hurd/fexecve.c @@ -28,7 +28,7 @@ fexecve (int fd, char *const argv[], char *const envp[]) file_t file; error_t err; enum retry_type doretry; - char retryname[1024]; + string_t retryname; err = HURD_DPORT_USE (fd, __dir_lookup (port, "", O_EXEC, 0, &doretry, retryname, &file)); diff --git a/sysdeps/mach/hurd/getlogin.c b/sysdeps/mach/hurd/getlogin.c index b3427b0..d73c034 100644 --- a/sysdeps/mach/hurd/getlogin.c +++ b/sysdeps/mach/hurd/getlogin.c @@ -25,7 +25,7 @@ char * getlogin (void) { - static char login[1024]; /* XXX */ + static string_t login; error_t err; if (err = __USEPORT (PROC, __proc_getlogin (port, login))) diff --git a/sysdeps/mach/hurd/readlinkat.c b/sysdeps/mach/hurd/readlinkat.c index e56764d..dbbc528 100644 --- a/sysdeps/mach/hurd/readlinkat.c +++ b/sysdeps/mach/hurd/readlinkat.c @@ -32,7 +32,7 @@ __readlinkat (int fd, const char *file_name, char *buf, size_t len) file_t file_stat; struct stat64 st; enum retry_type doretry; - char retryname[1024]; + string_t retryname; file_t file; char *rbuf = buf; mach_msg_type_number_t nread = len; diff --git a/sysdeps/mach/hurd/ttyname.c b/sysdeps/mach/hurd/ttyname.c index 62c3bf7..1353fe8 100644 --- a/sysdeps/mach/hurd/ttyname.c +++ b/sysdeps/mach/hurd/ttyname.c @@ -27,7 +27,7 @@ char * ttyname (int fd) { error_t err; - static char nodename[1024]; /* XXX */ + static string_t nodename; if (err = HURD_DPORT_USE (fd, __term_get_nodename (port, nodename))) { diff --git a/sysdeps/mach/hurd/ttyname_r.c b/sysdeps/mach/hurd/ttyname_r.c index f0e3f65..836ec6a 100644 --- a/sysdeps/mach/hurd/ttyname_r.c +++ b/sysdeps/mach/hurd/ttyname_r.c @@ -28,7 +28,7 @@ int __ttyname_r (int fd, char *buf, size_t buflen) { error_t err; - char nodename[1024]; /* XXX */ + string_t nodename; size_t len; if (err = HURD_DPORT_USE (fd, 
__term_get_nodename (port, nodename))) diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps index 98079e0..3b1c725 100644 --- a/sysdeps/mips/mips64/libm-test-ulps +++ b/sysdeps/mips/mips64/libm-test-ulps @@ -35,22 +35,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -87,22 +83,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -126,22 +118,18 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_downward": @@ -174,22 +162,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -717,22 +701,18 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1422,22 +1402,18 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_towardzero": double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1473,22 +1449,18 @@ ldouble: 3 Function: "tanpi": 
double: 3 -float: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps index b0de024..accf309 100644 --- a/sysdeps/or1k/fpu/libm-test-ulps +++ b/sysdeps/or1k/fpu/libm-test-ulps @@ -27,19 +27,15 @@ double: 2 Function: "acospi": double: 2 -float: 1 Function: "acospi_downward": double: 1 -float: 2 Function: "acospi_towardzero": double: 1 -float: 2 Function: "acospi_upward": double: 2 -float: 1 Function: "asin": double: 1 @@ -67,19 +63,15 @@ double: 3 Function: "asinpi": double: 1 -float: 1 Function: "asinpi_downward": double: 1 -float: 1 Function: "asinpi_towardzero": double: 1 -float: 2 Function: "asinpi_upward": double: 2 -float: 2 Function: "atan": double: 1 @@ -95,19 +87,15 @@ double: 8 Function: "atan2pi": double: 1 -float: 1 Function: "atan2pi_downward": double: 1 -float: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 Function: "atan2pi_upward": double: 1 -float: 2 Function: "atan_downward": double: 1 @@ -132,19 +120,15 @@ double: 3 Function: "atanpi": double: 2 -float: 1 Function: "atanpi_downward": double: 1 -float: 2 Function: "atanpi_towardzero": double: 1 -float: 2 Function: "atanpi_upward": double: 1 -float: 1 Function: "cabs": double: 1 @@ -561,19 +545,15 @@ double: 2 Function: "cospi": double: 2 -float: 2 Function: "cospi_downward": double: 1 -float: 2 Function: "cospi_towardzero": double: 1 -float: 1 Function: "cospi_upward": double: 1 -float: 2 Function: Real part of "cpow": double: 2 @@ -1102,19 +1082,15 @@ double: 3 Function: "sinpi": double: 2 -float: 2 Function: "sinpi_downward": double: 2 -float: 2 Function: "sinpi_towardzero": double: 2 -float: 1 Function: "sinpi_upward": double: 2 -float: 3 Function: "tan_downward": double: 1 @@ -1139,19 +1115,15 @@ double: 3 Function: "tanpi": double: 3 -float: 3 Function: 
"tanpi_downward": double: 2 -float: 3 Function: "tanpi_towardzero": double: 2 -float: 3 Function: "tanpi_upward": double: 2 -float: 4 Function: "tgamma": double: 9 diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps index aa047f3..f3d5604 100644 --- a/sysdeps/or1k/nofpu/libm-test-ulps +++ b/sysdeps/or1k/nofpu/libm-test-ulps @@ -27,7 +27,6 @@ double: 2 Function: "acospi": double: 2 -float: 1 Function: "asin": double: 1 @@ -55,7 +54,6 @@ double: 3 Function: "asinpi": double: 1 -float: 1 Function: "atan": double: 1 @@ -71,7 +69,6 @@ double: 8 Function: "atan2pi": double: 1 -float: 1 Function: "atan_downward": double: 1 @@ -96,7 +93,6 @@ double: 3 Function: "atanpi": double: 2 -float: 1 Function: "cabs": double: 1 @@ -513,7 +509,6 @@ double: 2 Function: "cospi": double: 2 -float: 2 Function: Real part of "cpow": double: 2 @@ -996,7 +991,6 @@ double: 3 Function: "sinpi": double: 2 -float: 2 Function: "tan_downward": double: 1 @@ -1021,7 +1015,6 @@ double: 3 Function: "tanpi": double: 3 -float: 3 Function: "tgamma": double: 9 diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index cf3dec3..404d3af 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -43,25 +43,21 @@ ldouble: 4 Function: "acospi": double: 2 -float: 1 float128: 1 ldouble: 1 Function: "acospi_downward": double: 1 -float: 2 float128: 1 ldouble: 4 Function: "acospi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 4 Function: "acospi_upward": double: 2 -float: 1 float128: 2 ldouble: 4 @@ -111,25 +107,21 @@ ldouble: 7 Function: "asinpi": double: 1 -float: 1 float128: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 float128: 2 ldouble: 4 Function: "asinpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 4 Function: "asinpi_upward": double: 2 -float: 2 float128: 2 ldouble: 4 @@ -159,25 +151,21 @@ ldouble: 3 Function: "atan2pi": double: 1 -float: 1 float128: 3 ldouble: 3 Function: 
"atan2pi_downward": double: 1 -float: 2 float128: 2 ldouble: 4 Function: "atan2pi_towardzero": double: 1 -float: 2 float128: 2 ldouble: 5 Function: "atan2pi_upward": double: 1 -float: 2 float128: 2 ldouble: 4 @@ -218,25 +206,21 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 float128: 2 ldouble: 1 Function: "atanpi_downward": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "atanpi_upward": double: 1 -float: 1 float128: 2 ldouble: 5 @@ -874,25 +858,21 @@ ldouble: 2 Function: "cospi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 float128: 2 ldouble: 4 Function: "cospi_towardzero": double: 1 -float: 1 float128: 2 ldouble: 6 Function: "cospi_upward": double: 1 -float: 2 float128: 2 ldouble: 6 @@ -1799,25 +1779,21 @@ ldouble: 6 Function: "sinpi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 float128: 2 ldouble: 5 Function: "sinpi_towardzero": double: 2 -float: 1 float128: 2 ldouble: 8 Function: "sinpi_upward": double: 2 -float: 3 float128: 2 ldouble: 8 @@ -1881,25 +1857,21 @@ ldouble: 6 Function: "tanpi": double: 3 -float: 3 float128: 2 ldouble: 2 Function: "tanpi_downward": double: 2 -float: 3 float128: 4 ldouble: 8 Function: "tanpi_towardzero": double: 2 -float: 3 float128: 4 ldouble: 8 Function: "tanpi_upward": double: 2 -float: 4 float128: 4 ldouble: 8 diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h index aace1a8..7065d27 100644 --- a/sysdeps/powerpc/fpu/math_private.h +++ b/sysdeps/powerpc/fpu/math_private.h @@ -62,6 +62,7 @@ __ieee754_sqrtf128 (_Float128 __x) #ifdef _ARCH_PWR6 /* ISA 2.03 provides frin/round() and cntlzw/ctznll(). 
*/ # define ROUNDEVEN_INTRINSICS 0 +# define ROUNDEVENF_INTRINSICS 0 #endif #endif /* _PPC_MATH_PRIVATE_H_ */ diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps index d971ee2..720250e 100644 --- a/sysdeps/riscv/nofpu/libm-test-ulps +++ b/sysdeps/riscv/nofpu/libm-test-ulps @@ -35,7 +35,6 @@ ldouble: 2 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -72,7 +71,6 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "atan": @@ -96,7 +94,6 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan_downward": @@ -129,7 +126,6 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "cabs": @@ -654,7 +650,6 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1276,7 +1271,6 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "tan": @@ -1312,7 +1306,6 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tgamma": diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps index 0f84906..ee5df22 100644 --- a/sysdeps/riscv/rvd/libm-test-ulps +++ b/sysdeps/riscv/rvd/libm-test-ulps @@ -35,22 +35,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -87,22 +83,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -126,22 +118,18 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: 
"atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_downward": @@ -178,22 +166,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -725,22 +709,18 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1421,22 +1401,18 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_towardzero": double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1476,22 +1452,18 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps index 76a1f3c..1491089 100644 --- a/sysdeps/s390/fpu/libm-test-ulps +++ b/sysdeps/s390/fpu/libm-test-ulps @@ -35,22 +35,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -87,22 +83,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -126,22 +118,18 @@ ldouble: 2 Function: 
"atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_downward": @@ -174,22 +162,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -717,22 +701,18 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1407,22 +1387,18 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_towardzero": double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1458,22 +1434,18 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps index 02a80c4..d894901 100644 --- a/sysdeps/sparc/fpu/libm-test-ulps +++ b/sysdeps/sparc/fpu/libm-test-ulps @@ -35,22 +35,18 @@ ldouble: 3 Function: "acospi": double: 2 -float: 1 ldouble: 2 Function: "acospi_downward": double: 1 -float: 2 ldouble: 1 Function: "acospi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "acospi_upward": double: 2 -float: 1 ldouble: 2 Function: "asin": @@ -87,22 +83,18 @@ ldouble: 4 Function: "asinpi": double: 1 -float: 1 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 ldouble: 2 
Function: "asinpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "asinpi_upward": double: 2 -float: 2 ldouble: 2 Function: "atan": @@ -126,22 +118,18 @@ ldouble: 2 Function: "atan2pi": double: 1 -float: 1 ldouble: 3 Function: "atan2pi_downward": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 2 ldouble: 2 Function: "atan_downward": @@ -174,22 +162,18 @@ ldouble: 4 Function: "atanpi": double: 2 -float: 1 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 ldouble: 1 Function: "atanpi_towardzero": double: 1 -float: 2 ldouble: 1 Function: "atanpi_upward": double: 1 -float: 1 ldouble: 2 Function: "cabs": @@ -717,22 +701,18 @@ ldouble: 3 Function: "cospi": double: 2 -float: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 ldouble: 2 Function: Real part of "cpow": @@ -1422,22 +1402,18 @@ ldouble: 4 Function: "sinpi": double: 2 -float: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 ldouble: 2 Function: "sinpi_towardzero": double: 2 -float: 1 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 ldouble: 2 Function: "tan": @@ -1473,22 +1449,18 @@ ldouble: 3 Function: "tanpi": double: 3 -float: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 ldouble: 4 Function: "tgamma": diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index e454a63..a4bd2ed 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -83,25 +83,21 @@ float: 2 Function: "acospi": double: 2 -float: 1 float128: 2 ldouble: 3 Function: "acospi_downward": double: 1 -float: 2 float128: 1 ldouble: 3 Function: "acospi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 3 
Function: "acospi_upward": double: 2 -float: 1 float128: 2 ldouble: 2 @@ -184,25 +180,21 @@ float: 1 Function: "asinpi": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_downward": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "asinpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "asinpi_upward": double: 2 -float: 2 float128: 2 ldouble: 2 @@ -252,25 +244,21 @@ float: 2 Function: "atan2pi": double: 1 -float: 1 float128: 3 ldouble: 2 Function: "atan2pi_downward": double: 1 -float: 3 float128: 2 ldouble: 2 Function: "atan2pi_towardzero": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "atan2pi_upward": double: 1 -float: 3 float128: 2 ldouble: 2 @@ -351,25 +339,21 @@ float: 1 Function: "atanpi": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "atanpi_downward": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_towardzero": double: 1 -float: 2 float128: 1 ldouble: 2 Function: "atanpi_upward": double: 1 -float: 1 float128: 2 ldouble: 1 @@ -1066,25 +1050,21 @@ float: 2 Function: "cospi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "cospi_downward": double: 1 -float: 2 float128: 2 ldouble: 2 Function: "cospi_towardzero": double: 1 -float: 1 float128: 2 ldouble: 2 Function: "cospi_upward": double: 1 -float: 2 float128: 2 ldouble: 2 @@ -2217,25 +2197,21 @@ float: 1 Function: "sinpi": double: 2 -float: 2 float128: 2 ldouble: 2 Function: "sinpi_downward": double: 2 -float: 2 float128: 2 ldouble: 3 Function: "sinpi_towardzero": double: 2 -float: 1 float128: 2 ldouble: 2 Function: "sinpi_upward": double: 2 -float: 3 float128: 2 ldouble: 2 @@ -2316,25 +2292,21 @@ double: 1 Function: "tanpi": double: 3 -float: 3 float128: 3 ldouble: 3 Function: "tanpi_downward": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_towardzero": double: 2 -float: 3 float128: 4 ldouble: 4 Function: "tanpi_upward": double: 2 -float: 4 float128: 4 ldouble: 4 |