diff options
author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-11-11 17:38:44 -0300 |
---|---|---|
committer | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2024-11-26 15:07:57 -0300 |
commit | 3b1c5a539b7b8cb833f22012d1a95a4847594747 (patch) | |
tree | 6f8077c02fba9db87c69cfe49dc20354b5011bfd /sysdeps/ieee754 | |
parent | 32445b6dd2d23bd303dbb66fa3c7d0179c19dc76 (diff) | |
download | glibc-3b1c5a539b7b8cb833f22012d1a95a4847594747.zip glibc-3b1c5a539b7b8cb833f22012d1a95a4847594747.tar.gz glibc-3b1c5a539b7b8cb833f22012d1a95a4847594747.tar.bz2 |
math: Add internal roundeven_finite
Some CORE-MATH routines use roundeven, and most ISAs do not have
a specific instruction for the operation. In that case, the call
is routed to the generic implementation.
However, if the ISA does support round() and ctz() there is a better
alternative (as used by CORE-MATH).
This patch adds this optimization and also enables it on powerpc.
On a power10 it shows the following improvement:
expm1f master patched improvement
latency 9.8574 7.0139 28.85%
reciprocal-throughput 4.3742 2.6592 39.21%
Checked on powerpc64le-linux-gnu and aarch64-linux-gnu.
Reviewed-by: DJ Delorie <dj@redhat.com>
Diffstat (limited to 'sysdeps/ieee754')
-rw-r--r-- | sysdeps/ieee754/flt-32/e_gammaf_r.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/math_config.h | 27 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/s_expm1f.c | 2 | ||||
-rw-r--r-- | sysdeps/ieee754/flt-32/s_tanf.c | 2 |
4 files changed, 30 insertions, 3 deletions
diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c index 6b1f95d..66e8cae 100644 --- a/sysdeps/ieee754/flt-32/e_gammaf_r.c +++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c @@ -140,7 +140,7 @@ __ieee754_gammaf_r (float x, int *signgamp) }; double m = z - 0x1.7p+1; - double i = roundeven (m); + double i = roundeven_finite (m); double step = copysign (1.0, i); double d = m - i, d2 = d * d, d4 = d2 * d2, d8 = d4 * d4; double f = (c[0] + d * c[1]) + d2 * (c[2] + d * c[3]) diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h index dc07ebd..b30a03e 100644 --- a/sysdeps/ieee754/flt-32/math_config.h +++ b/sysdeps/ieee754/flt-32/math_config.h @@ -57,6 +57,33 @@ static inline int32_t converttoint (double_t x); #endif +#ifndef ROUNDEVEN_INTRINSICS +/* When set, roundeven_finite will route to the internal roundeven function. */ +# define ROUNDEVEN_INTRINSICS 1 +#endif + +/* Round x to nearest integer value in floating-point format, rounding halfway + cases to even. If the input is non finite the result is unspecified. 
*/ +static inline double +roundeven_finite (double x) +{ + if (!isfinite (x)) + __builtin_unreachable (); +#if ROUNDEVEN_INTRINSICS + return roundeven (x); +#else + double y = round (x); + if (fabs (x - y) == 0.5) + { + union { double f; uint64_t i; } u = {y}; + union { double f; uint64_t i; } v = {y - copysign (1.0, x)}; + if (__builtin_ctzll (v.i) > __builtin_ctzll (u.i)) + y = v.f; + } + return y; +#endif +} + static inline uint32_t asuint (float f) { diff --git a/sysdeps/ieee754/flt-32/s_expm1f.c b/sysdeps/ieee754/flt-32/s_expm1f.c index edd7c9a..a36e578 100644 --- a/sysdeps/ieee754/flt-32/s_expm1f.c +++ b/sysdeps/ieee754/flt-32/s_expm1f.c @@ -95,7 +95,7 @@ __expm1f (float x) return __math_oflowf (0); } double a = iln2 * z; - double ia = roundeven (a); + double ia = roundeven_finite (a); double h = a - ia; double h2 = h * h; uint64_t u = asuint64 (ia + big); diff --git a/sysdeps/ieee754/flt-32/s_tanf.c b/sysdeps/ieee754/flt-32/s_tanf.c index ff63e72..dfe56fc 100644 --- a/sysdeps/ieee754/flt-32/s_tanf.c +++ b/sysdeps/ieee754/flt-32/s_tanf.c @@ -38,7 +38,7 @@ rltl (float z, int *q) double x = z; double idl = -0x1.b1bbead603d8bp-32 * x; double idh = 0x1.45f306ep-1 * x; - double id = roundeven (idh); + double id = roundeven_finite (idh); *q = (int64_t) id; return (idh - id) + idl; } |