Add new pow implementation

The algorithm is exp(y * log(x)), where log(x) is computed with about 1.3*2^-68 relative error (1.5*2^-68 without fma), returning the result in two doubles, and the exp part uses the same algorithm (and lookup tables) as exp, but takes the input as two doubles and a sign (to handle negative bases with odd integer exponent). The __exp1 internal symbol is no longer necessary. There is separate code path when fma is not available but the worst case error is about 0.54 ULP in both cases. The lookup table and consts for log are 4168 bytes. The .rodata+.text is decreased by 37908 bytes on aarch64. The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: pow thruput: 2.40x in [0.01 11.1]x[0.01 11.1] pow latency: 1.84x in [0.01 11.1]x[0.01 11.1] Tested on aarch64-linux-gnu (defined __FP_FAST_FMA, TOINT_INTRINSICS) and arm-linux-gnueabihf (!defined __FP_FAST_FMA, !TOINT_INTRINSICS) and x86_64-linux-gnu (!defined __FP_FAST_FMA, !TOINT_INTRINSICS) and powerpc64le-linux-gnu (defined __FP_FAST_FMA, !TOINT_INTRINSICS) targets. * NEWS: Mention pow improvements. * math/Makefile (type-double-routines): Add e_pow_log_data. * sysdeps/generic/math_private.h (__exp1): Remove. * sysdeps/i386/fpu/e_pow_log_data.c: New file. * sysdeps/ia64/fpu/e_pow_log_data.c: New file. * sysdeps/ieee754/dbl-64/Makefile (CFLAGS-e_pow.c): Allow fma contraction. * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove. (exp_inline): Remove. (__ieee754_exp): Only single double input is handled. * sysdeps/ieee754/dbl-64/e_pow.c: Rewrite. * sysdeps/ieee754/dbl-64/e_pow_log_data.c: New file. * sysdeps/ieee754/dbl-64/math_config.h (issignaling_inline): Define. (__pow_log_data): Define. * sysdeps/ieee754/dbl-64/upow.h: Remove. * sysdeps/ieee754/dbl-64/upow.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_pow_log_data.c: New file. * sysdeps/x86_64/fpu/multiarch/Makefile (CFLAGS-e_pow-fma.c): Allow fma contraction. (CFLAGS-e_pow-fma4.c): Likewise.
author: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-06-13 17:57:20 +0100
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-09-19 10:04:51 +0100
commit: 424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448 (patch)
tree: 52fbd60de3d3b1e99208b3018cf79ee8a230a878 /sysdeps/ieee754/dbl-64/e_exp.c
parent: dab9c3488e86d5304f3e4b778933760374494a82 (diff)
download: glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.zip
glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.gz
glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.bz2
1 files changed, 8 insertions, 27 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 209f20b..37fdafc 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -85,10 +85,13 @@ top12 (double x)
   return asuint64 (x) >> 52;
 }
 
-/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
-   If hastail is 0 then xtail is assumed to be 0 too.  */
-static inline double
-exp_inline (double x, double xtail, int hastail)
+#ifndef SECTION
+# define SECTION
+#endif
+
+double
+SECTION
+__ieee754_exp (double x)
 {
   uint32_t abstop;
   uint64_t ki, idx, top, sbits;
@@ -131,9 +134,6 @@ exp_inline (double x, double xtail, int hastail)
   kd -= Shift;
 #endif
   r = x + kd * NegLn2hiN + kd * NegLn2loN;
-  /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
-  if (hastail)
-    r += xtail;
   /* 2^(k/N) ~= scale * (1 + tail).  */
   idx = 2 * (ki % N);
   top = ki << (52 - EXP_TABLE_BITS);
@@ -149,29 +149,10 @@ exp_inline (double x, double xtail, int hastail)
   if (__glibc_unlikely (abstop == 0))
     return specialcase (tmp, sbits, ki);
   scale = asdouble (sbits);
-  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+  /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-739, so there
      is no spurious underflow here even without fma.  */
   return scale + scale * tmp;
 }
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-double
-SECTION
-__ieee754_exp (double x)
-{
-  return exp_inline (x, 0, 0);
-}
 #ifndef __ieee754_exp
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
-
-/* Compute e^(x+xx).  */
-double
-SECTION
-__exp1 (double x, double xx)
-{
-  return exp_inline (x, xx, 1);
-}
author	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-06-13 17:57:20 +0100
committer	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-09-19 10:04:51 +0100
commit	424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448 (patch)
tree	52fbd60de3d3b1e99208b3018cf79ee8a230a878 /sysdeps/ieee754/dbl-64/e_exp.c
parent	dab9c3488e86d5304f3e4b778933760374494a82 (diff)
download	glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.zip glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.gz glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.bz2