Remove slow paths from pow

Remove the slow paths from pow. Like several other double precision math functions, pow is exactly rounded. This is not required from math functions and causes major overheads as it requires multiple fallbacks using higher precision arithmetic if a result is close to 0.5ULP. Ridiculous slowdowns of up to 100000x have been reported when the highest precision path triggers. All GLIBC math tests pass on AArch64 and x64 (with ULP of pow set to 1). The worst case error is ~0.506ULP. A simple test over a few hundred million values shows pow is 10% faster on average. This fixes BZ #13932. [BZ #13932] * sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove. * benchtests/pow-inputs: Update comment for slow path cases. * manual/probes.texi (slowpow_p10): Delete removed probe. (slowpow_p32): Likewise. * math/Makefile: Remove halfulp.c and slowpow.c. * sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1. * sysdeps/generic/math_private.h (__exp1): Remove error argument. (__halfulp): Remove. (__slowpow): Remove. * sysdeps/i386/fpu/halfulp.c: Delete file. * sysdeps/i386/fpu/slowpow.c: Likewise. * sysdeps/ia64/fpu/halfulp.c: Likewise. * sysdeps/ia64/fpu/slowpow.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument, improve comments and add error analysis. * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis. (power1): Remove function. (log1): Remove error argument, add error analysis. (my_log2): Remove function. * sysdeps/ieee754/dbl-64/halfulp.c: Delete file. * sysdeps/ieee754/dbl-64/slowpow.c: Likewise. * sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise. * sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise. * sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c. * sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1. * sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c, slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c. * sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define. 
* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise. * sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file. * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise. * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
author: Wilco Dijkstra <wdijkstr@arm.com> 2018-02-12 10:42:42 +0000
committer: Wilco Dijkstra <wdijkstr@arm.com> 2018-02-12 10:47:09 +0000
commit: c3d466cba1692708a19c6ff829d0386c83a0c6e5 (patch)
tree: d01ce6103dc25d3b662898c3429b8b103b8d3155 /sysdeps/ieee754/dbl-64/e_exp.c
parent: 7bb087bd7bfe3616c4c0974a3f7352b593353ea5 (diff)
download: glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.zip
glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.gz
glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.bz2
1 files changed, 15 insertions, 27 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 3d2560c..7a9daa5 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -233,13 +233,10 @@ ret:
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
 
-/* Compute e^(x+xx).  The routine also receives bound of error of previous
-   calculation.  If after computing exp the error exceeds the allowed bounds,
-   the routine returns a non-positive number.  Otherwise it returns the
-   computed result, which is always positive.  */
+/* Compute e^(x+xx).  */
 double
 SECTION
-__exp1 (double x, double xx, double error)
+__exp1 (double x, double xx)
 {
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
   mynumber junk1, junk2, binexp = {{0, 0}};
@@ -249,6 +246,7 @@ __exp1 (double x, double xx, double error)
   m = junk1.i[HIGH_HALF];
   n = m & hugeint;		/* no sign */
 
+  /* fabs (x) > 5.551112e-17 and fabs (x) < 7.080010e+02.  */
   if (n > smallint && n < bigint)
     {
       y = x * log2e.x + three51.x;
@@ -276,11 +274,9 @@ __exp1 (double x, double xx, double error)
 
       rem = (bet + bet * eps) + al * eps;
       res = al + rem;
-      cor = (al - res) + rem;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x;
-      else
-	return -10.0;
+      /* Maximum relative error before rounding is 8.8e-22 (69.9 bits).
+	 Maximum ULP error is 0.500008.  */
+      return res * binexp.x;
     }
 
   if (n <= smallint)
@@ -318,6 +314,7 @@ __exp1 (double x, double xx, double error)
   cor = (al - res) + rem;
   if (m >> 31)
     {
+      /* x < 0.  */
       ex = junk1.i[LOW_HALF];
       if (res < 1.0)
 	{
@@ -328,34 +325,25 @@ __exp1 (double x, double xx, double error)
       if (ex >= -1022)
 	{
 	  binexp.i[HIGH_HALF] = (1023 + ex) << 20;
-	  if (res == (res + cor * (1.0 + error + err_1)))
-	    return res * binexp.x;
-	  else
-	    return -10.0;
+	  /* Maximum ULP error is 0.500008.  */
+	  return res * binexp.x;
 	}
+      /* Denormal case - ex < -1022.  */
       ex = -(1022 + ex);
       binexp.i[HIGH_HALF] = (1023 - ex) << 20;
       res *= binexp.x;
       cor *= binexp.x;
-      eps = 1.00000000001 + (error + err_1) * binexp.x;
       t = 1.0 + res;
       y = ((1.0 - t) + res) + cor;
       res = t + y;
-      cor = (t - res) + y;
-      if (res == (res + eps * cor))
-	{
-	  binexp.i[HIGH_HALF] = 0x00100000;
-	  return (res - 1.0) * binexp.x;
-	}
-      else
-	return -10.0;
+      binexp.i[HIGH_HALF] = 0x00100000;
+      /* Maximum ULP error is 0.500004.  */
+      return (res - 1.0) * binexp.x;
     }
   else
     {
       binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x * t256.x;
-      else
-	return -10.0;
+      /* Maximum ULP error is 0.500008.  */
+      return res * binexp.x * t256.x;
     }
 }
author	Wilco Dijkstra <wdijkstr@arm.com>	2018-02-12 10:42:42 +0000
committer	Wilco Dijkstra <wdijkstr@arm.com>	2018-02-12 10:47:09 +0000
commit	c3d466cba1692708a19c6ff829d0386c83a0c6e5 (patch)
tree	d01ce6103dc25d3b662898c3429b8b103b8d3155 /sysdeps/ieee754/dbl-64/e_exp.c
parent	7bb087bd7bfe3616c4c0974a3f7352b593353ea5 (diff)
download	glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.zip glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.gz glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.bz2