26 files changed, 90 insertions, 525 deletions
diff --git a/ChangeLog b/ChangeLog
index 89a0779..9efcaca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,40 @@
+2018-02-12  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	[BZ #13932]
+	* sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove.
+	* benchtests/pow-inputs: Update comment for slow path cases.
+	* manual/probes.texi (slowpow_p10): Delete removed probe.
+	(slowpow_p10): Likewise.
+	* math/Makefile: Remove halfulp.c and slowpow.c.
+	* sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1.
+	* sysdeps/generic/math_private.h (__exp1): Remove error argument.
+	(__halfulp): Remove.
+	(__slowpow): Remove.
+	* sysdeps/i386/fpu/halfulp.c: Delete file.
+	* sysdeps/i386/fpu/slowpow.c: Likewise.
+	* sysdeps/ia64/fpu/halfulp.c: Likewise.
+	* sysdeps/ia64/fpu/slowpow.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument,
+	improve comments and add error analysis.
+	* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis.
+	(power1): Remove function:
+	(log1): Remove error argument, add error analysis.
+	(my_log2): Remove function.
+	* sysdeps/ieee754/dbl-64/halfulp.c: Delete file.
+	* sysdeps/ieee754/dbl-64/slowpow.c: Likewise.
+	* sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise.
+	* sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise.
+	* sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1.
+	* sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c,
+	slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c.
+	* sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define.
+	* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise.
+	* sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file.
+	* sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
+
 2018-02-11  Samuel Thibault  <samuel.thibault@ens-lyon.org>
 
 	* nscd/connections.c (RWLOCK_INITIALIZER): Define to
diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs
index 78f8ac7..4a51aac 100644
--- a/benchtests/pow-inputs
+++ b/benchtests/pow-inputs
@@ -302,8 +302,7 @@
 0x1.c004d2256a5b8p402, -0x1.a01df480fdcb7p98
 0x1.52b9d41aaa1e9p-589, -0x1.292cb15f1459dp46
 -0x1.ea9ca6fa0919ep-279, -0x1.601e44b6a588cp40
-# pow slow path at 240 bits
-# Implemented in sysdeps/ieee754/dbl-64/slowpow.c
+# old pow slow path at 240 bits
 ## name: 240bits
 0x1.01fcd33493ea3p596, -0x1.724bd4e887783p-14
 0x1.032ff59ab34fdp-540, -0x1.61e3632080b87p-24
@@ -405,8 +404,7 @@
 0x1.fae913d4f952ep-809, -0x1.4b649402fce63p-6
 0x1.fe6d725408f24p484, -0x1.25f4f6441d2e4p-12
 0x1.ff6393f9150ccp-718, 0x1.a0cb50a9bf2f3p-31
-# pow slowest path at 768 bits
-# Implemented in sysdeps/ieee754/dbl-64/slowpow.c
+# old pow slowest path at 768 bits
 ## name: 768bits
 1.0000000000000020, 1.5
 0x1.006777b4b61dep843, -0x1.67e3145491872p-1
diff --git a/manual/probes.texi b/manual/probes.texi
index e99b7f3..9584838 100644
--- a/manual/probes.texi
+++ b/manual/probes.texi
@@ -272,22 +272,6 @@ input that results in multiple precision computation with precision
 computed output.
 @end deftp
 
-@deftp Probe slowpow_p10 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4})
-This probe is triggered when the @code{pow} function is called with
-inputs that result in multiple precision computation with precision
-10.  Arguments @var{$arg1} and @var{$arg2} are the input values,
-@code{$arg3} is the value computed in the fast phase of the algorithm
-and @code{$arg4} is the final accurate value.
-@end deftp
-
-@deftp Probe slowpow_p32 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4})
-This probe is triggered when the @code{pow} function is called with an
-input that results in multiple precision computation with precision
-32.  Arguments @var{$arg1} and @var{$arg2} are the input values,
-@code{$arg3} is the value computed in the fast phase of the algorithm
-and @code{$arg4} is the final accurate value.
-@end deftp
-
 @deftp Probe slowatan2 (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4})
 This probe is triggered when the @code{atan2} function is called with
 an input that results in multiple precision computation.  Argument
diff --git a/math/Makefile b/math/Makefile
index 718b06d..8152011 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -123,9 +123,9 @@ type-ldouble-yes := ldouble
 
 # double support
 type-double-suffix :=
-type-double-routines := branred doasin dosincos halfulp mpa mpatan2	\
+type-double-routines := branred doasin dosincos mpa mpatan2	\
 		       mpatan mpexp mplog mpsqrt mptan sincos32 slowexp	\
-		       slowpow sincostab k_rem_pio2
+		       sincostab k_rem_pio2
 
 # float support
 type-float-suffix := f
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 5603f2e..1f46980 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1938,7 +1938,9 @@ ildouble: 1
 ldouble: 1
 
 Function: "pow":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
 ildouble: 2
 ldouble: 2
diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index 0a35cb3..07c97fa 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -250,20 +250,18 @@ fabsf128 (_Float128 x)
 
 
 /* Prototypes for functions of the IBM Accurate Mathematical Library.  */
-extern double __exp1 (double __x, double __xx, double __error);
+extern double __exp1 (double __x, double __xx);
 extern double __sin (double __x);
 extern double __cos (double __x);
 extern int __branred (double __x, double *__a, double *__aa);
 extern void __doasin (double __x, double __dx, double __v[]);
 extern void __dubsin (double __x, double __dx, double __v[]);
 extern void __dubcos (double __x, double __dx, double __v[]);
-extern double __halfulp (double __x, double __y);
 extern double __sin32 (double __x, double __res, double __res1);
 extern double __cos32 (double __x, double __res, double __res1);
 extern double __mpsin (double __x, double __dx, bool __range_reduce);
 extern double __mpcos (double __x, double __dx, bool __range_reduce);
 extern double __slowexp (double __x);
-extern double __slowpow (double __x, double __y, double __z);
 extern void __docos (double __x, double __dx, double __v[]);
 
 #ifndef math_opt_barrier
diff --git a/sysdeps/i386/fpu/halfulp.c b/sysdeps/i386/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/i386/fpu/slowpow.c b/sysdeps/i386/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/ia64/fpu/halfulp.c b/sysdeps/ia64/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/ia64/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/ia64/fpu/slowpow.c b/sysdeps/ia64/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/ia64/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 3d2560c..7a9daa5 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -233,13 +233,10 @@ ret:
 strong_alias (__ieee754_exp, __exp_finite)
 #endif
 
-/* Compute e^(x+xx).  The routine also receives bound of error of previous
-   calculation.  If after computing exp the error exceeds the allowed bounds,
-   the routine returns a non-positive number.  Otherwise it returns the
-   computed result, which is always positive.  */
+/* Compute e^(x+xx).  */
 double
 SECTION
-__exp1 (double x, double xx, double error)
+__exp1 (double x, double xx)
 {
   double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
   mynumber junk1, junk2, binexp = {{0, 0}};
@@ -249,6 +246,7 @@ __exp1 (double x, double xx, double error)
   m = junk1.i[HIGH_HALF];
   n = m & hugeint;		/* no sign */
 
+  /* fabs (x) > 5.551112e-17 and fabs (x) < 7.080010e+02.  */
   if (n > smallint && n < bigint)
     {
       y = x * log2e.x + three51.x;
@@ -276,11 +274,9 @@ __exp1 (double x, double xx, double error)
 
       rem = (bet + bet * eps) + al * eps;
       res = al + rem;
-      cor = (al - res) + rem;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x;
-      else
-	return -10.0;
+      /* Maximum relative error before rounding is 8.8e-22 (69.9 bits).
+	 Maximum ULP error is 0.500008.  */
+      return res * binexp.x;
     }
 
   if (n <= smallint)
@@ -318,6 +314,7 @@ __exp1 (double x, double xx, double error)
   cor = (al - res) + rem;
   if (m >> 31)
     {
+      /* x < 0.  */
       ex = junk1.i[LOW_HALF];
       if (res < 1.0)
 	{
@@ -328,34 +325,25 @@ __exp1 (double x, double xx, double error)
       if (ex >= -1022)
 	{
 	  binexp.i[HIGH_HALF] = (1023 + ex) << 20;
-	  if (res == (res + cor * (1.0 + error + err_1)))
-	    return res * binexp.x;
-	  else
-	    return -10.0;
+	  /* Maximum ULP error is 0.500008.  */
+	  return res * binexp.x;
 	}
+      /* Denormal case - ex < -1022.  */
       ex = -(1022 + ex);
       binexp.i[HIGH_HALF] = (1023 - ex) << 20;
       res *= binexp.x;
       cor *= binexp.x;
-      eps = 1.00000000001 + (error + err_1) * binexp.x;
       t = 1.0 + res;
       y = ((1.0 - t) + res) + cor;
       res = t + y;
-      cor = (t - res) + y;
-      if (res == (res + eps * cor))
-	{
-	  binexp.i[HIGH_HALF] = 0x00100000;
-	  return (res - 1.0) * binexp.x;
-	}
-      else
-	return -10.0;
+      binexp.i[HIGH_HALF] = 0x00100000;
+      /* Maximum ULP error is 0.500004.  */
+      return (res - 1.0) * binexp.x;
     }
   else
     {
       binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
-      if (res == (res + cor * (1.0 + error + err_1)))
-	return res * binexp.x * t256.x;
-      else
-	return -10.0;
+      /* Maximum ULP error is 0.500008.  */
+      return res * binexp.x * t256.x;
     }
 }
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index f6e5fcd..542d03a 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -20,13 +20,9 @@
 /*  MODULE_NAME: upow.c                                                    */
 /*                                                                         */
 /*  FUNCTIONS: upow                                                        */
-/*             power1                                                      */
-/*             my_log2                                                     */
 /*             log1                                                        */
 /*             checkint                                                    */
 /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h                             */
-/*               halfulp.c mpexp.c mplog.c slowexp.c slowpow.c mpa.c       */
-/*                          uexp.c  upow.c				   */
 /*               root.tbl uexp.tbl upow.tbl                                */
 /* An ultimate power routine. Given two IEEE double machine numbers y,x    */
 /* it computes the correctly rounded (to nearest) value of x^y.            */
@@ -50,11 +46,8 @@
 
 static const double huge = 1.0e300, tiny = 1.0e-300;
 
-double __exp1 (double x, double xx, double error);
-static double log1 (double x, double *delta, double *error);
-static double my_log2 (double x, double *delta, double *error);
-double __slowpow (double x, double y, double z);
-static double power1 (double x, double y);
+double __exp1 (double x, double xx);
+static double log1 (double x, double *delta);
 static int checkint (double x);
 
 /* An ultimate power routine. Given two IEEE double machine numbers y, x it
@@ -63,7 +56,7 @@ double
 SECTION
 __ieee754_pow (double x, double y)
 {
-  double z, a, aa, error, t, a1, a2, y1, y2;
+  double z, a, aa, t, a1, a2, y1, y2;
   mynumber u, v;
   int k;
   int4 qx, qy;
@@ -100,7 +93,7 @@ __ieee754_pow (double x, double y)
 	   not matter if |y| <= 2**-64.  */
 	if (fabs (y) < 0x1p-64)
 	  y = y < 0 ? -0x1p-64 : 0x1p-64;
-	z = log1 (x, &aa, &error);	/* x^y  =e^(y log (X)) */
+	z = log1 (x, &aa);	/* x^y  =e^(y log (X)) */
 	t = y * CN;
 	y1 = t - (t - y);
 	y2 = y - y1;
@@ -111,9 +104,16 @@ __ieee754_pow (double x, double y)
 	aa = y2 * a1 + y * a2;
 	a1 = a + aa;
 	a2 = (a - a1) + aa;
-	error = error * fabs (y);
-	t = __exp1 (a1, a2, 1.9e16 * error);	/* return -10 or 0 if wasn't computed exactly */
-	retval = (t > 0) ? t : power1 (x, y);
+
+	/* Maximum relative error RElog of log1 is 1.0e-21 (69.7 bits).
+	   Maximum relative error REexp of __exp1 is 8.8e-22 (69.9 bits).
+	   We actually compute exp ((1 + RElog) * log (x) * y) * (1 + REexp).
+	   Since RElog/REexp are tiny and log (x) * y is at most log (DBL_MAX),
+	   this is equivalent to pow (x, y) * (1 + 710 * RElog + REexp).
+	   So the relative error is 710 * 1.0e-21 + 8.8e-22 = 7.1e-19
+	   (60.2 bits).  The worst-case ULP error is 0.5064.  */
+
+	retval = __exp1 (a1, a2);
       }
 
       if (isinf (retval))
@@ -218,33 +218,11 @@ __ieee754_pow (double x, double y)
 strong_alias (__ieee754_pow, __pow_finite)
 #endif
 
-/* Compute x^y using more accurate but more slow log routine.  */
-static double
-SECTION
-power1 (double x, double y)
-{
-  double z, a, aa, error, t, a1, a2, y1, y2;
-  z = my_log2 (x, &aa, &error);
-  t = y * CN;
-  y1 = t - (t - y);
-  y2 = y - y1;
-  t = z * CN;
-  a1 = t - (t - z);
-  a2 = z - a1;
-  a = y * z;
-  aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
-  a1 = a + aa;
-  a2 = (a - a1) + aa;
-  error = error * fabs (y);
-  t = __exp1 (a1, a2, 1.9e16 * error);
-  return (t >= 0) ? t : __slowpow (x, y, z);
-}
-
 /* Compute log(x) (x is left argument). The result is the returned double + the
-   parameter DELTA.  The result is bounded by ERROR.  */
+   parameter DELTA.  */
 static double
 SECTION
-log1 (double x, double *delta, double *error)
+log1 (double x, double *delta)
 {
   unsigned int i, j;
   int m;
@@ -260,9 +238,7 @@ log1 (double x, double *delta, double *error)
 
   u.x = x;
   m = u.i[HIGH_HALF];
-  *error = 0;
-  *delta = 0;
-  if (m < 0x00100000)		/*  1<x<2^-1007 */
+  if (m < 0x00100000)		/* Handle denormal x.  */
     {
       x = x * t52.x;
       add = -52.0;
@@ -284,7 +260,7 @@ log1 (double x, double *delta, double *error)
   v.x = u.x + bigu.x;
   uu = v.x - bigu.x;
   i = (v.i[LOW_HALF] & 0x000003ff) << 2;
-  if (two52.i[LOW_HALF] == 1023)	/* nx = 0              */
+  if (two52.i[LOW_HALF] == 1023)	/* Exponent of x is 0.  */
     {
       if (i > 1192 && i < 1208)	/* |x-1| < 1.5*2**-10  */
 	{
@@ -296,8 +272,8 @@ log1 (double x, double *delta, double *error)
 							   * (r7 + t * r8)))))
 		- 0.5 * t2 * (t + t1));
 	  res = e1 + e2;
-	  *error = 1.0e-21 * fabs (t);
 	  *delta = (e1 - res) + e2;
+	  /* Max relative error is 1.464844e-24, so accurate to 79.1 bits.  */
 	  return res;
 	}			/* |x-1| < 1.5*2**-10  */
       else
@@ -316,12 +292,12 @@ log1 (double x, double *delta, double *error)
 	  t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
 		* (p2 + e * (p3 + e * p4)));
 	  res = t1 + t2;
-	  *error = 1.0e-24;
 	  *delta = (t1 - res) + t2;
+	  /* Max relative error is 1.0e-24, so accurate to 79.7 bits.  */
 	  return res;
 	}
-    }				/* nx = 0 */
-  else				/* nx != 0   */
+    }
+  else				/* Exponent of x != 0.  */
     {
       eps = u.x - uu;
       nx = (two52.x - two52e.x) + add;
@@ -334,113 +310,13 @@ log1 (double x, double *delta, double *error)
       t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
 	    * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
       res = t1 + t2;
-      *error = 1.0e-21;
-      *delta = (t1 - res) + t2;
-      return res;
-    }				/* nx != 0   */
-}
-
-/* Slower but more accurate routine of log.  The returned result is double +
-   DELTA.  The result is bounded by ERROR.  */
-static double
-SECTION
-my_log2 (double x, double *delta, double *error)
-{
-  unsigned int i, j;
-  int m;
-  double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
-  double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
-  double y, yy, z, zz, j1, j2, j7, j8;
-#ifndef DLA_FMS
-  double j3, j4, j5, j6;
-#endif
-  mynumber u, v;
-#ifdef BIG_ENDI
-  mynumber /**/ two52 = {{0x43300000, 0x00000000}};	/* 2**52  */
-#else
-# ifdef LITTLE_ENDI
-  mynumber /**/ two52 = {{0x00000000, 0x43300000}};	/* 2**52  */
-# endif
-#endif
-
-  u.x = x;
-  m = u.i[HIGH_HALF];
-  *error = 0;
-  *delta = 0;
-  add = 0;
-  if (m < 0x00100000)
-    {				/* x < 2^-1022 */
-      x = x * t52.x;
-      add = -52.0;
-      u.x = x;
-      m = u.i[HIGH_HALF];
-    }
-
-  if ((m & 0x000fffff) < 0x0006a09e)
-    {
-      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
-      two52.i[LOW_HALF] = (m >> 20);
-    }
-  else
-    {
-      u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
-      two52.i[LOW_HALF] = (m >> 20) + 1;
-    }
-
-  v.x = u.x + bigu.x;
-  uu = v.x - bigu.x;
-  i = (v.i[LOW_HALF] & 0x000003ff) << 2;
-  /*------------------------------------- |x-1| < 2**-11-------------------------------  */
-  if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
-    {
-      t = x - 1.0;
-      EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
-      ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
-      MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
-      MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
-
-      e1 = t + z;
-      e2 = ((((t - e1) + z) + zz) + t * t * t
-	    * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
-      res = e1 + e2;
-      *error = 1.0e-25 * fabs (t);
-      *delta = (e1 - res) + e2;
-      return res;
-    }
-  /*----------------------------- |x-1| > 2**-11  --------------------------  */
-  else
-    {				/*Computing log(x) according to log table                        */
-      nx = (two52.x - two52e.x) + add;
-      ou1 = ui.x[i];
-      ou2 = ui.x[i + 1];
-      lu1 = ui.x[i + 2];
-      lu2 = ui.x[i + 3];
-      v.x = u.x * (ou1 + ou2) + bigv.x;
-      vv = v.x - bigv.x;
-      j = v.i[LOW_HALF] & 0x0007ffff;
-      j = j + j + j;
-      eps = u.x - uu * vv;
-      ov = vj.x[j];
-      lv1 = vj.x[j + 1];
-      lv2 = vj.x[j + 2];
-      a = (ou1 + ou2) * (1.0 + ov);
-      a1 = (a + 1.0e10) - 1.0e10;
-      a2 = a * (1.0 - a1 * uu * vv);
-      e1 = eps * a1;
-      e2 = eps * a2;
-      e = e1 + e2;
-      e2 = (e1 - e) + e2;
-      t = nx * ln2a.x + lu1 + lv1;
-      t1 = t + e;
-      t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
-	    * (p2 + e * (p3 + e * p4)));
-      res = t1 + t2;
-      *error = 1.0e-27;
       *delta = (t1 - res) + t2;
+      /* Max relative error is 1.0e-21, so accurate to 69.7 bits.  */
       return res;
     }
 }
 
+
 /* This function receives a double x and checks if it is an integer.  If not,
    it returns 0, else it returns 1 if even or -1 if odd.  */
 static int
diff --git a/sysdeps/ieee754/dbl-64/halfulp.c b/sysdeps/ieee754/dbl-64/halfulp.c
deleted file mode 100644
index 0768d86..0000000
--- a/sysdeps/ieee754/dbl-64/halfulp.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/************************************************************************/
-/*                                                                      */
-/* MODULE_NAME:halfulp.c                                                */
-/*                                                                      */
-/*  FUNCTIONS:halfulp                                                   */
-/*  FILES NEEDED: mydefs.h dla.h endian.h                               */
-/*                uroot.c                                               */
-/*                                                                      */
-/*Routine halfulp(double x, double y) computes x^y where result does    */
-/*not need rounding. If the result is closer to 0 than can be           */
-/*represented it returns 0.                                             */
-/*     In the following cases the function does not compute anything    */
-/*and returns a negative number:                                        */
-/*1. if the result needs rounding,                                      */
-/*2. if y is outside the interval [0,  2^20-1],                         */
-/*3. if x can be represented by  x=2**n for some integer n.             */
-/************************************************************************/
-
-#include "endian.h"
-#include "mydefs.h"
-#include <dla.h>
-#include <math_private.h>
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-static const int4 tab54[32] = {
-  262143, 11585,  1782, 511, 210, 107, 63, 42,
-  30,     22,     17,   14,  12,  10,  9, 7,
-  7,      6,      5,    5,   5,   4,   4, 4,
-  3,      3,      3,    3,   3,   3,   3, 3
-};
-
-
-double
-SECTION
-__halfulp (double x, double y)
-{
-  mynumber v;
-  double z, u, uu;
-#ifndef DLA_FMS
-  double j1, j2, j3, j4, j5;
-#endif
-  int4 k, l, m, n;
-  if (y <= 0)                 /*if power is negative or zero */
-    {
-      v.x = y;
-      if (v.i[LOW_HALF] != 0)
-	return -10.0;
-      v.x = x;
-      if (v.i[LOW_HALF] != 0)
-	return -10.0;
-      if ((v.i[HIGH_HALF] & 0x000fffff) != 0)
-	return -10;                                     /* if x =2 ^ n */
-      k = ((v.i[HIGH_HALF] & 0x7fffffff) >> 20) - 1023; /* find this n */
-      z = (double) k;
-      return (z * y == -1075.0) ? 0 : -10.0;
-    }
-  /* if y > 0  */
-  v.x = y;
-  if (v.i[LOW_HALF] != 0)
-    return -10.0;
-
-  v.x = x;
-  /*  case where x = 2**n for some integer n */
-  if (((v.i[HIGH_HALF] & 0x000fffff) | v.i[LOW_HALF]) == 0)
-    {
-      k = (v.i[HIGH_HALF] >> 20) - 1023;
-      return (((double) k) * y == -1075.0) ? 0 : -10.0;
-    }
-
-  v.x = y;
-  k = v.i[HIGH_HALF];
-  m = k << 12;
-  l = 0;
-  while (m)
-    {
-      m = m << 1; l++;
-    }
-  n = (k & 0x000fffff) | 0x00100000;
-  n = n >> (20 - l);                       /*   n is the odd integer of y    */
-  k = ((k >> 20) - 1023) - l;               /*   y = n*2**k                   */
-  if (k > 5)
-    return -10.0;
-  if (k > 0)
-    for (; k > 0; k--)
-      n *= 2;
-  if (n > 34)
-    return -10.0;
-  k = -k;
-  if (k > 5)
-    return -10.0;
-
-  /*   now treat x        */
-  while (k > 0)
-    {
-      z = __ieee754_sqrt (x);
-      EMULV (z, z, u, uu, j1, j2, j3, j4, j5);
-      if (((u - x) + uu) != 0)
-	break;
-      x = z;
-      k--;
-    }
-  if (k)
-    return -10.0;
-
-  /* it is impossible that n == 2,  so the mantissa of x must be short  */
-
-  v.x = x;
-  if (v.i[LOW_HALF])
-    return -10.0;
-  k = v.i[HIGH_HALF];
-  m = k << 12;
-  l = 0;
-  while (m)
-    {
-      m = m << 1; l++;
-    }
-  m = (k & 0x000fffff) | 0x00100000;
-  m = m >> (20 - l);                       /*   m is the odd integer of x    */
-
-  /*   now check whether the length of m**n is at most 54 bits */
-
-  if (m > tab54[n - 3])
-    return -10.0;
-
-  /* yes, it is - now compute x**n by simple multiplications  */
-
-  u = x;
-  for (k = 1; k < n; k++)
-    u = u * x;
-  return u;
-}
diff --git a/sysdeps/ieee754/dbl-64/slowpow.c b/sysdeps/ieee754/dbl-64/slowpow.c
deleted file mode 100644
index d7c7fb3..0000000
--- a/sysdeps/ieee754/dbl-64/slowpow.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*************************************************************************/
-/* MODULE_NAME:slowpow.c                                                 */
-/*                                                                       */
-/* FUNCTION:slowpow                                                      */
-/*                                                                       */
-/*FILES NEEDED:mpa.h                                                     */
-/*             mpa.c mpexp.c mplog.c halfulp.c                           */
-/*                                                                       */
-/* Given two IEEE double machine numbers y,x , routine  computes the     */
-/* correctly  rounded (to nearest) value of x^y. Result calculated  by   */
-/* multiplication (in halfulp.c) or if result isn't accurate enough      */
-/* then routine converts x and y into multi-precision doubles     and    */
-/* calls to mpexp routine                                                */
-/*************************************************************************/
-
-#include "mpa.h"
-#include <math_private.h>
-
-#include <stap-probe.h>
-
-#ifndef SECTION
-# define SECTION
-#endif
-
-void __mpexp (mp_no *x, mp_no *y, int p);
-void __mplog (mp_no *x, mp_no *y, int p);
-double ulog (double);
-double __halfulp (double x, double y);
-
-double
-SECTION
-__slowpow (double x, double y, double z)
-{
-  double res, res1;
-  mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
-  static const mp_no eps = {-3, {1.0, 4.0}};
-  int p;
-
-  /* __HALFULP returns -10 or X^Y.  */
-  res = __halfulp (x, y);
-
-  /* Return if the result was computed by __HALFULP.  */
-  if (res >= 0)
-    return res;
-
-  /* Compute pow as long double.  This is currently only used by powerpc, where
-     one may get 106 bits of accuracy.  */
-#ifdef USE_LONG_DOUBLE_FOR_MP
-  long double ldw, ldz, ldpp;
-  static const long double ldeps = 0x4.0p-96;
-
-  ldz = __ieee754_logl ((long double) x);
-  ldw = (long double) y *ldz;
-  ldpp = __ieee754_expl (ldw);
-  res = (double) (ldpp + ldeps);
-  res1 = (double) (ldpp - ldeps);
-
-  /* Return the result if it is accurate enough.  */
-  if (res == res1)
-    return res;
-#endif
-
-  /* Or else, calculate using multiple precision.  P = 10 implies accuracy of
-     240 bits accuracy, since MP_NO has a radix of 2^24.  */
-  p = 10;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-
-  /* z = x ^ y
-     log (z) = y * log (x)
-     z = exp (y * log (x))  */
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-
-  /* Add and subtract EPS to ensure that the result remains unchanged, i.e. we
-     have last bit accuracy.  */
-  __add (&mpp, &eps, &mpr, p);
-  __mp_dbl (&mpr, &res, p);
-  __sub (&mpp, &eps, &mpr1, p);
-  __mp_dbl (&mpr1, &res1, p);
-  if (res == res1)
-    {
-      /* Track how often we get to the slow pow code plus
-	 its input/output values.  */
-      LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res);
-      return res;
-    }
-
-  /* If we don't, then we repeat using a higher precision.  768 bits of
-     precision ought to be enough for anybody.  */
-  p = 32;
-  __dbl_mp (x, &mpx, p);
-  __dbl_mp (y, &mpy, p);
-  __dbl_mp (z, &mpz, p);
-  __mplog (&mpx, &mpz, p);
-  __mul (&mpy, &mpz, &mpw, p);
-  __mpexp (&mpw, &mpp, p);
-  __mp_dbl (&mpp, &res, p);
-
-  /* Track how often we get to the uber-slow pow code plus
-     its input/output values.  */
-  LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res);
-
-  return res;
-}
diff --git a/sysdeps/ieee754/dbl-64/uexp.h b/sysdeps/ieee754/dbl-64/uexp.h
index a8a023e..2edf530 100644
--- a/sysdeps/ieee754/dbl-64/uexp.h
+++ b/sysdeps/ieee754/dbl-64/uexp.h
@@ -30,7 +30,7 @@
 #include "mydefs.h"
 
 const static double zero = 0.0, hhuge = 1.0e300, tiny = 1.0e-300,
-err_0 = 1.000014, err_1 = 0.000016;
+err_0 = 1.000014;
 const static int4 bigint = 0x40862002,
              badint = 0x40876000,smallint = 0x3C8fffff;
 const static int4 hugeint = 0x7FFFFFFF, infint = 0x7ff00000;
diff --git a/sysdeps/m68k/m680x0/fpu/halfulp.c b/sysdeps/m68k/m680x0/fpu/halfulp.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/m68k/m680x0/fpu/halfulp.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/m68k/m680x0/fpu/slowpow.c b/sysdeps/m68k/m680x0/fpu/slowpow.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/m68k/m680x0/fpu/slowpow.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/powerpc/power4/fpu/Makefile b/sysdeps/powerpc/power4/fpu/Makefile
index e17d32f..fa1b070 100644
--- a/sysdeps/powerpc/power4/fpu/Makefile
+++ b/sysdeps/powerpc/power4/fpu/Makefile
@@ -2,6 +2,5 @@
 
 ifeq ($(subdir),math)
 CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops
-CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1
 CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1
 endif
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 85552bd..48e53f7 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2468,8 +2468,10 @@ Function: "log_vlen8_avx2":
 float: 2
 
 Function: "pow":
+double: 1
 float: 1
 float128: 2
+idouble: 1
 ifloat: 1
 ifloat128: 2
 ildouble: 1
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 9a89bfc..9391eb5 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -10,9 +10,9 @@ libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
 
 libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
 			e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
-			mplog-fma mpa-fma slowexp-fma slowpow-fma \
+			mplog-fma mpa-fma slowexp-fma \
 			sincos32-fma doasin-fma dosincos-fma \
-			halfulp-fma mpexp-fma \
+			mpexp-fma \
 			mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
 
 CFLAGS-doasin-fma.c = -mfma -mavx2
@@ -22,7 +22,6 @@ CFLAGS-e_atan2-fma.c = -mfma -mavx2
 CFLAGS-e_exp-fma.c = -mfma -mavx2
 CFLAGS-e_log-fma.c = -mfma -mavx2
 CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma)
-CFLAGS-halfulp-fma.c = -mfma -mavx2
 CFLAGS-mpa-fma.c = -mfma -mavx2
 CFLAGS-mpatan-fma.c = -mfma -mavx2
 CFLAGS-mpatan2-fma.c = -mfma -mavx2
@@ -33,7 +32,6 @@ CFLAGS-mptan-fma.c = -mfma -mavx2
 CFLAGS-s_atan-fma.c = -mfma -mavx2
 CFLAGS-sincos32-fma.c = -mfma -mavx2
 CFLAGS-slowexp-fma.c = -mfma -mavx2
-CFLAGS-slowpow-fma.c = -mfma -mavx2
 CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 
@@ -53,9 +51,9 @@ CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
 			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
-			mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+			mplog-fma4 mpa-fma4 slowexp-fma4 \
 			sincos32-fma4 doasin-fma4 dosincos-fma4 \
-			halfulp-fma4 mpexp-fma4 \
+			mpexp-fma4 \
 			mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
 
 CFLAGS-doasin-fma4.c = -mfma4
@@ -65,7 +63,6 @@ CFLAGS-e_atan2-fma4.c = -mfma4
 CFLAGS-e_exp-fma4.c = -mfma4
 CFLAGS-e_log-fma4.c = -mfma4
 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma)
-CFLAGS-halfulp-fma4.c = -mfma4
 CFLAGS-mpa-fma4.c = -mfma4
 CFLAGS-mpatan-fma4.c = -mfma4
 CFLAGS-mpatan2-fma4.c = -mfma4
@@ -76,7 +73,6 @@ CFLAGS-mptan-fma4.c = -mfma4
 CFLAGS-s_atan-fma4.c = -mfma4
 CFLAGS-sincos32-fma4.c = -mfma4
 CFLAGS-slowexp-fma4.c = -mfma4
-CFLAGS-slowpow-fma4.c = -mfma4
 CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
index 6fd4083..73c1e7f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
@@ -1,6 +1,5 @@
 #define __ieee754_pow __ieee754_pow_fma
 #define __exp1 __exp1_fma
-#define __slowpow __slowpow_fma
 #define SECTION __attribute__ ((section (".text.fma")))
 
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
index 5b3ea8e..8971b65 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
@@ -1,6 +1,5 @@
 #define __ieee754_pow __ieee754_pow_fma4
 #define __exp1 __exp1_fma4
-#define __slowpow __slowpow_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c
deleted file mode 100644
index 6ca7046..0000000
--- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma
-#define SECTION __attribute__ ((section (".text.fma")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
deleted file mode 100644
index a00c17c..0000000
--- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c
deleted file mode 100644
index 160ed68..0000000
--- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma
-#define __add __add_fma
-#define __dbl_mp __dbl_mp_fma
-#define __mpexp __mpexp_fma
-#define __mplog __mplog_fma
-#define __mul __mul_fma
-#define __sub __sub_fma
-#define __halfulp __halfulp_fma
-#define SECTION __attribute__ ((section (".text.fma")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
deleted file mode 100644
index 69d6982..0000000
--- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mplog __mplog_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>