diff options
26 files changed, 90 insertions, 525 deletions
@@ -1,3 +1,40 @@ +2018-02-12 Wilco Dijkstra <wdijkstr@arm.com> + + [BZ #13932] + * sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove. + * benchtests/pow-inputs: Update comment for slow path cases. + * manual/probes.texi (slowpow_p10): Delete removed probe. + (slowpow_p10): Likewise. + * math/Makefile: Remove halfulp.c and slowpow.c. + * sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1. + * sysdeps/generic/math_private.h (__exp1): Remove error argument. + (__halfulp): Remove. + (__slowpow): Remove. + * sysdeps/i386/fpu/halfulp.c: Delete file. + * sysdeps/i386/fpu/slowpow.c: Likewise. + * sysdeps/ia64/fpu/halfulp.c: Likewise. + * sysdeps/ia64/fpu/slowpow.c: Likewise. + * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument, + improve comments and add error analysis. + * sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis. + (power1): Remove function: + (log1): Remove error argument, add error analysis. + (my_log2): Remove function. + * sysdeps/ieee754/dbl-64/halfulp.c: Delete file. + * sysdeps/ieee754/dbl-64/slowpow.c: Likewise. + * sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise. + * sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise. + * sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c. + * sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1. + * sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c, + slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c. + * sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define. + * sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise. + * sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file. + * sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise. + * sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise. + 2018-02-11 Samuel Thibault <samuel.thibault@ens-lyon.org> * nscd/connections.c (RWLOCK_INITIALIZER): Define to diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs index 78f8ac7..4a51aac 100644 --- a/benchtests/pow-inputs +++ b/benchtests/pow-inputs @@ -302,8 +302,7 @@ 0x1.c004d2256a5b8p402, -0x1.a01df480fdcb7p98 0x1.52b9d41aaa1e9p-589, -0x1.292cb15f1459dp46 -0x1.ea9ca6fa0919ep-279, -0x1.601e44b6a588cp40 -# pow slow path at 240 bits -# Implemented in sysdeps/ieee754/dbl-64/slowpow.c +# old pow slow path at 240 bits ## name: 240bits 0x1.01fcd33493ea3p596, -0x1.724bd4e887783p-14 0x1.032ff59ab34fdp-540, -0x1.61e3632080b87p-24 @@ -405,8 +404,7 @@ 0x1.fae913d4f952ep-809, -0x1.4b649402fce63p-6 0x1.fe6d725408f24p484, -0x1.25f4f6441d2e4p-12 0x1.ff6393f9150ccp-718, 0x1.a0cb50a9bf2f3p-31 -# pow slowest path at 768 bits -# Implemented in sysdeps/ieee754/dbl-64/slowpow.c +# old pow slowest path at 768 bits ## name: 768bits 1.0000000000000020, 1.5 0x1.006777b4b61dep843, -0x1.67e3145491872p-1 diff --git a/manual/probes.texi b/manual/probes.texi index e99b7f3..9584838 100644 --- a/manual/probes.texi +++ b/manual/probes.texi @@ -272,22 +272,6 @@ input that results in multiple precision computation with precision computed output. @end deftp -@deftp Probe slowpow_p10 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) -This probe is triggered when the @code{pow} function is called with -inputs that result in multiple precision computation with precision -10. Arguments @var{$arg1} and @var{$arg2} are the input values, -@code{$arg3} is the value computed in the fast phase of the algorithm -and @code{$arg4} is the final accurate value. -@end deftp - -@deftp Probe slowpow_p32 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) -This probe is triggered when the @code{pow} function is called with an -input that results in multiple precision computation with precision -32. Arguments @var{$arg1} and @var{$arg2} are the input values, -@code{$arg3} is the value computed in the fast phase of the algorithm -and @code{$arg4} is the final accurate value. -@end deftp - @deftp Probe slowatan2 (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) This probe is triggered when the @code{atan2} function is called with an input that results in multiple precision computation. Argument diff --git a/math/Makefile b/math/Makefile index 718b06d..8152011 100644 --- a/math/Makefile +++ b/math/Makefile @@ -123,9 +123,9 @@ type-ldouble-yes := ldouble # double support type-double-suffix := -type-double-routines := branred doasin dosincos halfulp mpa mpatan2 \ +type-double-routines := branred doasin dosincos mpa mpatan2 \ mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \ - slowpow sincostab k_rem_pio2 + sincostab k_rem_pio2 # float support type-float-suffix := f diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps index 5603f2e..1f46980 100644 --- a/sysdeps/aarch64/libm-test-ulps +++ b/sysdeps/aarch64/libm-test-ulps @@ -1938,7 +1938,9 @@ ildouble: 1 ldouble: 1 Function: "pow": +double: 1 float: 1 +idouble: 1 ifloat: 1 ildouble: 2 ldouble: 2 diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h index 0a35cb3..07c97fa 100644 --- a/sysdeps/generic/math_private.h +++ b/sysdeps/generic/math_private.h @@ -250,20 +250,18 @@ fabsf128 (_Float128 x) /* Prototypes for functions of the IBM Accurate Mathematical Library. */ -extern double __exp1 (double __x, double __xx, double __error); +extern double __exp1 (double __x, double __xx); extern double __sin (double __x); extern double __cos (double __x); extern int __branred (double __x, double *__a, double *__aa); extern void __doasin (double __x, double __dx, double __v[]); extern void __dubsin (double __x, double __dx, double __v[]); extern void __dubcos (double __x, double __dx, double __v[]); -extern double __halfulp (double __x, double __y); extern double __sin32 (double __x, double __res, double __res1); extern double __cos32 (double __x, double __res, double __res1); extern double __mpsin (double __x, double __dx, bool __range_reduce); extern double __mpcos (double __x, double __dx, bool __range_reduce); extern double __slowexp (double __x); -extern double __slowpow (double __x, double __y, double __z); extern void __docos (double __x, double __dx, double __v[]); #ifndef math_opt_barrier diff --git a/sysdeps/i386/fpu/halfulp.c b/sysdeps/i386/fpu/halfulp.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/i386/fpu/halfulp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/fpu/slowpow.c b/sysdeps/i386/fpu/slowpow.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/i386/fpu/slowpow.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/ia64/fpu/halfulp.c b/sysdeps/ia64/fpu/halfulp.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/ia64/fpu/halfulp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/ia64/fpu/slowpow.c b/sysdeps/ia64/fpu/slowpow.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/ia64/fpu/slowpow.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c index 3d2560c..7a9daa5 100644 --- a/sysdeps/ieee754/dbl-64/e_exp.c +++ b/sysdeps/ieee754/dbl-64/e_exp.c @@ -233,13 +233,10 @@ ret: strong_alias (__ieee754_exp, __exp_finite) #endif -/* Compute e^(x+xx). The routine also receives bound of error of previous - calculation. If after computing exp the error exceeds the allowed bounds, - the routine returns a non-positive number. Otherwise it returns the - computed result, which is always positive. */ +/* Compute e^(x+xx). */ double SECTION -__exp1 (double x, double xx, double error) +__exp1 (double x, double xx) { double bexp, t, eps, del, base, y, al, bet, res, rem, cor; mynumber junk1, junk2, binexp = {{0, 0}}; @@ -249,6 +246,7 @@ __exp1 (double x, double xx, double error) m = junk1.i[HIGH_HALF]; n = m & hugeint; /* no sign */ + /* fabs (x) > 5.551112e-17 and fabs (x) < 7.080010e+02. */ if (n > smallint && n < bigint) { y = x * log2e.x + three51.x; @@ -276,11 +274,9 @@ __exp1 (double x, double xx, double error) rem = (bet + bet * eps) + al * eps; res = al + rem; - cor = (al - res) + rem; - if (res == (res + cor * (1.0 + error + err_1))) - return res * binexp.x; - else - return -10.0; + /* Maximum relative error before rounding is 8.8e-22 (69.9 bits). + Maximum ULP error is 0.500008. */ + return res * binexp.x; } if (n <= smallint) @@ -318,6 +314,7 @@ __exp1 (double x, double xx, double error) cor = (al - res) + rem; if (m >> 31) { + /* x < 0. */ ex = junk1.i[LOW_HALF]; if (res < 1.0) { @@ -328,34 +325,25 @@ __exp1 (double x, double xx, double error) if (ex >= -1022) { binexp.i[HIGH_HALF] = (1023 + ex) << 20; - if (res == (res + cor * (1.0 + error + err_1))) - return res * binexp.x; - else - return -10.0; + /* Maximum ULP error is 0.500008. */ + return res * binexp.x; } + /* Denormal case - ex < -1022. */ ex = -(1022 + ex); binexp.i[HIGH_HALF] = (1023 - ex) << 20; res *= binexp.x; cor *= binexp.x; - eps = 1.00000000001 + (error + err_1) * binexp.x; t = 1.0 + res; y = ((1.0 - t) + res) + cor; res = t + y; - cor = (t - res) + y; - if (res == (res + eps * cor)) - { - binexp.i[HIGH_HALF] = 0x00100000; - return (res - 1.0) * binexp.x; - } - else - return -10.0; + binexp.i[HIGH_HALF] = 0x00100000; + /* Maximum ULP error is 0.500004. */ + return (res - 1.0) * binexp.x; } else { binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20; - if (res == (res + cor * (1.0 + error + err_1))) - return res * binexp.x * t256.x; - else - return -10.0; + /* Maximum ULP error is 0.500008. */ + return res * binexp.x * t256.x; } } diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c index f6e5fcd..542d03a 100644 --- a/sysdeps/ieee754/dbl-64/e_pow.c +++ b/sysdeps/ieee754/dbl-64/e_pow.c @@ -20,13 +20,9 @@ /* MODULE_NAME: upow.c */ /* */ /* FUNCTIONS: upow */ -/* power1 */ -/* my_log2 */ /* log1 */ /* checkint */ /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h */ -/* halfulp.c mpexp.c mplog.c slowexp.c slowpow.c mpa.c */ -/* uexp.c upow.c */ /* root.tbl uexp.tbl upow.tbl */ /* An ultimate power routine. Given two IEEE double machine numbers y,x */ /* it computes the correctly rounded (to nearest) value of x^y. */ @@ -50,11 +46,8 @@ static const double huge = 1.0e300, tiny = 1.0e-300; -double __exp1 (double x, double xx, double error); -static double log1 (double x, double *delta, double *error); -static double my_log2 (double x, double *delta, double *error); -double __slowpow (double x, double y, double z); -static double power1 (double x, double y); +double __exp1 (double x, double xx); +static double log1 (double x, double *delta); static int checkint (double x); /* An ultimate power routine. Given two IEEE double machine numbers y, x it @@ -63,7 +56,7 @@ double SECTION __ieee754_pow (double x, double y) { - double z, a, aa, error, t, a1, a2, y1, y2; + double z, a, aa, t, a1, a2, y1, y2; mynumber u, v; int k; int4 qx, qy; @@ -100,7 +93,7 @@ __ieee754_pow (double x, double y) not matter if |y| <= 2**-64. */ if (fabs (y) < 0x1p-64) y = y < 0 ? -0x1p-64 : 0x1p-64; - z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */ + z = log1 (x, &aa); /* x^y =e^(y log (X)) */ t = y * CN; y1 = t - (t - y); y2 = y - y1; @@ -111,9 +104,16 @@ __ieee754_pow (double x, double y) aa = y2 * a1 + y * a2; a1 = a + aa; a2 = (a - a1) + aa; - error = error * fabs (y); - t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */ - retval = (t > 0) ? t : power1 (x, y); + + /* Maximum relative error RElog of log1 is 1.0e-21 (69.7 bits). + Maximum relative error REexp of __exp1 is 8.8e-22 (69.9 bits). + We actually compute exp ((1 + RElog) * log (x) * y) * (1 + REexp). + Since RElog/REexp are tiny and log (x) * y is at most log (DBL_MAX), + this is equivalent to pow (x, y) * (1 + 710 * RElog + REexp). + So the relative error is 710 * 1.0e-21 + 8.8e-22 = 7.1e-19 + (60.2 bits). The worst-case ULP error is 0.5064. */ + + retval = __exp1 (a1, a2); } if (isinf (retval)) @@ -218,33 +218,11 @@ __ieee754_pow (double x, double y) strong_alias (__ieee754_pow, __pow_finite) #endif -/* Compute x^y using more accurate but more slow log routine. */ -static double -SECTION -power1 (double x, double y) -{ - double z, a, aa, error, t, a1, a2, y1, y2; - z = my_log2 (x, &aa, &error); - t = y * CN; - y1 = t - (t - y); - y2 = y - y1; - t = z * CN; - a1 = t - (t - z); - a2 = z - a1; - a = y * z; - aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y; - a1 = a + aa; - a2 = (a - a1) + aa; - error = error * fabs (y); - t = __exp1 (a1, a2, 1.9e16 * error); - return (t >= 0) ? t : __slowpow (x, y, z); -} - /* Compute log(x) (x is left argument). The result is the returned double + the - parameter DELTA. The result is bounded by ERROR. */ + parameter DELTA. */ static double SECTION -log1 (double x, double *delta, double *error) +log1 (double x, double *delta) { unsigned int i, j; int m; @@ -260,9 +238,7 @@ log1 (double x, double *delta, double *error) u.x = x; m = u.i[HIGH_HALF]; - *error = 0; - *delta = 0; - if (m < 0x00100000) /* 1<x<2^-1007 */ + if (m < 0x00100000) /* Handle denormal x. */ { x = x * t52.x; add = -52.0; @@ -284,7 +260,7 @@ log1 (double x, double *delta, double *error) v.x = u.x + bigu.x; uu = v.x - bigu.x; i = (v.i[LOW_HALF] & 0x000003ff) << 2; - if (two52.i[LOW_HALF] == 1023) /* nx = 0 */ + if (two52.i[LOW_HALF] == 1023) /* Exponent of x is 0. */ { if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */ { @@ -296,8 +272,8 @@ log1 (double x, double *delta, double *error) * (r7 + t * r8))))) - 0.5 * t2 * (t + t1)); res = e1 + e2; - *error = 1.0e-21 * fabs (t); *delta = (e1 - res) + e2; + /* Max relative error is 1.464844e-24, so accurate to 79.1 bits. */ return res; } /* |x-1| < 1.5*2**-10 */ else @@ -316,12 +292,12 @@ log1 (double x, double *delta, double *error) t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e * (p2 + e * (p3 + e * p4))); res = t1 + t2; - *error = 1.0e-24; *delta = (t1 - res) + t2; + /* Max relative error is 1.0e-24, so accurate to 79.7 bits. */ return res; } - } /* nx = 0 */ - else /* nx != 0 */ + } + else /* Exponent of x != 0. */ { eps = u.x - uu; nx = (two52.x - two52e.x) + add; @@ -334,113 +310,13 @@ log1 (double x, double *delta, double *error) t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6))))); res = t1 + t2; - *error = 1.0e-21; - *delta = (t1 - res) + t2; - return res; - } /* nx != 0 */ -} - -/* Slower but more accurate routine of log. The returned result is double + - DELTA. The result is bounded by ERROR. */ -static double -SECTION -my_log2 (double x, double *delta, double *error) -{ - unsigned int i, j; - int m; - double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0; - double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2; - double y, yy, z, zz, j1, j2, j7, j8; -#ifndef DLA_FMS - double j3, j4, j5, j6; -#endif - mynumber u, v; -#ifdef BIG_ENDI - mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */ -#else -# ifdef LITTLE_ENDI - mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */ -# endif -#endif - - u.x = x; - m = u.i[HIGH_HALF]; - *error = 0; - *delta = 0; - add = 0; - if (m < 0x00100000) - { /* x < 2^-1022 */ - x = x * t52.x; - add = -52.0; - u.x = x; - m = u.i[HIGH_HALF]; - } - - if ((m & 0x000fffff) < 0x0006a09e) - { - u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000; - two52.i[LOW_HALF] = (m >> 20); - } - else - { - u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000; - two52.i[LOW_HALF] = (m >> 20) + 1; - } - - v.x = u.x + bigu.x; - uu = v.x - bigu.x; - i = (v.i[LOW_HALF] & 0x000003ff) << 2; - /*------------------------------------- |x-1| < 2**-11------------------------------- */ - if ((two52.i[LOW_HALF] == 1023) && (i == 1200)) - { - t = x - 1.0; - EMULV (t, s3, y, yy, j1, j2, j3, j4, j5); - ADD2 (-0.5, 0, y, yy, z, zz, j1, j2); - MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8); - MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8); - - e1 = t + z; - e2 = ((((t - e1) + z) + zz) + t * t * t - * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8)))))); - res = e1 + e2; - *error = 1.0e-25 * fabs (t); - *delta = (e1 - res) + e2; - return res; - } - /*----------------------------- |x-1| > 2**-11 -------------------------- */ - else - { /*Computing log(x) according to log table */ - nx = (two52.x - two52e.x) + add; - ou1 = ui.x[i]; - ou2 = ui.x[i + 1]; - lu1 = ui.x[i + 2]; - lu2 = ui.x[i + 3]; - v.x = u.x * (ou1 + ou2) + bigv.x; - vv = v.x - bigv.x; - j = v.i[LOW_HALF] & 0x0007ffff; - j = j + j + j; - eps = u.x - uu * vv; - ov = vj.x[j]; - lv1 = vj.x[j + 1]; - lv2 = vj.x[j + 2]; - a = (ou1 + ou2) * (1.0 + ov); - a1 = (a + 1.0e10) - 1.0e10; - a2 = a * (1.0 - a1 * uu * vv); - e1 = eps * a1; - e2 = eps * a2; - e = e1 + e2; - e2 = (e1 - e) + e2; - t = nx * ln2a.x + lu1 + lv1; - t1 = t + e; - t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e - * (p2 + e * (p3 + e * p4))); - res = t1 + t2; - *error = 1.0e-27; *delta = (t1 - res) + t2; + /* Max relative error is 1.0e-21, so accurate to 69.7 bits. */ return res; } } + /* This function receives a double x and checks if it is an integer. If not, it returns 0, else it returns 1 if even or -1 if odd. */ static int diff --git a/sysdeps/ieee754/dbl-64/halfulp.c b/sysdeps/ieee754/dbl-64/halfulp.c deleted file mode 100644 index 0768d86..0000000 --- a/sysdeps/ieee754/dbl-64/halfulp.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2018 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/************************************************************************/ -/* */ -/* MODULE_NAME:halfulp.c */ -/* */ -/* FUNCTIONS:halfulp */ -/* FILES NEEDED: mydefs.h dla.h endian.h */ -/* uroot.c */ -/* */ -/*Routine halfulp(double x, double y) computes x^y where result does */ -/*not need rounding. If the result is closer to 0 than can be */ -/*represented it returns 0. */ -/* In the following cases the function does not compute anything */ -/*and returns a negative number: */ -/*1. if the result needs rounding, */ -/*2. if y is outside the interval [0, 2^20-1], */ -/*3. if x can be represented by x=2**n for some integer n. */ -/************************************************************************/ - -#include "endian.h" -#include "mydefs.h" -#include <dla.h> -#include <math_private.h> - -#ifndef SECTION -# define SECTION -#endif - -static const int4 tab54[32] = { - 262143, 11585, 1782, 511, 210, 107, 63, 42, - 30, 22, 17, 14, 12, 10, 9, 7, - 7, 6, 5, 5, 5, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3 -}; - - -double -SECTION -__halfulp (double x, double y) -{ - mynumber v; - double z, u, uu; -#ifndef DLA_FMS - double j1, j2, j3, j4, j5; -#endif - int4 k, l, m, n; - if (y <= 0) /*if power is negative or zero */ - { - v.x = y; - if (v.i[LOW_HALF] != 0) - return -10.0; - v.x = x; - if (v.i[LOW_HALF] != 0) - return -10.0; - if ((v.i[HIGH_HALF] & 0x000fffff) != 0) - return -10; /* if x =2 ^ n */ - k = ((v.i[HIGH_HALF] & 0x7fffffff) >> 20) - 1023; /* find this n */ - z = (double) k; - return (z * y == -1075.0) ? 0 : -10.0; - } - /* if y > 0 */ - v.x = y; - if (v.i[LOW_HALF] != 0) - return -10.0; - - v.x = x; - /* case where x = 2**n for some integer n */ - if (((v.i[HIGH_HALF] & 0x000fffff) | v.i[LOW_HALF]) == 0) - { - k = (v.i[HIGH_HALF] >> 20) - 1023; - return (((double) k) * y == -1075.0) ? 0 : -10.0; - } - - v.x = y; - k = v.i[HIGH_HALF]; - m = k << 12; - l = 0; - while (m) - { - m = m << 1; l++; - } - n = (k & 0x000fffff) | 0x00100000; - n = n >> (20 - l); /* n is the odd integer of y */ - k = ((k >> 20) - 1023) - l; /* y = n*2**k */ - if (k > 5) - return -10.0; - if (k > 0) - for (; k > 0; k--) - n *= 2; - if (n > 34) - return -10.0; - k = -k; - if (k > 5) - return -10.0; - - /* now treat x */ - while (k > 0) - { - z = __ieee754_sqrt (x); - EMULV (z, z, u, uu, j1, j2, j3, j4, j5); - if (((u - x) + uu) != 0) - break; - x = z; - k--; - } - if (k) - return -10.0; - - /* it is impossible that n == 2, so the mantissa of x must be short */ - - v.x = x; - if (v.i[LOW_HALF]) - return -10.0; - k = v.i[HIGH_HALF]; - m = k << 12; - l = 0; - while (m) - { - m = m << 1; l++; - } - m = (k & 0x000fffff) | 0x00100000; - m = m >> (20 - l); /* m is the odd integer of x */ - - /* now check whether the length of m**n is at most 54 bits */ - - if (m > tab54[n - 3]) - return -10.0; - - /* yes, it is - now compute x**n by simple multiplications */ - - u = x; - for (k = 1; k < n; k++) - u = u * x; - return u; -} diff --git a/sysdeps/ieee754/dbl-64/slowpow.c b/sysdeps/ieee754/dbl-64/slowpow.c deleted file mode 100644 index d7c7fb3..0000000 --- a/sysdeps/ieee754/dbl-64/slowpow.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2018 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/*************************************************************************/ -/* MODULE_NAME:slowpow.c */ -/* */ -/* FUNCTION:slowpow */ -/* */ -/*FILES NEEDED:mpa.h */ -/* mpa.c mpexp.c mplog.c halfulp.c */ -/* */ -/* Given two IEEE double machine numbers y,x , routine computes the */ -/* correctly rounded (to nearest) value of x^y. Result calculated by */ -/* multiplication (in halfulp.c) or if result isn't accurate enough */ -/* then routine converts x and y into multi-precision doubles and */ -/* calls to mpexp routine */ -/*************************************************************************/ - -#include "mpa.h" -#include <math_private.h> - -#include <stap-probe.h> - -#ifndef SECTION -# define SECTION -#endif - -void __mpexp (mp_no *x, mp_no *y, int p); -void __mplog (mp_no *x, mp_no *y, int p); -double ulog (double); -double __halfulp (double x, double y); - -double -SECTION -__slowpow (double x, double y, double z) -{ - double res, res1; - mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; - static const mp_no eps = {-3, {1.0, 4.0}}; - int p; - - /* __HALFULP returns -10 or X^Y. */ - res = __halfulp (x, y); - - /* Return if the result was computed by __HALFULP. */ - if (res >= 0) - return res; - - /* Compute pow as long double. This is currently only used by powerpc, where - one may get 106 bits of accuracy. */ -#ifdef USE_LONG_DOUBLE_FOR_MP - long double ldw, ldz, ldpp; - static const long double ldeps = 0x4.0p-96; - - ldz = __ieee754_logl ((long double) x); - ldw = (long double) y *ldz; - ldpp = __ieee754_expl (ldw); - res = (double) (ldpp + ldeps); - res1 = (double) (ldpp - ldeps); - - /* Return the result if it is accurate enough. */ - if (res == res1) - return res; -#endif - - /* Or else, calculate using multiple precision. P = 10 implies accuracy of - 240 bits accuracy, since MP_NO has a radix of 2^24. */ - p = 10; - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - - /* z = x ^ y - log (z) = y * log (x) - z = exp (y * log (x)) */ - __mplog (&mpx, &mpz, p); - __mul (&mpy, &mpz, &mpw, p); - __mpexp (&mpw, &mpp, p); - - /* Add and subtract EPS to ensure that the result remains unchanged, i.e. we - have last bit accuracy. */ - __add (&mpp, &eps, &mpr, p); - __mp_dbl (&mpr, &res, p); - __sub (&mpp, &eps, &mpr1, p); - __mp_dbl (&mpr1, &res1, p); - if (res == res1) - { - /* Track how often we get to the slow pow code plus - its input/output values. */ - LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res); - return res; - } - - /* If we don't, then we repeat using a higher precision. 768 bits of - precision ought to be enough for anybody. */ - p = 32; - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - __mplog (&mpx, &mpz, p); - __mul (&mpy, &mpz, &mpw, p); - __mpexp (&mpw, &mpp, p); - __mp_dbl (&mpp, &res, p); - - /* Track how often we get to the uber-slow pow code plus - its input/output values. */ - LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res); - - return res; -} diff --git a/sysdeps/ieee754/dbl-64/uexp.h b/sysdeps/ieee754/dbl-64/uexp.h index a8a023e..2edf530 100644 --- a/sysdeps/ieee754/dbl-64/uexp.h +++ b/sysdeps/ieee754/dbl-64/uexp.h @@ -30,7 +30,7 @@ #include "mydefs.h" const static double zero = 0.0, hhuge = 1.0e300, tiny = 1.0e-300, -err_0 = 1.000014, err_1 = 0.000016; +err_0 = 1.000014; const static int4 bigint = 0x40862002, badint = 0x40876000,smallint = 0x3C8fffff; const static int4 hugeint = 0x7FFFFFFF, infint = 0x7ff00000; diff --git a/sysdeps/m68k/m680x0/fpu/halfulp.c b/sysdeps/m68k/m680x0/fpu/halfulp.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/m68k/m680x0/fpu/halfulp.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/m68k/m680x0/fpu/slowpow.c b/sysdeps/m68k/m680x0/fpu/slowpow.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/m68k/m680x0/fpu/slowpow.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/powerpc/power4/fpu/Makefile b/sysdeps/powerpc/power4/fpu/Makefile index e17d32f..fa1b070 100644 --- a/sysdeps/powerpc/power4/fpu/Makefile +++ b/sysdeps/powerpc/power4/fpu/Makefile @@ -2,6 +2,5 @@ ifeq ($(subdir),math) CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1 CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1 endif diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 85552bd..48e53f7 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -2468,8 +2468,10 @@ Function: "log_vlen8_avx2": float: 2 Function: "pow": +double: 1 float: 1 float128: 2 +idouble: 1 ifloat: 1 ifloat128: 2 ildouble: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 9a89bfc..9391eb5 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -10,9 +10,9 @@ libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ - mplog-fma mpa-fma slowexp-fma slowpow-fma \ + mplog-fma mpa-fma slowexp-fma \ sincos32-fma doasin-fma dosincos-fma \ - halfulp-fma mpexp-fma \ + mpexp-fma \ mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma CFLAGS-doasin-fma.c = -mfma -mavx2 @@ -22,7 +22,6 @@ CFLAGS-e_atan2-fma.c = -mfma -mavx2 CFLAGS-e_exp-fma.c = -mfma -mavx2 CFLAGS-e_log-fma.c = -mfma -mavx2 CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma) -CFLAGS-halfulp-fma.c = -mfma -mavx2 CFLAGS-mpa-fma.c = -mfma -mavx2 CFLAGS-mpatan-fma.c = -mfma -mavx2 CFLAGS-mpatan2-fma.c = -mfma -mavx2 @@ -33,7 +32,6 @@ CFLAGS-mptan-fma.c = -mfma -mavx2 CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-sincos32-fma.c = -mfma -mavx2 CFLAGS-slowexp-fma.c = -mfma -mavx2 -CFLAGS-slowpow-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 @@ -53,9 +51,9 @@ CFLAGS-s_sincosf-fma.c = -mfma -mavx2 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ - mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ + mplog-fma4 mpa-fma4 slowexp-fma4 \ sincos32-fma4 doasin-fma4 dosincos-fma4 \ - halfulp-fma4 mpexp-fma4 \ + mpexp-fma4 \ mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 CFLAGS-doasin-fma4.c = -mfma4 @@ -65,7 +63,6 @@ CFLAGS-e_atan2-fma4.c = -mfma4 CFLAGS-e_exp-fma4.c = -mfma4 CFLAGS-e_log-fma4.c = -mfma4 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma) -CFLAGS-halfulp-fma4.c = -mfma4 CFLAGS-mpa-fma4.c = -mfma4 CFLAGS-mpatan-fma4.c = -mfma4 CFLAGS-mpatan2-fma4.c = -mfma4 @@ -76,7 +73,6 @@ CFLAGS-mptan-fma4.c = -mfma4 CFLAGS-s_atan-fma4.c = -mfma4 CFLAGS-sincos32-fma4.c = -mfma4 CFLAGS-slowexp-fma4.c = -mfma4 -CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c index 6fd4083..73c1e7f 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma #define __exp1 __exp1_fma -#define __slowpow __slowpow_fma #define SECTION __attribute__ ((section (".text.fma"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c index 5b3ea8e..8971b65 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma4 #define __exp1 __exp1_fma4 -#define __slowpow __slowpow_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c deleted file mode 100644 index 6ca7046..0000000 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma -#define SECTION __attribute__ ((section (".text.fma"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c deleted file mode 100644 index a00c17c..0000000 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c deleted file mode 100644 index 160ed68..0000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma -#define __add __add_fma -#define __dbl_mp __dbl_mp_fma -#define __mpexp __mpexp_fma -#define __mplog __mplog_fma -#define __mul __mul_fma -#define __sub __sub_fma -#define __halfulp __halfulp_fma -#define SECTION __attribute__ ((section (".text.fma"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c deleted file mode 100644 index 69d6982..0000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mplog __mplog_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> |