aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ieee754/dbl-64/e_exp.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ieee754/dbl-64/e_exp.c')
-rw-r--r--sysdeps/ieee754/dbl-64/e_exp.c485
1 files changed, 154 insertions, 331 deletions
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index 7d8b414..209f20b 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -1,48 +1,158 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2018 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/***************************************************************************/
-/* MODULE_NAME:uexp.c */
-/* */
-/* FUNCTION:uexp */
-/* exp1 */
-/* */
-/* FILES NEEDED:dla.h endian.h mpa.h mydefs.h uexp.h */
-/* */
-/* An ultimate exp routine. Given an IEEE double machine number x */
-/* it computes an almost correctly rounded (to nearest) value of e^x */
-/* Assumption: Machine arithmetic operations are performed in */
-/* round to nearest mode of IEEE 754 standard. */
-/* */
-/***************************************************************************/
+/* Double-precision e^x function.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <math.h>
-#include "endian.h"
-#include "uexp.h"
-#include "mydefs.h"
-#include "MathLib.h"
-#include "uexp.tbl"
+#include <stdint.h>
#include <math-barriers.h>
-#include <math_private.h>
-#include <fenv_private.h>
-#include <fenv.h>
-#include <float.h>
-#include "eexp.tbl"
+#include <math-narrow-eval.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflown into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
+
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble (sbits);
+ y = 0x1p1009 * (scale + scale * tmp);
+ return check_oflow (y);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ scale = asdouble (sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0)
+ {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp;
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo;
+ y = math_narrow_eval (hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0;
+ /* The underflow exception needs to be signaled explicitly. */
+ math_force_eval (math_opt_barrier (0x1p-1022) * 0x1p-1022);
+ }
+ y = 0x1p-1022 * y;
+ return check_uflow (y);
+}
+
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t
+top12 (double x)
+{
+ return asuint64 (x) >> 52;
+}
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ If hastail is 0 then xtail is assumed to be 0 too. */
+static inline double
+exp_inline (double x, double xtail, int hastail)
+{
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, z, r, r2, scale, tail, tmp;
+
+ abstop = top12 (x) & 0x7ff;
+ if (__glibc_unlikely (abstop - top12 (0x1p-54)
+ >= top12 (512.0) - top12 (0x1p-54)))
+ {
+ if (abstop - top12 (0x1p-54) >= 0x80000000)
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12 (1024.0))
+ {
+ if (asuint64 (x) == asuint64 (-INFINITY))
+ return 0.0;
+ if (abstop >= top12 (INFINITY))
+ return 1.0 + x;
+ if (asuint64 (x) >> 63)
+ return __math_uflow (0);
+ else
+ return __math_oflow (0);
+ }
+ /* Large x is special cased below. */
+ abstop = 0;
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ z = InvLn2N * x;
+#if TOINT_INTRINSICS
+ kd = roundtoint (z);
+ ki = converttoint (z);
+#else
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ kd = math_narrow_eval (z + Shift);
+ ki = asuint64 (kd);
+ kd -= Shift;
+#endif
+ r = x + kd * NegLn2hiN + kd * NegLn2loN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ if (hastail)
+ r += xtail;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N);
+ top = ki << (52 - EXP_TABLE_BITS);
+ tail = asdouble (T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.25/N ulp larger. */
+ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
+ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+ if (__glibc_unlikely (abstop == 0))
+ return specialcase (tmp, sbits, ki);
+ scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return scale + scale * tmp;
+}
#ifndef SECTION
# define SECTION
@@ -52,187 +162,7 @@ double
SECTION
__ieee754_exp (double x)
{
- double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- double z;
- mynumber junk1, junk2, binexp = {{0, 0}};
- int4 i, j, m, n, ex;
- int4 k;
- double retval;
-
- {
- SET_RESTORE_ROUND (FE_TONEAREST);
-
- junk1.x = x;
- m = junk1.i[HIGH_HALF];
- n = m & hugeint;
-
- if (n < 0x3ff0a2b2) /* |x| < 1.03972053527832 */
- {
- if (n < 0x3f862e42) /* |x| < 3/2 ln 2 */
- {
- if (n < 0x3ed00000) /* |x| < 1/64 ln 2 */
- {
- if (n < 0x3e300000) /* |x| < 2^18 */
- {
- retval = one + junk1.x;
- goto ret;
- }
- retval = one + junk1.x * (one + half * junk1.x);
- goto ret;
- }
- t = junk1.x * junk1.x;
- retval = junk1.x + (t * (half + junk1.x * t2) +
- (t * t) * (t3 + junk1.x * t4 + t * t5));
- retval = one + retval;
- goto ret;
- }
-
- /* Find the multiple of 2^-6 nearest x. */
- k = n >> 20;
- j = (0x00100000 | (n & 0x000fffff)) >> (0x40c - k);
- j = (j - 1) & ~1;
- if (m < 0)
- j += 134;
- z = junk1.x - TBL2[j];
- t = z * z;
- retval = z + (t * (half + (z * t2))
- + (t * t) * (t3 + z * t4 + t * t5));
- retval = TBL2[j + 1] + TBL2[j + 1] * retval;
- goto ret;
- }
-
- if (n < bigint) /* && |x| >= 1.03972053527832 */
- {
- y = x * log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp * ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
- eps = del + del * del * (p3.x * del + p2.x);
-
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
-
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
-
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
-
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- /* Maximum relative error is 7.8e-22 (70.1 bits).
- Maximum ULP error is 0.500007. */
- retval = res * binexp.x;
- goto ret;
- }
-
- if (n >= badint)
- {
- if (n > infint)
- {
- retval = x + x;
- goto ret;
- } /* x is NaN */
- if (n < infint)
- {
- if (x > 0)
- goto ret_huge;
- else
- goto ret_tiny;
- }
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0)
- {
- retval = x + x;
- goto ret;
- } /* x is NaN */
- retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */
- goto ret;
- }
-
- y = x * log2e.x + three51.x;
- bexp = y - three51.x;
- junk1.x = y;
- eps = bexp * ln_two2.x;
- t = x - bexp * ln_two1.x;
- y = t + three33.x;
- base = y - three33.x;
- junk2.x = y;
- del = (t - base) - eps;
- eps = del + del * del * (p3.x * del + p2.x);
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (m >> 31)
- {
- ex = junk1.i[LOW_HALF];
- if (res < 1.0)
- {
- res += res;
- cor += cor;
- ex -= 1;
- }
- if (ex >= -1022)
- {
- binexp.i[HIGH_HALF] = (1023 + ex) << 20;
- /* Does not underflow: res >= 1.0, binexp >= 0x1p-1022
- Maximum relative error is 7.8e-22 (70.1 bits).
- Maximum ULP error is 0.500007. */
- retval = res * binexp.x;
- goto ret;
- }
- ex = -(1022 + ex);
- binexp.i[HIGH_HALF] = (1023 - ex) << 20;
- res *= binexp.x;
- cor *= binexp.x;
- t = 1.0 + res;
- y = ((1.0 - t) + res) + cor;
- res = t + y;
- /* Maximum ULP error is 0.5000035. */
- binexp.i[HIGH_HALF] = 0x00100000;
- retval = (res - 1.0) * binexp.x;
- if (retval < DBL_MIN)
- {
- double force_underflow = tiny * tiny;
- math_force_eval (force_underflow);
- }
- if (retval == 0)
- goto ret_tiny;
- goto ret;
- }
- else
- {
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
- /* Maximum relative error is 7.8e-22 (70.1 bits).
- Maximum ULP error is 0.500007. */
- retval = res * binexp.x * t256.x;
- if (isinf (retval))
- goto ret_huge;
- else
- goto ret;
- }
- }
-ret:
- return retval;
-
- ret_huge:
- return hhuge * hhuge;
-
- ret_tiny:
- return tiny * tiny;
+ return exp_inline (x, 0, 0);
}
#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
@@ -243,112 +173,5 @@ double
SECTION
__exp1 (double x, double xx)
{
- double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0, 0}};
- int4 i, j, m, n, ex;
-
- junk1.x = x;
- m = junk1.i[HIGH_HALF];
- n = m & hugeint; /* no sign */
-
- /* fabs (x) > 5.551112e-17 and fabs (x) < 7.080010e+02. */
- if (n > smallint && n < bigint)
- {
- y = x * log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp * ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) + (xx - eps); /* x = bexp*ln(2) + base + del */
- eps = del + del * del * (p3.x * del + p2.x);
-
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
-
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
-
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
-
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- /* Maximum relative error before rounding is 8.8e-22 (69.9 bits).
- Maximum ULP error is 0.500008. */
- return res * binexp.x;
- }
-
- if (n <= smallint)
- return 1.0; /* if x->0 e^x=1 */
-
- if (n >= badint)
- {
- if (n > infint)
- return (zero / zero); /* x is NaN, return invalid */
- if (n < infint)
- return ((x > 0) ? (hhuge * hhuge) : (tiny * tiny));
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0)
- return (zero / zero); /* x is NaN */
- return ((x > 0) ? inf.x : zero); /* |x| = inf; return either inf or 0 */
- }
-
- y = x * log2e.x + three51.x;
- bexp = y - three51.x;
- junk1.x = y;
- eps = bexp * ln_two2.x;
- t = x - bexp * ln_two1.x;
- y = t + three33.x;
- base = y - three33.x;
- junk2.x = y;
- del = (t - base) + (xx - eps);
- eps = del + del * del * (p3.x * del + p2.x);
- i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
- j = (junk2.i[LOW_HALF] & 511) << 1;
- al = coar.x[i] * fine.x[j];
- bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
- + coar.x[i + 1] * fine.x[j + 1]);
- rem = (bet + bet * eps) + al * eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (m >> 31)
- {
- /* x < 0. */
- ex = junk1.i[LOW_HALF];
- if (res < 1.0)
- {
- res += res;
- cor += cor;
- ex -= 1;
- }
- if (ex >= -1022)
- {
- binexp.i[HIGH_HALF] = (1023 + ex) << 20;
- /* Maximum ULP error is 0.500008. */
- return res * binexp.x;
- }
- /* Denormal case - ex < -1022. */
- ex = -(1022 + ex);
- binexp.i[HIGH_HALF] = (1023 - ex) << 20;
- res *= binexp.x;
- cor *= binexp.x;
- t = 1.0 + res;
- y = ((1.0 - t) + res) + cor;
- res = t + y;
- binexp.i[HIGH_HALF] = 0x00100000;
- /* Maximum ULP error is 0.500004. */
- return (res - 1.0) * binexp.x;
- }
- else
- {
- binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
- /* Maximum ULP error is 0.500008. */
- return res * binexp.x * t256.x;
- }
+ return exp_inline (x, xx, 1);
}