aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/libm-ieee754/e_exp.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/libm-ieee754/e_exp.c')
-rw-r--r--sysdeps/libm-ieee754/e_exp.c330
1 files changed, 171 insertions, 159 deletions
diff --git a/sysdeps/libm-ieee754/e_exp.c b/sysdeps/libm-ieee754/e_exp.c
index 9eba853..a6d53eb 100644
--- a/sysdeps/libm-ieee754/e_exp.c
+++ b/sysdeps/libm-ieee754/e_exp.c
@@ -1,167 +1,179 @@
-/* @(#)e_exp.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: e_exp.c,v 1.8 1995/05/10 20:45:03 jtc Exp $";
-#endif
+/* Double-precision floating point e^x.
+ Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Geoffrey Keating <geoffk@ozemail.com.au>
-/* __ieee754_exp(x)
- * Returns the exponential of x.
- *
- * Method
- * 1. Argument reduction:
- * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
- * Given x, find r and integer k such that
- *
- * x = k*ln2 + r, |r| <= 0.5*ln2.
- *
- * Here r will be represented as r = hi-lo for better
- * accuracy.
- *
- * 2. Approximation of exp(r) by a special rational function on
- * the interval [0,0.34658]:
- * Write
- * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
- * We use a special Reme algorithm on [0,0.34658] to generate
- * a polynomial of degree 5 to approximate R. The maximum error
- * of this polynomial approximation is bounded by 2**-59. In
- * other words,
- * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
- * (where z=r*r, and the values of P1 to P5 are listed below)
- * and
- * | 5 | -59
- * | 2.0+P1*z+...+P5*z - R(z) | <= 2
- * | |
- * The computation of exp(r) thus becomes
- * 2*r
- * exp(r) = 1 + -------
- * R - r
- * r*R1(r)
- * = 1 + r + ----------- (for better accuracy)
- * 2 - R1(r)
- * where
- * 2 4 10
- * R1(r) = r - (P1*r + P2*r + ... + P5*r ).
- *
- * 3. Scale back to obtain exp(x):
- * From step 1, we have
- * exp(x) = 2^k * exp(r)
- *
- * Special cases:
- * exp(INF) is INF, exp(NaN) is NaN;
- * exp(-INF) is 0, and
- * for finite argument, only exp(0)=1 is exact.
- *
- * Accuracy:
- * according to an error analysis, the error is always less than
- * 1 ulp (unit in the last place).
- *
- * Misc. info.
- * For IEEE double
- * if x > 7.09782712893383973096e+02 then exp(x) overflow
- * if x < -7.45133219101941108420e+02 then exp(x) underflow
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
- */
-
-#include "math.h"
-#include "math_private.h"
-
-#ifdef __STDC__
-static const double
-#else
-static double
-#endif
-one = 1.0,
-halF[2] = {0.5,-0.5,},
-huge = 1.0e+300,
-twom1000= 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
-o_threshold= 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
-u_threshold= -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
-ln2HI[2] ={ 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
- -6.93147180369123816490e-01,},/* 0xbfe62e42, 0xfee00000 */
-ln2LO[2] ={ 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
- -1.90821492927058770002e-10,},/* 0xbdea39ef, 0x35793c76 */
-invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
-P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
-
-
-#ifdef __STDC__
- double __ieee754_exp(double x) /* default IEEE double exp */
-#else
- double __ieee754_exp(x) /* default IEEE double exp */
- double x;
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* How this works:
+ The basic design here is from
+ Shmuel Gal and Boris Bachelis, "An Accurate Elementary Mathematical
+ Library for the IEEE Floating Point Standard", ACM Trans. Math. Soft.,
+ 17 (1), March 1991, pp. 26-45.
+
+ The input value, x, is written as
+
+ x = n * ln(2)_0 + t/512 + delta[t] + x + n * ln(2)_1
+
+ where:
+ - n is an integer, 1024 >= n >= -1075;
+ - ln(2)_0 is the first 43 bits of ln(2), and ln(2)_1 is the remainder, so
+ that |ln(2)_1| < 2^-32;
+ - t is an integer, 177 >= t >= -177
+ - delta is based on a table entry, delta[t] < 2^-28
+ - x is whatever is left, |x| < 2^-10
+
+ Then e^x is approximated as
+
+ e^x = 2^n_1 ( 2^n_0 e^(t/512 + delta[t])
+ + ( 2^n_0 e^(t/512 + delta[t])
+ * ( p(x + n * ln(2)_1)
+ - n*ln(2)_1
+ - n*ln(2)_1 * p(x + n * ln(2)_1) ) ) )
+
+ where
+ - p(x) is a polynomial approximating e(x)-1;
+ - e^(t/512 + delta[t]) is obtained from a table;
+ - n_1 + n_0 = n, so that |n_0| < DBL_MIN_EXP-1.
+
+ If it happens that n_1 == 0 (this is the usual case), that multiplication
+ is omitted.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
#endif
+#include <float.h>
+#include <ieee754.h>
+#include <math.h>
+#include <fenv.h>
+#include <inttypes.h>
+#include <math_private.h>
+
+extern const float __exp_deltatable[178];
+extern const double __exp_atable[355] /* __attribute__((mode(DF))) */;
+
+static const volatile double TWO1023 = 8.988465674311579539e+307;
+static const volatile double TWOM1000 = 9.3326361850321887899e-302;
+
+double
+__ieee754_exp (double x)
{
- double y,hi,lo,c,t;
- int32_t k,xsb;
- u_int32_t hx;
-
- GET_HIGH_WORD(hx,x);
- xsb = (hx>>31)&1; /* sign bit of x */
- hx &= 0x7fffffff; /* high word of |x| */
-
- /* filter out non-finite argument */
- if(hx >= 0x40862E42) { /* if |x|>=709.78... */
- if(hx>=0x7ff00000) {
- u_int32_t lx;
- GET_LOW_WORD(lx,x);
- if(((hx&0xfffff)|lx)!=0)
- return x+x; /* NaN */
- else return (xsb==0)? x:0.0; /* exp(+-inf)={inf,0} */
- }
- if(x > o_threshold) return huge*huge; /* overflow */
- if(x < u_threshold) return twom1000*twom1000; /* underflow */
+ static const uint32_t a_minf = 0xff800000;
+ static const double himark = 709.7827128933840868;
+ static const double lomark = -745.1332191019412221;
+ /* Check for usual case. */
+ if (isless (x, himark) && isgreater (x, lomark))
+ {
+ static const float TWO43 = 8796093022208.0;
+ static const float TWO52 = 4503599627370496.0;
+ /* 1/ln(2). */
+ static const double M_1_LN2 = 1.442695040888963387;
+ /* ln(2), part 1 */
+ static const double M_LN2_0 = .6931471805598903302;
+ /* ln(2), part 2 */
+ static const double M_LN2_1 = 5.497923018708371155e-14;
+
+ int tval, unsafe, n_i;
+ double x22, n, t, dely, result;
+ union ieee754_double ex2_u, scale_u;
+ fenv_t oldenv;
+
+ feholdexcept (&oldenv);
+ fesetround (FE_TONEAREST);
+
+ /* Calculate n. */
+ if (x >= 0)
+ {
+ n = x * M_1_LN2 + TWO52;
+ n -= TWO52;
}
+ else
+ {
+ n = x * M_1_LN2 - TWO52;
+ n += TWO52;
+ }
+ x = x - n*M_LN2_0;
+ if (x >= 0)
+ {
+ /* Calculate t/512. */
+ t = x + TWO43;
+ t -= TWO43;
+ x -= t;
+
+ /* Compute tval = t. */
+ tval = (int) (t * 512.0);
- /* argument reduction */
- if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
- if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
- hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
- } else {
- k = invln2*x+halF[xsb];
- t = k;
- hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
- lo = t*ln2LO[0];
- }
- x = hi - lo;
- }
- else if(hx < 0x3e300000) { /* when |x|<2**-28 */
- if(huge+x>one) return one+x;/* trigger inexact */
+ x -= __exp_deltatable[tval];
}
- else k = 0;
-
- /* x is now in primary range */
- t = x*x;
- c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
- if(k==0) return one-((x*c)/(c-2.0)-x);
- else y = one-((lo-(x*c)/(2.0-c))-hi);
- if(k >= -1021) {
- u_int32_t hy;
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(y,hy+(k<<20)); /* add k to y's exponent */
- return y;
- } else {
- u_int32_t hy;
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(y,hy+((k+1000)<<20)); /* add k to y's exponent */
- return y*twom1000;
+ else
+ {
+ /* As above, but x is negative. */
+ t = x - TWO43;
+ t += TWO43;
+ x -= t;
+
+ tval = (int) (t * 512.0);
+
+ x += __exp_deltatable[-tval];
}
+
+ /* Now, the variable x contains x + n*ln(2)_1. */
+ dely = n*M_LN2_1;
+
+ /* Compute ex2 = 2^n_0 e^(t/512+delta[t]). */
+ ex2_u.d = __exp_atable[tval+177];
+ n_i = (int)n;
+ /* 'unsafe' is 1 iff n_1 != 0. */
+ unsafe = abs(n_i) >= -DBL_MIN_EXP - 1;
+ ex2_u.ieee.exponent += n_i >> unsafe;
+
+ /* Compute scale = 2^n_1. */
+ scale_u.d = 1.0;
+ scale_u.ieee.exponent += n_i - (n_i >> unsafe);
+
+ /* Approximate e^x2 - 1, using a fourth-degree polynomial,
+ with maximum error in [-2^-10-2^-28,2^-10+2^-28]
+ less than 4.9e-19. */
+ x22 = (((0.04166666898464281565
+ * x + 0.1666666766008501610)
+ * x + 0.499999999999990008)
+ * x + 0.9999999999999976685) * x;
+ /* Allow for impact of dely. */
+ x22 -= dely + dely*x22;
+
+ /* Return result. */
+ fesetenv (&oldenv);
+
+ result = x22 * ex2_u.d + ex2_u.d;
+ if (!unsafe)
+ return result;
+ else
+ return result * scale_u.d;
+ }
+ /* Exceptional cases: */
+ else if (isless (x, himark))
+ {
+ if (x == *(const float *) &a_minf)
+ /* e^-inf == 0, with no error. */
+ return 0;
+ else
+ /* Underflow */
+ return TWOM1000 * TWOM1000;
+ }
+ else
+ /* Return x, if x is a NaN or Inf; or overflow, otherwise. */
+ return TWO1023*x;
}