1 files changed, 171 insertions, 159 deletions
diff --git a/sysdeps/libm-ieee754/e_exp.c b/sysdeps/libm-ieee754/e_exp.c
index 9eba853..a6d53eb 100644
--- a/sysdeps/libm-ieee754/e_exp.c
+++ b/sysdeps/libm-ieee754/e_exp.c
@@ -1,167 +1,179 @@
-/* @(#)e_exp.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice 
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: e_exp.c,v 1.8 1995/05/10 20:45:03 jtc Exp $";
-#endif
+/* Double-precision floating point e^x.
+   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Geoffrey Keating <geoffk@ozemail.com.au>
 
-/* __ieee754_exp(x)
- * Returns the exponential of x.
- *
- * Method
- *   1. Argument reduction:
- *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
- *	Given x, find r and integer k such that
- *
- *               x = k*ln2 + r,  |r| <= 0.5*ln2.  
- *
- *      Here r will be represented as r = hi-lo for better 
- *	accuracy.
- *
- *   2. Approximation of exp(r) by a special rational function on
- *	the interval [0,0.34658]:
- *	Write
- *	    R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
- *      We use a special Reme algorithm on [0,0.34658] to generate 
- * 	a polynomial of degree 5 to approximate R. The maximum error 
- *	of this polynomial approximation is bounded by 2**-59. In
- *	other words,
- *	    R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
- *  	(where z=r*r, and the values of P1 to P5 are listed below)
- *	and
- *	    |                  5          |     -59
- *	    | 2.0+P1*z+...+P5*z   -  R(z) | <= 2 
- *	    |                             |
- *	The computation of exp(r) thus becomes
- *                             2*r
- *		exp(r) = 1 + -------
- *		              R - r
- *                                 r*R1(r)	
- *		       = 1 + r + ----------- (for better accuracy)
- *		                  2 - R1(r)
- *	where
- *			         2       4             10
- *		R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
- *	
- *   3. Scale back to obtain exp(x):
- *	From step 1, we have
- *	   exp(x) = 2^k * exp(r)
- *
- * Special cases:
- *	exp(INF) is INF, exp(NaN) is NaN;
- *	exp(-INF) is 0, and
- *	for finite argument, only exp(0)=1 is exact.
- *
- * Accuracy:
- *	according to an error analysis, the error is always less than
- *	1 ulp (unit in the last place).
- *
- * Misc. info.
- *	For IEEE double 
- *	    if x >  7.09782712893383973096e+02 then exp(x) overflow
- *	    if x < -7.45133219101941108420e+02 then exp(x) underflow
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following 
- * constants. The decimal values may be used, provided that the 
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
- */
-
-#include "math.h"
-#include "math_private.h"
-
-#ifdef __STDC__
-static const double
-#else
-static double
-#endif
-one	= 1.0,
-halF[2]	= {0.5,-0.5,},
-huge	= 1.0e+300,
-twom1000= 9.33263618503218878990e-302,     /* 2**-1000=0x01700000,0*/
-o_threshold=  7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
-u_threshold= -7.45133219101941108420e+02,  /* 0xc0874910, 0xD52D3051 */
-ln2HI[2]   ={ 6.93147180369123816490e-01,  /* 0x3fe62e42, 0xfee00000 */
-	     -6.93147180369123816490e-01,},/* 0xbfe62e42, 0xfee00000 */
-ln2LO[2]   ={ 1.90821492927058770002e-10,  /* 0x3dea39ef, 0x35793c76 */
-	     -1.90821492927058770002e-10,},/* 0xbdea39ef, 0x35793c76 */
-invln2 =  1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
-P1   =  1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
-P2   = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
-P3   =  6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
-P4   = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
-P5   =  4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
-
-
-#ifdef __STDC__
-	double __ieee754_exp(double x)	/* default IEEE double exp */
-#else
-	double __ieee754_exp(x)	/* default IEEE double exp */
-	double x;
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* How this works:
+   The basic design here is from
+   Shmuel Gal and Boris Bachelis, "An Accurate Elementary Mathematical
+   Library for the IEEE Floating Point Standard", ACM Trans. Math. Soft.,
+   17 (1), March 1991, pp. 26-45.
+
+   The input value, x, is written as
+
+   x = n * ln(2)_0 + t/512 + delta[t] + x + n * ln(2)_1
+
+   where:
+   - n is an integer, 1024 >= n >= -1075;
+   - ln(2)_0 is the first 43 bits of ln(2), and ln(2)_1 is the remainder, so
+     that |ln(2)_1| < 2^-32;
+   - t is an integer, 177 >= t >= -177
+   - delta is based on a table entry, delta[t] < 2^-28
+   - x is whatever is left, |x| < 2^-10
+
+   Then e^x is approximated as
+
+   e^x = 2^n_1 ( 2^n_0 e^(t/512 + delta[t])
+               + ( 2^n_0 e^(t/512 + delta[t])
+                   * ( p(x + n * ln(2)_1)
+                       - n*ln(2)_1
+                       - n*ln(2)_1 * p(x + n * ln(2)_1) ) ) )
+
+   where
+   - p(x) is a polynomial approximating e(x)-1;
+   - e^(t/512 + delta[t]) is obtained from a table;
+   - n_1 + n_0 = n, so that |n_0| < DBL_MIN_EXP-1.
+
+   If it happens that n_1 == 0 (this is the usual case), that multiplication
+   is omitted.
+   */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
 #endif
+#include <float.h>
+#include <ieee754.h>
+#include <math.h>
+#include <fenv.h>
+#include <inttypes.h>
+#include <math_private.h>
+
+extern const float __exp_deltatable[178];
+extern const double __exp_atable[355] /* __attribute__((mode(DF))) */;
+
+static const volatile double TWO1023 = 8.988465674311579539e+307;
+static const volatile double TWOM1000 = 9.3326361850321887899e-302;
+
+double
+__ieee754_exp (double x)
 {
-	double y,hi,lo,c,t;
-	int32_t k,xsb;
-	u_int32_t hx;
-
-	GET_HIGH_WORD(hx,x);
-	xsb = (hx>>31)&1;		/* sign bit of x */
-	hx &= 0x7fffffff;		/* high word of |x| */
-
-    /* filter out non-finite argument */
-	if(hx >= 0x40862E42) {			/* if |x|>=709.78... */
-            if(hx>=0x7ff00000) {
-	        u_int32_t lx;
-		GET_LOW_WORD(lx,x);
-		if(((hx&0xfffff)|lx)!=0) 
-		     return x+x; 		/* NaN */
-		else return (xsb==0)? x:0.0;	/* exp(+-inf)={inf,0} */
-	    }
-	    if(x > o_threshold) return huge*huge; /* overflow */
-	    if(x < u_threshold) return twom1000*twom1000; /* underflow */
+  static const uint32_t a_minf = 0xff800000;
+  static const double himark = 709.7827128933840868;
+  static const double lomark = -745.1332191019412221;
+  /* Check for usual case.  */
+  if (isless (x, himark) && isgreater (x, lomark))
+    {
+      static const float TWO43 = 8796093022208.0;
+      static const float TWO52 = 4503599627370496.0;
+      /* 1/ln(2).  */
+      static const double M_1_LN2 = 1.442695040888963387;
+      /* ln(2), part 1 */
+      static const double M_LN2_0 = .6931471805598903302;
+      /* ln(2), part 2 */
+      static const double M_LN2_1 = 5.497923018708371155e-14;
+
+      int tval, unsafe, n_i;
+      double x22, n, t, dely, result;
+      union ieee754_double ex2_u, scale_u;
+      fenv_t oldenv;
+
+      feholdexcept (&oldenv);
+      fesetround (FE_TONEAREST);
+
+      /* Calculate n.  */
+      if (x >= 0)
+	{
+	  n = x * M_1_LN2 + TWO52;
+	  n -= TWO52;
 	}
+      else
+	{
+	  n = x * M_1_LN2 - TWO52;
+	  n += TWO52;
+	}
+      x = x - n*M_LN2_0;
+      if (x >= 0)
+	{
+	  /* Calculate t/512.  */
+	  t = x + TWO43;
+	  t -= TWO43;
+	  x -= t;
+
+	  /* Compute tval = t.  */
+	  tval = (int) (t * 512.0);
 
-    /* argument reduction */
-	if(hx > 0x3fd62e42) {		/* if  |x| > 0.5 ln2 */ 
-	    if(hx < 0x3FF0A2B2) {	/* and |x| < 1.5 ln2 */
-		hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
-	    } else {
-		k  = invln2*x+halF[xsb];
-		t  = k;
-		hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */
-		lo = t*ln2LO[0];
-	    }
-	    x  = hi - lo;
-	} 
-	else if(hx < 0x3e300000)  {	/* when |x|<2**-28 */
-	    if(huge+x>one) return one+x;/* trigger inexact */
+	  x -= __exp_deltatable[tval];
 	}
-	else k = 0;
-
-    /* x is now in primary range */
-	t  = x*x;
-	c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	if(k==0) 	return one-((x*c)/(c-2.0)-x); 
-	else 		y = one-((lo-(x*c)/(2.0-c))-hi);
-	if(k >= -1021) {
-	    u_int32_t hy;
-	    GET_HIGH_WORD(hy,y);
-	    SET_HIGH_WORD(y,hy+(k<<20));	/* add k to y's exponent */
-	    return y;
-	} else {
-	    u_int32_t hy;
-	    GET_HIGH_WORD(hy,y);
-	    SET_HIGH_WORD(y,hy+((k+1000)<<20));	/* add k to y's exponent */
-	    return y*twom1000;
+      else
+	{
+	  /* As above, but x is negative.  */
+	  t = x - TWO43;
+	  t += TWO43;
+	  x -= t;
+
+	  tval = (int) (t * 512.0);
+
+	  x += __exp_deltatable[-tval];
 	}
+
+      /* Now, the variable x contains x + n*ln(2)_1.  */
+      dely = n*M_LN2_1;
+
+      /* Compute ex2 = 2^n_0 e^(t/512+delta[t]).  */
+      ex2_u.d = __exp_atable[tval+177];
+      n_i = (int)n;
+      /* 'unsafe' is 1 iff n_1 != 0.  */
+      unsafe = abs(n_i) >= -DBL_MIN_EXP - 1;
+      ex2_u.ieee.exponent += n_i >> unsafe;
+
+      /* Compute scale = 2^n_1.  */
+      scale_u.d = 1.0;
+      scale_u.ieee.exponent += n_i - (n_i >> unsafe);
+
+      /* Approximate e^x2 - 1, using a fourth-degree polynomial,
+	 with maximum error in [-2^-10-2^-28,2^-10+2^-28]
+	 less than 4.9e-19.  */
+      x22 = (((0.04166666898464281565
+	       * x + 0.1666666766008501610)
+	      * x + 0.499999999999990008)
+	     * x + 0.9999999999999976685) * x;
+      /* Allow for impact of dely.  */
+      x22 -= dely + dely*x22;
+
+      /* Return result.  */
+      fesetenv (&oldenv);
+
+      result = x22 * ex2_u.d + ex2_u.d;
+      if (!unsafe)
+	return result;
+      else
+	return result * scale_u.d;
+    }
+  /* Exceptional cases:  */
+  else if (isless (x, himark))
+    {
+      if (x == *(const float *) &a_minf)
+	/* e^-inf == 0, with no error.  */
+	return 0;
+      else
+	/* Underflow */
+	return TWOM1000 * TWOM1000;
+    }
+  else
+    /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
+    return TWO1023*x;
 }