diff options
author | Ulrich Drepper <drepper@redhat.com> | 1998-04-04 07:46:55 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 1998-04-04 07:46:55 +0000 |
commit | 923609d1497f3116d57b297e3e84fc07b2b15b20 (patch) | |
tree | 2d49bc0960358fd0d21e4545905c43e1d9e258d8 /sysdeps | |
parent | 0d9f67937f0c9329c35c2c0d15848ab8316dc520 (diff) | |
download | glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.zip glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.tar.gz glibc-923609d1497f3116d57b297e3e84fc07b2b15b20.tar.bz2 |
Update.
1998-04-03 23:38 Ulrich Drepper <drepper@cygnus.com>
* sysdeps/libm-ieee754/e_acos.c: Optimize by splitting large
expressions and using array variables.
* sysdeps/libm-ieee754/e_asin.c: Likewise.
* sysdeps/libm-ieee754/e_j0.c: Likewise.
* sysdeps/libm-ieee754/e_j1.c: Likewise.
* sysdeps/libm-ieee754/e_log.c: Likewise.
* sysdeps/libm-ieee754/e_pow.c: Likewise.
* sysdeps/libm-ieee754/k_cos.c: Likewise.
* sysdeps/libm-ieee754/k_sin.c: Likewise.
* sysdeps/libm-ieee754/k_tan.c: Likewise.
* sysdeps/libm-ieee754/s_atan.c: Likewise.
* sysdeps/libm-ieee754/s_erf.c: Likewise.
* sysdeps/libm-ieee754/s_expm1.c: Likewise.
* sysdeps/libm-ieee754/s_log1p.c: Likewise.
Patch by Naohiko Shimizu <nshimizu@et.u-tokai.ac.jp>.
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/libm-ieee754/e_acos.c | 67 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/e_asin.c | 48 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/e_j0.c | 92 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/e_j1.c | 72 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/e_log.c | 40 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/e_pow.c | 160 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/k_cos.c | 55 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/k_sin.c | 46 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/k_tan.c | 36 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/s_atan.c | 21 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/s_erf.c | 213 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/s_expm1.c | 24 | ||||
-rw-r--r-- | sysdeps/libm-ieee754/s_log1p.c | 27 |
13 files changed, 644 insertions, 257 deletions
diff --git a/sysdeps/libm-ieee754/e_acos.c b/sysdeps/libm-ieee754/e_acos.c index 78bdae9..fa858de 100644 --- a/sysdeps/libm-ieee754/e_acos.c +++ b/sysdeps/libm-ieee754/e_acos.c @@ -5,24 +5,27 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. + */ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $"; #endif /* __ieee754_acos(x) - * Method : + * Method : * acos(x) = pi/2 - asin(x) * acos(-x) = pi/2 + asin(x) * For |x|<=0.5 * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) * For x>0.5 * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) - * = 2asin(sqrt((1-x)/2)) + * = 2asin(sqrt((1-x)/2)) * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) * = 2f + (2c + 2s*z*R(z)) * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term @@ -40,26 +43,26 @@ static char rcsid[] = "$NetBSD: e_acos.c,v 1.9 1995/05/12 04:57:13 jtc Exp $"; #include "math.h" #include "math_private.h" +#define one qS[0] #ifdef __STDC__ -static const double +static const double #else -static double +static double #endif -one= 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ -pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ -pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ -pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ -pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ -pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ -pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ -qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ -qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ -qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ -qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ + -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ + 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ + -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ + 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ + 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */ +qS[] ={1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ + 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ + -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ + 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */ #ifdef __STDC__ double __ieee754_acos(double x) @@ -68,7 +71,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ double x; #endif { - double z,p,q,r,w,s,c,df; + double z,p,q,r,w,s,c,df,p1,p2,p3,q1,q2,q3,z2,z4,z6; int32_t hx,ix; GET_HIGH_WORD(hx,x); ix = hx&0x7fffffff; @@ -84,14 +87,34 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ if(ix<0x3fe00000) { /* |x| < 0.5 */ if(ix<=0x3c600000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/ z = x*x; +#ifdef DO_NOT_USE_THIS p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); +#else + p1 = z*pS[0]; z2=z*z; + p2 = pS[1]+z*pS[2]; z4=z2*z2; + p3 = pS[3]+z*pS[4]; z6=z4*z2; + q1 = one+z*qS[1]; + q2 = qS[2]+z*qS[3]; + p = p1 + z2*p2 + z4*p3 + z6*pS[5]; + q = q1 + z2*q2 + z4*qS[4]; +#endif r = p/q; return pio2_hi - (x - (pio2_lo-x*r)); } else if (hx<0) { /* x < -0.5 */ z = (one+x)*0.5; +#ifdef DO_NOT_USE_THIS p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); +#else + p1 = z*pS[0]; z2=z*z; + p2 = pS[1]+z*pS[2]; z4=z2*z2; + p3 = pS[3]+z*pS[4]; z6=z4*z2; + q1 = one+z*qS[1]; + q2 = qS[2]+z*qS[3]; + p = p1 + z2*p2 + z4*p3 + z6*pS[5]; + q = q1 + z2*q2 + z4*qS[4]; +#endif s = __ieee754_sqrt(z); r = p/q; w = r*s-pio2_lo; @@ -102,8 +125,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ df = s; SET_LOW_WORD(df,0); c = (z-df*df)/(s+df); +#ifdef DO_NOT_USE_THIS p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); +#else + p1 = z*pS[0]; z2=z*z; + p2 = pS[1]+z*pS[2]; z4=z2*z2; + p3 = pS[3]+z*pS[4]; z6=z4*z2; + q1 = one+z*qS[1]; + q2 = qS[2]+z*qS[3]; + p = p1 + z2*p2 + z4*p3 + z6*pS[5]; + q = q1 + z2*q2 + z4*qS[4]; +#endif r = p/q; w = r*s+c; return 2.0*(df+w); diff --git a/sysdeps/libm-ieee754/e_asin.c b/sysdeps/libm-ieee754/e_asin.c index 09d06a9..65dc318 100644 --- a/sysdeps/libm-ieee754/e_asin.c +++ b/sysdeps/libm-ieee754/e_asin.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $"; @@ -47,28 +50,27 @@ static char rcsid[] = "$NetBSD: e_asin.c,v 1.9 1995/05/12 04:57:22 jtc Exp $"; #include "math.h" #include "math_private.h" - +#define one qS[0] #ifdef __STDC__ static const double #else static double #endif -one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ huge = 1.000e+300, pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ /* coefficient for R(x^2) */ -pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ -pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ -pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ -pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ -pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ -pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ -qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ -qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ -qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ -qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ +pS[] = {1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ + -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ + 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ + -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ + 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ + 3.47933107596021167570e-05}, /* 0x3F023DE1, 0x0DFDF709 */ +qS[] = {1.0, -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ + 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ + -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ + 7.70381505559019352791e-02}; /* 0x3FB3B8C5, 0xB12E9282 */ #ifdef __STDC__ double __ieee754_asin(double x) @@ -77,7 +79,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ double x; #endif { - double t,w,p,q,c,r,s; + double t,w,p,q,c,r,s,p1,p2,p3,q1,q2,q3,z2,z4,z6; int32_t hx,ix; GET_HIGH_WORD(hx,x); ix = hx&0x7fffffff; @@ -93,8 +95,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ if(huge+x>one) return x;/* return x with inexact if x!=0*/ } else { t = x*x; +#ifdef DO_NOT_USE_THIS p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); +#else + p1 = t*pS[0]; z2=t*t; + p2 = pS[1]+t*pS[2]; z4=z2*z2; + p3 = pS[3]+t*pS[4]; z6=z4*z2; + q1 = one+t*qS[1]; + q2 = qS[2]+t*qS[3]; + p = p1 + z2*p2 + z4*p3 + z6*pS[5]; + q = q1 + z2*q2 + z4*qS[4]; +#endif w = p/q; return x+x*w; } @@ -102,8 +114,18 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ /* 1> |x|>= 0.5 */ w = one-fabs(x); t = w*0.5; +#ifdef DO_NOT_USE_THIS p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); +#else + p1 = t*pS[0]; z2=t*t; + p2 = pS[1]+t*pS[2]; z4=z2*z2; + p3 = pS[3]+t*pS[4]; z6=z4*z2; + q1 = one+t*qS[1]; + q2 = qS[2]+t*qS[3]; + p = p1 + z2*p2 + z4*p3 + z6*pS[5]; + q = q1 + z2*q2 + z4*qS[4]; +#endif s = __ieee754_sqrt(t); if(ix>=0x3FEF3333) { /* if |x| > 0.975 */ w = p/q; diff --git a/sysdeps/libm-ieee754/e_j0.c b/sysdeps/libm-ieee754/e_j0.c index ac7e00a..ff4c73b 100644 --- a/sysdeps/libm-ieee754/e_j0.c +++ b/sysdeps/libm-ieee754/e_j0.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_j0.c,v 1.8 1995/05/10 20:45:23 jtc Exp $"; @@ -78,14 +81,14 @@ one = 1.0, invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ /* R0/S0 on [0, 2.00] */ -R02 = 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */ -R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */ -R04 = 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */ -R05 = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */ -S01 = 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */ -S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ -S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ -S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ +R[] = {0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */ + -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */ + 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */ + -4.61832688532103189199e-09}, /* 0xBE33D5E7, 0x73D63FCE */ +S[] = {0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */ + 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ + 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ + 1.16614003333790000205e-09}; /* 0x3E1408BC, 0xF4745D8F */ #ifdef __STDC__ static const double zero = 0.0; @@ -100,7 +103,7 @@ static double zero = 0.0; double x; #endif { - double z, s,c,ss,cc,r,u,v; + double z, s,c,ss,cc,r,u,v,r1,r2,s1,s2,z2,z4; int32_t hx,ix; GET_HIGH_WORD(hx,x); @@ -135,8 +138,17 @@ static double zero = 0.0; } } z = x*x; +#ifdef DO_NOT_USE_THIS r = z*(R02+z*(R03+z*(R04+z*R05))); s = one+z*(S01+z*(S02+z*(S03+z*S04))); +#else + r1 = z*R[2]; z2=z*z; + r2 = R[3]+z*R[4]; z4=z2*z2; + r = r1 + z2*r2 + z4*R[5]; + s1 = one+z*S[1]; + s2 = S[2]+z*S[3]; + s = s1 + z2*s2 + z4*S[4]; +#endif if(ix < 0x3FF00000) { /* |x| < 1.00 */ return one + z*(-0.25+(r/s)); } else { @@ -150,17 +162,17 @@ static const double #else static double #endif -u00 = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */ -u01 = 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */ -u02 = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */ -u03 = 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */ -u04 = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */ -u05 = 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */ -u06 = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */ -v01 = 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */ -v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */ -v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */ -v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ +U[] = {-7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */ + 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */ + -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */ + 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */ + -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */ + 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */ + -3.98205194132103398453e-11}, /* 0xBDC5E43D, 0x693FB3C8 */ +V[] = {1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */ + 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */ + 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */ + 4.41110311332675467403e-10}; /* 0x3DFE5018, 0x3BD6D9EF */ #ifdef __STDC__ double __ieee754_y0(double x) @@ -169,7 +181,7 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ double x; #endif { - double z, s,c,ss,cc,u,v; + double z, s,c,ss,cc,u,v,z2,z4,z6,u1,u2,u3,v1,v2; int32_t hx,ix,lx; EXTRACT_WORDS(hx,lx,x); @@ -211,11 +223,21 @@ v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ return z; } if(ix<=0x3e400000) { /* x < 2**-27 */ - return(u00 + tpi*__ieee754_log(x)); + return(U[0] + tpi*__ieee754_log(x)); } z = x*x; +#ifdef DO_NOT_USE_THIS u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06))))); v = one+z*(v01+z*(v02+z*(v03+z*v04))); +#else + u1 = U[0]+z*U[1]; z2=z*z; + u2 = U[2]+z*U[3]; z4=z2*z2; + u3 = U[4]+z*U[5]; z6=z4*z2; + u = u1 + z2*u2 + z4*u3 + z6*U[6]; + v1 = one+z*V[1]; + v2 = V[2]+z*V[3]; + v = v1 + z2*v2 + z4*V[4]; +#endif return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x))); } @@ -336,7 +358,7 @@ static double pS2[5] = { #else double *p,*q; #endif - double z,r,s; + double z,r,s,z2,z4,r1,r2,r3,s1,s2,s3; int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; @@ -345,8 +367,19 @@ static double pS2[5] = { else if(ix>=0x4006DB6D){p = pR3; q= pS3;} else if(ix>=0x40000000){p = pR2; q= pS2;} z = one/(x*x); +#ifdef DO_NOT_USE_THIS r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); +#else + r1 = p[0]+z*p[1]; z2=z*z; + r2 = p[2]+z*p[3]; z4=z2*z2; + r3 = p[4]+z*p[5]; + r = r1 + z2*r2 + z4*r3; + s1 = one+z*q[0]; + s2 = q[1]+z*q[2]; + s3 = q[3]+z*q[4]; + s = s1 + z2*s2 + z4*s3; +#endif return one+ r/s; } @@ -472,7 +505,7 @@ static double qS2[6] = { #else double *p,*q; #endif - double s,r,z; + double s,r,z,z2,z4,z6,r1,r2,r3,s1,s2,s3; int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; @@ -481,7 +514,18 @@ static double qS2[6] = { else if(ix>=0x4006DB6D){p = qR3; q= qS3;} else if(ix>=0x40000000){p = qR2; q= qS2;} z = one/(x*x); +#ifdef DO_NOT_USE_THIS r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); +#else + r1 = p[0]+z*p[1]; z2=z*z; + r2 = p[2]+z*p[3]; z4=z2*z2; + r3 = p[4]+z*p[5]; z6=z4*z2; + r= r1 + z2*r2 + z4*r3; + s1 = one+z*q[0]; + s2 = q[1]+z*q[2]; + s3 = q[3]+z*q[4]; + s = s1 + z2*s2 + z4*s3 +z6*q[5]; +#endif return (-.125 + r/s)/x; } diff --git a/sysdeps/libm-ieee754/e_j1.c b/sysdeps/libm-ieee754/e_j1.c index cdc18dd..daf025f 100644 --- a/sysdeps/libm-ieee754/e_j1.c +++ b/sysdeps/libm-ieee754/e_j1.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_j1.c,v 1.8 1995/05/10 20:45:27 jtc Exp $"; @@ -78,15 +81,15 @@ one = 1.0, invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ /* R0/S0 on [0,2] */ -r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */ -r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */ -r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */ -r03 = 4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */ -s01 = 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */ -s02 = 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */ -s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */ -s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */ -s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ +R[] = {-6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */ + 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */ + -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */ + 4.96727999609584448412e-08}, /* 0x3E6AAAFA, 0x46CA0BD9 */ +S[] = {0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */ + 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */ + 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */ + 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */ + 1.23542274426137913908e-11}; /* 0x3DAB2ACF, 0xCFB97ED8 */ #ifdef __STDC__ static const double zero = 0.0; @@ -101,7 +104,7 @@ static double zero = 0.0; double x; #endif { - double z, s,c,ss,cc,r,u,v,y; + double z, s,c,ss,cc,r,u,v,y,r1,r2,s1,s2,s3,z2,z4; int32_t hx,ix; GET_HIGH_WORD(hx,x); @@ -134,9 +137,20 @@ static double zero = 0.0; if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */ } z = x*x; +#ifdef DO_NOT_USE_THIS r = z*(r00+z*(r01+z*(r02+z*r03))); s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05)))); r *= x; +#else + r1 = z*R[0]; z2=z*z; + r2 = R[1]+z*R[2]; z4=z2*z2; + r = r1 + z2*r2 + z4*R[3]; + r *= x; + s1 = one+z*S[1]; + s2 = S[2]+z*S[3]; + s3 = S[4]+z*S[5]; + s = s1 + z2*s2 + z4*s3; +#endif return(x*0.5+r/s); } @@ -170,7 +184,7 @@ static double V0[5] = { double x; #endif { - double z, s,c,ss,cc,u,v; + double z, s,c,ss,cc,u,v,u1,u2,v1,v2,v3,z2,z4; int32_t hx,ix,lx; EXTRACT_WORDS(hx,lx,x); @@ -211,8 +225,18 @@ static double V0[5] = { return(-tpi/x); } z = x*x; +#ifdef DO_NOT_USE_THIS u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); +#else + u1 = U0[0]+z*U0[1];z2=z*z; + u2 = U0[2]+z*U0[3];z4=z2*z2; + u = u1 + z2*u2 + z4*U0[4]; + v1 = one+z*V0[0]; + v2 = V0[1]+z*V0[2]; + v3 = V0[3]+z*V0[4]; + v = v1 + z2*v2 + z4*v3; +#endif return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x)); } @@ -334,7 +358,7 @@ static double ps2[5] = { #else double *p,*q; #endif - double z,r,s; + double z,r,s,r1,r2,r3,s1,s2,s3,z2,z4; int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; @@ -343,8 +367,19 @@ static double ps2[5] = { else if(ix>=0x4006DB6D){p = pr3; q= ps3;} else if(ix>=0x40000000){p = pr2; q= ps2;} z = one/(x*x); +#ifdef DO_NOT_USE_THIS r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); +#else + r1 = p[0]+z*p[1]; z2=z*z; + r2 = p[2]+z*p[3]; z4=z2*z2; + r3 = p[4]+z*p[5]; + r = r1 + z2*r2 + z4*r3; + s1 = one+z*q[0]; + s2 = q[1]+z*q[2]; + s3 = q[3]+z*q[4]; + s = s1 + z2*s2 + z4*s3; +#endif return one+ r/s; } @@ -471,7 +506,7 @@ static double qs2[6] = { #else double *p,*q; #endif - double s,r,z; + double s,r,z,r1,r2,r3,s1,s2,s3,z2,z4,z6; int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; @@ -480,7 +515,18 @@ static double qs2[6] = { else if(ix>=0x4006DB6D){p = qr3; q= qs3;} else if(ix>=0x40000000){p = qr2; q= qs2;} z = one/(x*x); +#ifdef DO_NOT_USE_THIS r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); +#else + r1 = p[0]+z*p[1]; z2=z*z; + r2 = p[2]+z*p[3]; z4=z2*z2; + r3 = p[4]+z*p[5]; z6=z4*z2; + r = r1 + z2*r2 + z4*r3; + s1 = one+z*q[0]; + s2 = q[1]+z*q[2]; + s3 = q[3]+z*q[4]; + s = s1 + z2*s2 + z4*s3 + z6*q[5]; +#endif return (.375 + r/s)/x; } diff --git a/sysdeps/libm-ieee754/e_log.c b/sysdeps/libm-ieee754/e_log.c index e4e6eab..5b8c7b3 100644 --- a/sysdeps/libm-ieee754/e_log.c +++ b/sysdeps/libm-ieee754/e_log.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $"; @@ -67,7 +70,8 @@ static char rcsid[] = "$NetBSD: e_log.c,v 1.8 1995/05/10 20:45:49 jtc Exp $"; #include "math.h" #include "math_private.h" - +#define half Lg[8] +#define two Lg[9] #ifdef __STDC__ static const double #else @@ -76,14 +80,16 @@ static double ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ -Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ - + Lg[] = {0.0, + 6.666666666666735130e-01, /* 3FE55555 55555593 */ + 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ + 2.857142874366239149e-01, /* 3FD24924 94229359 */ + 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ + 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ + 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ + 1.479819860511658591e-01, /* 3FC2F112 DF3E5244 */ + 0.5, + 2.0}; #ifdef __STDC__ static const double zero = 0.0; #else @@ -97,7 +103,7 @@ static double zero = 0.0; double x; #endif { - double hfsq,f,s,z,R,w,t1,t2,dk; + double hfsq,f,s,z,R,w,t1,t2,dk,t11,t12,t21,t22,w2,zw2; int32_t k,hx,i,j; u_int32_t lx; @@ -121,20 +127,28 @@ static double zero = 0.0; if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */ if(f==zero) if(k==0) return zero; else {dk=(double)k; return dk*ln2_hi+dk*ln2_lo;} - R = f*f*(0.5-0.33333333333333333*f); + R = f*f*(half-0.33333333333333333*f); if(k==0) return f-R; else {dk=(double)k; return dk*ln2_hi-((R-dk*ln2_lo)-f);} } - s = f/(2.0+f); + s = f/(two+f); dk = (double)k; z = s*s; i = hx-0x6147a; w = z*z; j = 0x6b851-hx; +#ifdef DO_NOT_USE_THIS t1= w*(Lg2+w*(Lg4+w*Lg6)); t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - i |= j; R = t2+t1; +#else + t21 = Lg[5]+w*Lg[7]; w2=w*w; + t22 = Lg[1]+w*Lg[3]; zw2=z*w2; + t11 = Lg[4]+w*Lg[6]; + t12 = w*Lg[2]; + R = t12 + w2*t11 + z*t22 + zw2*t21; +#endif + i |= j; if(i>0) { hfsq=0.5*f*f; if(k==0) return f-(hfsq-s*(hfsq+R)); else diff --git a/sysdeps/libm-ieee754/e_pow.c b/sysdeps/libm-ieee754/e_pow.c index 4b9ba3d..02b16c5 100644 --- a/sysdeps/libm-ieee754/e_pow.c +++ b/sysdeps/libm-ieee754/e_pow.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $"; @@ -61,6 +64,33 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $"; #include "math.h" #include "math_private.h" +#define zero C[0] +#define one C[1] +#define two C[2] +#define two53 C[3] +#define huge C[4] +#define tiny C[5] +#define L1 C[6] +#define L2 C[7] +#define L3 C[8] +#define L4 C[9] +#define L5 C[10] +#define L6 C[11] +#define P1 C[12] +#define P2 C[13] +#define P3 C[14] +#define P4 C[15] +#define P5 C[16] +#define lg2 C[17] +#define lg2_h C[18] +#define lg2_l C[19] +#define ovt C[20] +#define cp C[21] +#define cp_h C[22] +#define cp_l C[23] +#define ivln2 C[24] +#define ivln2_h C[25] +#define ivln2_l C[26] #ifdef __STDC__ static const double @@ -70,34 +100,34 @@ static double bp[] = {1.0, 1.5,}, dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */ -zero = 0.0, -one = 1.0, -two = 2.0, -two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */ -huge = 1.0e300, -tiny = 1.0e-300, - /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */ -L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */ -L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */ -L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */ -L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */ -L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */ -L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */ -P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ -P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ -P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ -P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ -P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */ -lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ -lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */ -lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */ -ovt = 8.0085662595372944372e-0017, /* -(1024-log2(ovfl+.5ulp)) */ -cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */ -cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */ -cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/ -ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */ -ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/ -ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ +C[] = { +0.0, +1.0, +2.0, +9007199254740992.0 , +1.0e300, +1.0e-300, +5.99999999999994648725e-01 , +4.28571428578550184252e-01 , +3.33333329818377432918e-01 , +2.72728123808534006489e-01 , +2.30660745775561754067e-01 , +2.06975017800338417784e-01 , +1.66666666666666019037e-01 , +-2.77777777770155933842e-03 , +6.61375632143793436117e-05 , +-1.65339022054652515390e-06 , +4.13813679705723846039e-08 , +6.93147180559945286227e-01 , +6.93147182464599609375e-01 , +-1.90465429995776804525e-09 , +8.0085662595372944372e-0017 , +9.61796693925975554329e-01 , +9.61796700954437255859e-01 , +-7.02846165095275826516e-09 , +1.44269504088896338700e+00 , +1.44269502162933349609e+00 , +1.92596299112661746887e-08 }; #ifdef __STDC__ double __ieee754_pow(double x, double y) @@ -107,7 +137,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ #endif { double z,ax,z_h,z_l,p_h,p_l; - double y1,t1,t2,r,s,t,u,v,w; + double y1,t1,t2,r,s,t,u,v,w, t12,t14,r_1,r_2,r_3; int32_t i,j,k,yisint,n; int32_t hx,hy,ix,iy; u_int32_t lx,ly; @@ -117,7 +147,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ ix = hx&0x7fffffff; iy = hy&0x7fffffff; /* y==zero: x**0 = 1 */ - if((iy|ly)==0) return one; + if((iy|ly)==0) return C[1]; /* +-NaN return x+y */ if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || @@ -150,12 +180,12 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ if(((ix-0x3ff00000)|lx)==0) return y - y; /* inf**+-1 is NaN */ else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */ - return (hy>=0)? y: zero; + return (hy>=0)? y: C[0]; else /* (|x|<1)**-,+inf = inf,0 */ - return (hy<0)?-y: zero; + return (hy<0)?-y: C[0]; } if(iy==0x3ff00000) { /* y is +-1 */ - if(hy<0) return one/x; else return x; + if(hy<0) return C[1]/x; else return x; } if(hy==0x40000000) return x*x; /* y is 2 */ if(hy==0x3fe00000) { /* y is 0.5 */ @@ -169,7 +199,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ if(lx==0) { if(ix==0x7ff00000||ix==0||ix==0x3ff00000){ z = ax; /*x is +-0,+-inf,+-1*/ - if(hy<0) z = one/z; /* z = (1/|x|) */ + if(hy<0) z = C[1]/z; /* z = (1/|x|) */ if(hx<0) { if(((ix-0x3ff00000)|yisint)==0) { z = (z-z)/(z-z); /* (-1)**non-int is NaN */ @@ -186,27 +216,27 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ /* |y| is huge */ if(iy>0x41e00000) { /* if |y| > 2**31 */ if(iy>0x43f00000){ /* if |y| > 2**64, must o/uflow */ - if(ix<=0x3fefffff) return (hy<0)? huge*huge:tiny*tiny; - if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny; + if(ix<=0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5]; + if(ix>=0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5]; } /* over/underflow if x is not close to one */ - if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny; - if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny; + if(ix<0x3fefffff) return (hy<0)? C[4]*C[4]:C[5]*C[5]; + if(ix>0x3ff00000) return (hy>0)? C[4]*C[4]:C[5]*C[5]; /* now |1-x| is tiny <= 2**-20, suffice to compute log(x) by x-x^2/2+x^3/3-x^4/4 */ t = x-1; /* t has 20 trailing zeros */ w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25)); - u = ivln2_h*t; /* ivln2_h has 21 sig. bits */ - v = t*ivln2_l-w*ivln2; + u = C[25]*t; /* ivln2_h has 21 sig. bits */ + v = t*C[26]-w*C[24]; t1 = u+v; SET_LOW_WORD(t1,0); t2 = v-(t1-u); } else { - double s2,s_h,s_l,t_h,t_l; + double s2,s_h,s_l,t_h,t_l,s22,s24,s26,r1,r2,r3; n = 0; /* take care subnormal number */ if(ix<0x00100000) - {ax *= two53; n -= 53; GET_HIGH_WORD(ix,ax); } + {ax *= C[3]; n -= 53; GET_HIGH_WORD(ix,ax); } n += ((ix)>>20)-0x3ff; j = ix&0x000fffff; /* determine interval */ @@ -218,18 +248,25 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ - v = one/(ax+bp[k]); + v = C[1]/(ax+bp[k]); s = u*v; s_h = s; SET_LOW_WORD(s_h,0); /* t_h=ax+bp[k] High */ - t_h = zero; + t_h = C[0]; SET_HIGH_WORD(t_h,((ix>>1)|0x20000000)+0x00080000+(k<<18)); t_l = ax - (t_h-bp[k]); s_l = v*((u-s_h*t_h)-s_h*t_l); /* compute log(ax) */ s2 = s*s; +#ifdef DO_NOT_USE_THIS r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6))))); +#else + r1 = C[10]+s2*C[11]; s22=s2*s2; + r2 = C[8]+s2*C[9]; s24=s22*s22; + r3 = C[6]+s2*C[7]; s26=s24*s22; + r = r3*s22 + r2*s24 + r1*s26; +#endfi r += s_l*(s_h+s); s2 = s_h*s_h; t_h = 3.0+s2+r; @@ -242,8 +279,8 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ p_h = u+v; SET_LOW_WORD(p_h,0); p_l = v-(p_h-u); - z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ - z_l = cp_l*p_h+p_l*cp+dp_l[k]; + z_h = C[22]*p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = C[23]*p_h+p_l*C[21]+dp_l[k]; /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ t = (double)n; t1 = (((z_h+z_l)+dp_h[k])+t); @@ -251,9 +288,9 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ t2 = z_l-(((t1-t)-dp_h[k])-z_h); } - s = one; /* s (sign of result -ve**odd) = -1 else = 1 */ + s = C[1]; /* s (sign of result -ve**odd) = -1 else = 1 */ if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0) - s = -one;/* (-ve)**(odd int) */ + s = -C[1];/* (-ve)**(odd int) */ /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ y1 = y; @@ -264,15 +301,15 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ EXTRACT_WORDS(j,i,z); if (j>=0x40900000) { /* z >= 1024 */ if(((j-0x40900000)|i)!=0) /* if z > 1024 */ - return s*huge*huge; /* overflow */ + return s*C[4]*C[4]; /* overflow */ else { - if(p_l+ovt>z-p_h) return s*huge*huge; /* overflow */ + if(p_l+C[20]>z-p_h) return s*C[4]*C[4]; /* overflow */ } } else if((j&0x7fffffff)>=0x4090cc00 ) { /* z <= -1075 */ if(((j-0xc090cc00)|i)!=0) /* z < -1075 */ - return s*tiny*tiny; /* underflow */ + return s*C[5]*C[5]; /* underflow */ else { - if(p_l<=z-p_h) return s*tiny*tiny; /* underflow */ + if(p_l<=z-p_h) return s*C[5]*C[5]; /* underflow */ } } /* @@ -284,7 +321,7 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ if(i>0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */ n = j+(0x00100000>>(k+1)); k = ((n&0x7fffffff)>>20)-0x3ff; /* new k for n */ - t = zero; + t = C[0]; SET_HIGH_WORD(t,n&~(0x000fffff>>k)); n = ((n&0x000fffff)|0x00100000)>>(20-k); if(j<0) n = -n; @@ -292,14 +329,21 @@ ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ } t = p_l+p_h; SET_LOW_WORD(t,0); - u = t*lg2_h; - v = (p_l-(t-p_h))*lg2+t*lg2_l; + u = t*C[18]; + v = (p_l-(t-p_h))*C[17]+t*C[19]; z = u+v; w = v-(z-u); t = z*z; - t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); - r = (z*t1)/(t1-two)-(w+z*w); - z = one-(r-z); +#ifdef DO_NOT_USE_THIS + t1 = z - t*(C[12]+t*(C[13]+t*(C[14]+t*(C[15]+t*C[16])))); +#else + r_1 = C[15]+t*C[16]; t12 = t*t; + r_2 = C[13]+t*C[14]; t14 = t12*t12; + r_3 = t*C[12]; + t1 = z - r_3 - t12*r_2 - t14*r_1; +#endif + r = (z*t1)/(t1-C[2])-(w+z*w); + z = C[1]-(r-z); GET_HIGH_WORD(j,z); j += (n<<20); if((j>>20)<=0) z = __scalbn(z,n); /* subnormal output */ diff --git a/sysdeps/libm-ieee754/k_cos.c b/sysdeps/libm-ieee754/k_cos.c index d8740b3..7e38ef7 100644 --- a/sysdeps/libm-ieee754/k_cos.c +++ b/sysdeps/libm-ieee754/k_cos.c @@ -5,10 +5,13 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $"; @@ -18,7 +21,7 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $"; * __kernel_cos( x, y ) * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 * Input x is assumed to be bounded by ~pi/4 in magnitude. - * Input y is the tail of x. + * Input y is the tail of x. * * Algorithm * 1. Since cos(-x) = cos(x), we need only to consider positive x. @@ -28,15 +31,15 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $"; * 4 14 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x * where the remez error is - * + * * | 2 4 6 8 10 12 14 | -58 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 - * | | - * - * 4 6 8 10 12 14 + * | | + * + * 4 6 8 10 12 14 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then * cos(x) = 1 - x*x/2 + r - * since cos(x+y) ~ cos(x) - sin(x)*y + * since cos(x+y) ~ cos(x) - sin(x)*y * ~ cos(x) - x*y, * a correction term is necessary in cos(x) and hence * cos(x+y) = 1 - (x*x/2 - (r - x*y)) @@ -53,17 +56,18 @@ static char rcsid[] = "$NetBSD: k_cos.c,v 1.8 1995/05/10 20:46:22 jtc Exp $"; #include "math_private.h" #ifdef __STDC__ -static const double +static const double #else -static double +static double #endif -one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ -C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ -C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ -C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ -C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ -C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ -C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ +C[] = { + 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ + 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ + -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ + 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ + -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ + 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ + -1.13596475577881948265e-11}; /* 0xBDA8FAE9, 0xBE8838D4 */ #ifdef __STDC__ double __kernel_cos(double x, double y) @@ -72,17 +76,24 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ double x,y; #endif { - double a,hz,z,r,qx; + double a,hz,z,r,qx,r1,r2,r3,z1,z2,z3; int32_t ix; + z = x*x; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; /* ix = |x|'s high word*/ if(ix<0x3e400000) { /* if x < 2**27 */ - if(((int)x)==0) return one; /* generate inexact */ + if(((int)x)==0) return C[0]; /* generate inexact */ } - z = x*x; +#ifdef DO_NOT_USE_THIS r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); - if(ix < 0x3FD33333) /* if |x| < 0.3 */ - return one - (0.5*z - (z*r - x*y)); +#else + r1=z*C[6];r1=r1+C[5];z1=z*z; + r2=z*C[4];r2=r2+C[3];z2=z1*z; + r3=z*C[2];r3=r3+C[1];z3=z2*z1; + r=z3*r1+z2*r2+z*r3; +#endif + if(ix < 0x3FD33333) /* if |x| < 0.3 */ + return C[0] - (0.5*z - (z*r - x*y)); else { if(ix > 0x3fe90000) { /* x > 0.78125 */ qx = 0.28125; @@ -90,7 +101,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ INSERT_WORDS(qx,ix-0x00200000,0); /* x/4 */ } hz = 0.5*z-qx; - a = one-qx; + a = C[0]-qx; return a - (hz - (z*r-x*y)); } } diff --git a/sysdeps/libm-ieee754/k_sin.c b/sysdeps/libm-ieee754/k_sin.c index 86b9552..49c5922 100644 --- a/sysdeps/libm-ieee754/k_sin.c +++ b/sysdeps/libm-ieee754/k_sin.c @@ -5,10 +5,13 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $"; @@ -18,24 +21,24 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $"; * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 * Input x is assumed to be bounded by ~pi/4 in magnitude. * Input y is the tail of x. - * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). * * Algorithm - * 1. Since sin(-x) = -sin(x), we need only to consider positive x. + * 1. Since sin(-x) = -sin(x), we need only to consider positive x. * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. * 3. sin(x) is approximated by a polynomial of degree 13 on * [0,pi/4] * 3 13 * sin(x) ~ x + S1*x + ... + S6*x * where - * + * * |sin(x) 2 4 6 8 10 12 | -58 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 - * | x | - * + * | x | + * * 4. sin(x+y) = sin(x) + sin'(x')*y * ~ sin(x) + (1-x*x/2)*y - * For better accuracy, let + * For better accuracy, let * 3 2 2 2 2 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) * then 3 2 @@ -46,17 +49,18 @@ static char rcsid[] = "$NetBSD: k_sin.c,v 1.8 1995/05/10 20:46:31 jtc Exp $"; #include "math_private.h" #ifdef __STDC__ -static const double +static const double #else -static double +static double #endif -half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ -S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ -S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ -S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ -S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ -S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ -S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ +S[] = { + 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ + -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ + 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ + -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ + 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ + -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ + 1.58969099521155010221e-10}; /* 0x3DE5D93A, 0x5ACFD57C */ #ifdef __STDC__ double __kernel_sin(double x, double y, int iy) @@ -65,7 +69,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ double x,y; int iy; /* iy=0 if y is zero */ #endif { - double z,r,v; + double z,r,v,z1,r1,r2; int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; /* high word of x */ @@ -73,7 +77,15 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ {if((int)x==0) return x;} /* generate inexact */ z = x*x; v = z*x; +#ifdef DO_NOT_USE_THIS r = S2+z*(S3+z*(S4+z*(S5+z*S6))); if(iy==0) return x+v*(S1+z*r); else return x-((z*(half*y-v*r)-y)-v*S1); +#else + r1 = S[5]+z*S[6]; z1 = z*z*z; + r2 = S[3]+z*S[4]; + r = S[2] + z*r2 + z1*r1; + if(iy==0) return x+v*(S[1]+z*r); + else return x-((z*(S[0]*y-v*r)-y)-v*S[1]); +#endif } diff --git a/sysdeps/libm-ieee754/k_tan.c b/sysdeps/libm-ieee754/k_tan.c index aa9c67c..55dafb8 100644 --- a/sysdeps/libm-ieee754/k_tan.c +++ b/sysdeps/libm-ieee754/k_tan.c @@ -5,10 +5,13 @@ * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice + * software is freely granted, provided that this notice * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $"; @@ -18,25 +21,25 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $"; * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 * Input x is assumed to be bounded by ~pi/4 in magnitude. * Input y is the tail of x. - * Input k indicates whether tan (if k=1) or + * Input k indicates whether tan (if k=1) or * -1/tan (if k= -1) is returned. * * Algorithm - * 1. Since tan(-x) = -tan(x), we need only to consider positive x. + * 1. Since tan(-x) = -tan(x), we need only to consider positive x. * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. * 3. tan(x) is approximated by a odd polynomial of degree 27 on * [0,0.67434] * 3 27 * tan(x) ~ x + T1*x + ... + T13*x * where - * + * * |tan(x) 2 4 26 | -59.2 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 - * | x | - * + * | x | + * * Note: tan(x+y) = tan(x) + tan'(x)*y * ~ tan(x) + (1+x*x)*y - * Therefore, for better accuracy in computing tan(x+y), let + * Therefore, for better accuracy in computing tan(x+y), let * 3 2 2 2 2 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) * then @@ -51,9 +54,9 @@ static char rcsid[] = "$NetBSD: k_tan.c,v 1.8 1995/05/10 20:46:37 jtc Exp $"; #include "math.h" #include "math_private.h" #ifdef __STDC__ -static const double +static const double #else -static double +static double #endif one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ @@ -81,7 +84,7 @@ T[] = { double x,y; int iy; #endif { - double z,r,v,w,s; + double z,r,v,w,s,r1,r2,r3,v1,v2,v3,w2,w4; int32_t ix,hx; GET_HIGH_WORD(hx,x); ix = hx&0x7fffffff; /* high word of |x| */ @@ -105,8 +108,19 @@ T[] = { * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) */ +#ifdef DO_NOT_USE_THIS r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11])))); v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12]))))); +#else + v1 = T[10]+w*T[12]; w2=w*w; + v2 = T[6]+w*T[8]; w4=w2*w2; + v3 = T[2]+w*T[4]; v1=z*v1; + r1 = T[9]+w*T[11]; v2=z*v2; + r2 = T[5]+w*T[7]; v3=z*v3; + r3 = T[1]+w*T[3]; + v = v3 + w2*v2 + w4*v1; + r = r3 + w2*r2 + w4*r1; +#endif s = z*x; r = y + z*(s*(r+v)+y); r += T[0]*s; @@ -116,7 +130,7 @@ T[] = { return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r))); } if(iy==1) return w; - else { /* if allow error up to 2 ulp, + else { /* if allow error up to 2 ulp, simply return -1.0/(x+r) here */ /* compute -1.0/(x+r) accurately */ double a,t; diff --git a/sysdeps/libm-ieee754/s_atan.c b/sysdeps/libm-ieee754/s_atan.c index 6657b15..cad3ba1 100644 --- a/sysdeps/libm-ieee754/s_atan.c +++ b/sysdeps/libm-ieee754/s_atan.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: s_atan.c,v 1.8 1995/05/10 20:46:45 jtc Exp $"; @@ -92,7 +95,7 @@ huge = 1.0e300; double x; #endif { - double w,s1,s2,z; + double w,s1,z,s,w2,w4,s11,s12,s13,s21,s22,s23; int32_t ix,hx,id; GET_HIGH_WORD(hx,x); @@ -129,6 +132,7 @@ huge = 1.0e300; z = x*x; w = z*z; /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ +#ifdef DO_NOT_USE_THIS s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10]))))); s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9])))); if (id<0) return x - x*(s1+s2); @@ -136,6 +140,21 @@ huge = 1.0e300; z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); return (hx<0)? -z:z; } +#else + s11 = aT[8]+w*aT[10]; w2=w*w; + s12 = aT[4]+w*aT[6]; w4=w2*w2; + s13 = aT[0]+w*aT[2]; + s21 = aT[7]+w*aT[9]; + s22 = aT[3]+w*aT[5]; + s23 = w*aT[1]; + s1 = s13 + w2*s12 + w4*s11; + s = s23 + w2*s22 + w4*s21 + z*s1; + if (id<0) return x - x*(s); + else { + z = atanhi[id] - ((x*(s) - atanlo[id]) - x); + return (hx<0)? -z:z; + } +#endif } weak_alias (__atan, atan) #ifdef NO_LONG_DOUBLE diff --git a/sysdeps/libm-ieee754/s_erf.c b/sysdeps/libm-ieee754/s_erf.c index f64f0fc..d8b6629 100644 --- a/sysdeps/libm-ieee754/s_erf.c +++ b/sysdeps/libm-ieee754/s_erf.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $"; @@ -128,68 +131,68 @@ erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */ */ efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */ efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */ -pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ -pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ -pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ -pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ -pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */ -qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ -qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ -qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ -qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ -qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */ +pp[] = {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ + -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ + -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ + -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ + -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */ +qq[] = {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ + 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ + 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ + 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ + -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */ /* * Coefficients for approximation to erf in [0.84375,1.25] */ -pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ -pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ -pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ -pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ -pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ -pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ -pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */ -qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ -qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ -qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ -qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ -qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ -qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */ +pa[] = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ + 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ + -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ + 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ + -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ + 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ + -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */ +qa[] = {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ + 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ + 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ + 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ + 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ + 1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */ /* * Coefficients for approximation to erfc in [1.25,1/0.35] */ -ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ -ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ -ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ -ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ -ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ -ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ -ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ -ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */ -sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ -sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ -sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ -sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ -sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ -sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ -sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ -sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */ +ra[] = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ + -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ + -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ + -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ + -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ + -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ + -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ + -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */ +sa[] = {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ + 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ + 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ + 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ + 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ + 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ + 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ + -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */ /* * Coefficients for approximation to erfc in [1/.35,28] */ -rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ -rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ -rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ -rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ -rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ -rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ -rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */ -sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ -sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ -sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ -sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ -sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ -sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ -sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ +rb[] = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ + -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ + -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ + -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ + -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ + -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ + -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */ +sb[] = {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ + 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ + 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ + 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ + 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ + 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ + -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */ #ifdef __STDC__ double __erf(double x) @@ -208,21 +211,46 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ } if(ix < 0x3feb0000) { /* |x|<0.84375 */ + double r1,r2,s1,s2,s3,z2,z4; if(ix < 0x3e300000) { /* |x|<2**-28 */ if (ix < 0x00800000) return 0.125*(8.0*x+efx8*x); /*avoid underflow */ return x + efx*x; } z = x*x; +#ifdef DO_NOT_USE_THIS r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); +#else + r1 = pp[0]+z*pp[1]; z2=z*z; + r2 = pp[2]+z*pp[3]; z4=z2*z2; + s1 = one+z*qq[1]; + s2 = qq[2]+z*qq[3]; + s3 = qq[4]+z*qq[5]; + r = r1 + z2*r2 + z4*pp[4]; + s = s1 + z2*s2 + z4*s3; +#endif y = r/s; return x + x*y; } if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4; s = fabs(x)-one; +#ifdef DO_NOT_USE_THIS P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); +#else + P1 = pa[0]+s*pa[1]; s2=s*s; + Q1 = one+s*qa[1]; s4=s2*s2; + P2 = pa[2]+s*pa[3]; s6=s4*s2; + Q2 = qa[2]+s*qa[3]; + P3 = pa[4]+s*pa[5]; + Q3 = qa[4]+s*qa[5]; + P4 = s6*pa[6]; + Q4 = s6*qa[6]; + P = P1 + s2*P2 + s4*P3 + s6*P4; + Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4; +#endif if(hx>=0) return erx + P/Q; else return -erx - P/Q; } if (ix >= 0x40180000) { /* inf>|x|>=6 */ @@ -231,15 +259,42 @@ sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ x = fabs(x); s = one/(x*x); if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */ +#ifdef DO_NOT_USE_THIS R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( ra5+s*(ra6+s*ra7)))))); S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( sa5+s*(sa6+s*(sa7+s*sa8))))))); +#else + double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8; + R1 = ra[0]+s*ra[1];s2 = s*s; + S1 = one+s*sa[1]; s4 = s2*s2; + R2 = ra[2]+s*ra[3];s6 = s4*s2; + S2 = sa[2]+s*sa[3];s8 = s4*s4; + R3 = ra[4]+s*ra[5]; + S3 = sa[4]+s*sa[5]; + R4 = ra[6]+s*ra[7]; + S4 = sa[6]+s*sa[7]; + R = R1 + s2*R2 + s4*R3 + s6*R4; + S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8]; +#endif } else { /* |x| >= 1/0.35 */ +#ifdef DO_NOT_USE_THIS R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( rb5+s*rb6))))); S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( sb5+s*(sb6+s*sb7)))))); +#else + double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6; + R1 = rb[0]+s*rb[1];s2 = s*s; + S1 = one+s*sb[1]; s4 = s2*s2; + R2 = rb[2]+s*rb[3];s6 = s4*s2; + S2 = sb[2]+s*sb[3]; + R3 = rb[4]+s*rb[5]; + S3 = sb[4]+s*sb[5]; + S4 = sb[6]+s*sb[7]; + R = R1 + s2*R2 + s4*R3 + s6*rb[6]; + S = S1 + s2*S2 + s4*S3 + s6*S4; +#endif } z = x; SET_LOW_WORD(z,0); @@ -269,11 +324,22 @@ weak_alias (__erf, erfl) } if(ix < 0x3feb0000) { /* |x|<0.84375 */ + double r1,r2,s1,s2,s3,z2,z4; if(ix < 0x3c700000) /* |x|<2**-56 */ return one-x; z = x*x; +#ifdef DO_NOT_USE_THIS r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); +#else + r1 = pp[0]+z*pp[1]; z2=z*z; + r2 = pp[2]+z*pp[3]; z4=z2*z2; + s1 = one+z*qq[1]; + s2 = qq[2]+z*qq[3]; + s3 = qq[4]+z*qq[5]; + r = r1 + z2*r2 + z4*pp[4]; + s = s1 + z2*s2 + z4*s3; +#endif y = r/s; if(hx < 0x3fd00000) { /* x<1/4 */ return one-(x+x*y); @@ -284,9 +350,23 @@ weak_alias (__erf, erfl) } } if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4; s = fabs(x)-one; +#ifdef DO_NOT_USE_THIS P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); +#else + P1 = pa[0]+s*pa[1]; s2=s*s; + Q1 = one+s*qa[1]; s4=s2*s2; + P2 = pa[2]+s*pa[3]; s6=s4*s2; + Q2 = qa[2]+s*qa[3]; + P3 = pa[4]+s*pa[5]; + Q3 = qa[4]+s*qa[5]; + P4 = s6*pa[6]; + Q4 = s6*qa[6]; + P = P1 + s2*P2 + s4*P3 + s6*P4; + Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4; +#endif if(hx>=0) { z = one-erx; return z - P/Q; } else { @@ -297,16 +377,43 @@ weak_alias (__erf, erfl) x = fabs(x); s = one/(x*x); if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/ +#ifdef DO_NOT_USE_THIS R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( ra5+s*(ra6+s*ra7)))))); S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( sa5+s*(sa6+s*(sa7+s*sa8))))))); +#else + double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8; + R1 = ra[0]+s*ra[1];s2 = s*s; + S1 = one+s*sa[1]; s4 = s2*s2; + R2 = ra[2]+s*ra[3];s6 = s4*s2; + S2 = sa[2]+s*sa[3];s8 = s4*s4; + R3 = ra[4]+s*ra[5]; + S3 = sa[4]+s*sa[5]; + R4 = ra[6]+s*ra[7]; + S4 = sa[6]+s*sa[7]; + R = R1 + s2*R2 + s4*R3 + s6*R4; + S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8]; +#endif } else { /* |x| >= 1/.35 ~ 2.857143 */ + double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6; if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */ +#ifdef DO_NOT_USE_THIS R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( rb5+s*rb6))))); S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( sb5+s*(sb6+s*sb7)))))); +#else + R1 = rb[0]+s*rb[1];s2 = s*s; + S1 = one+s*sb[1]; s4 = s2*s2; + R2 = rb[2]+s*rb[3];s6 = s4*s2; + S2 = sb[2]+s*sb[3]; + R3 = rb[4]+s*rb[5]; + S3 = sb[4]+s*sb[5]; + S4 = sb[6]+s*sb[7]; + R = R1 + s2*R2 + s4*R3 + s6*rb[6]; + S = S1 + s2*S2 + s4*S3 + s6*S4; +#endif } z = x; SET_LOW_WORD(z,0); diff --git a/sysdeps/libm-ieee754/s_expm1.c b/sysdeps/libm-ieee754/s_expm1.c index c8354b7..ed1aba5 100644 --- a/sysdeps/libm-ieee754/s_expm1.c +++ b/sysdeps/libm-ieee754/s_expm1.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $"; @@ -111,7 +114,7 @@ static char rcsid[] = "$NetBSD: s_expm1.c,v 1.8 1995/05/10 20:47:09 jtc Exp $"; #include "math.h" #include "math_private.h" - +#define one Q[0] #ifdef __STDC__ static const double #else @@ -125,11 +128,11 @@ ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */ ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */ invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */ /* scaled coefficients related to expm1 */ -Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */ -Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ -Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ -Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ -Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ +Q[] = {1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */ + 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ + -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ + 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ + -2.01099218183624371326e-07}; /* BE8AFDB7 6E09C32D */ #ifdef __STDC__ double __expm1(double x) @@ -138,7 +141,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ double x; #endif { - double y,hi,lo,c,t,e,hxs,hfx,r1; + double y,hi,lo,c,t,e,hxs,hfx,r1,h2,h4,R1,R2,R3; int32_t k,xsb; u_int32_t hx; @@ -190,7 +193,14 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ /* x is now in primary range */ hfx = 0.5*x; hxs = x*hfx; +#ifdef DO_NOT_USE_THIS r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); +#else + R1 = one+hxs*Q[1]; h2 = hxs*hxs; + R2 = Q[2]+hxs*Q[3]; h4 = h2*h2; + R3 = Q[4]+hxs*Q[5]; + r1 = R1 + h2*R2 + h4*R3; +#endif t = 3.0-r1*hfx; e = hxs*((r1-t)/(6.0 - x*t)); if(k==0) return x - (x*e-hxs); /* c is 0 */ diff --git a/sysdeps/libm-ieee754/s_log1p.c b/sysdeps/libm-ieee754/s_log1p.c index 086c0dc..4ca01b1 100644 --- a/sysdeps/libm-ieee754/s_log1p.c +++ b/sysdeps/libm-ieee754/s_log1p.c @@ -9,6 +9,9 @@ * is preserved. * ==================================================== */ +/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25, + for performance improvement on pipelined processors. +*/ #if defined(LIBM_SCCS) && !defined(lint) static char rcsid[] = "$NetBSD: s_log1p.c,v 1.8 1995/05/10 20:47:46 jtc Exp $"; @@ -90,13 +93,13 @@ static double ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ -Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ -Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ -Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ -Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ -Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ -Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ -Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +Lp[] = {0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */ + 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ + 2.857142874366239149e-01, /* 3FD24924 94229359 */ + 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ + 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ + 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ + 1.479819860511658591e-01}; /* 3FC2F112 DF3E5244 */ #ifdef __STDC__ static const double zero = 0.0; @@ -111,7 +114,7 @@ static double zero = 0.0; double x; #endif { - double hfsq,f,c,s,z,R,u; + double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4; int32_t k,hx,hu,ax; GET_HIGH_WORD(hx,x); @@ -167,7 +170,15 @@ static double zero = 0.0; } s = f/(2.0+f); z = s*s; +#ifdef DO_NOT_USE_THIS R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))); +#else + R1 = z*Lp[1]; z2=z*z; + R2 = Lp[2]+z*Lp[3]; z4=z2*z2; + R3 = Lp[4]+z*Lp[5]; z6=z4*z2; + R4 = Lp[6]+z*Lp[7]; + R = R1 + z2*R2 + z4*R3 + z6*R4; +#endif if(k==0) return f-(hfsq-s*(hfsq+R)); else return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); } |