diff options
Diffstat (limited to 'sysdeps/ieee754/flt-32/e_acosf.c')
-rw-r--r-- | sysdeps/ieee754/flt-32/e_acosf.c | 195 |
1 files changed, 128 insertions, 67 deletions
diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c index e3b3bbc..cba0122 100644 --- a/sysdeps/ieee754/flt-32/e_acosf.c +++ b/sysdeps/ieee754/flt-32/e_acosf.c @@ -1,78 +1,139 @@ -/* e_acosf.c -- float version of e_acos.c. - */ +/* Correctly-rounded arc-cosine function for binary32 value. -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ +Copyright (c) 2023-2024 Alexei Sibidanov. +The original version of this file was copied from the CORE-MATH +project (file src/binary32/acos/acosf.c, revision 61d7bef). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include <errno.h> #include <math.h> #include <math_private.h> #include <libm-alias-finite.h> +#include "math_config.h" -static const float -one = 1.0000000000e+00, /* 0x3F800000 */ -pi = 3.1415925026e+00, /* 0x40490fda */ -pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */ -pio2_lo = 7.5497894159e-08, /* 0x33a22168 */ -pS0 = 1.6666667163e-01, /* 0x3e2aaaab */ -pS1 = -3.2556581497e-01, /* 0xbea6b090 */ -pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */ -pS3 = -4.0055535734e-02, /* 0xbd241146 */ -pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */ -pS5 = 3.4793309169e-05, /* 0x3811ef08 */ -qS1 = -2.4033949375e+00, /* 0xc019d139 */ -qS2 = 2.0209457874e+00, /* 0x4001572d */ -qS3 = -6.8828397989e-01, /* 0xbf303361 */ -qS4 = 7.7038154006e-02; /* 0x3d9dc62e */ +static __attribute__ ((noinline)) float +as_special (float x) +{ + const float pih = 0x1.921fb6p+1; + const float pil = -0x1p-24f; + uint32_t t = asuint (x); + if (t == (0x7fu << 23)) + return 0.0f; /* x=1 */ + if (t == (0x17fu << 23)) + return pih + pil; /* x=-1 */ + uint32_t ax = t << 1; + if (ax > (0xffu << 24)) + return x + x; /* nan */ + return __math_invalidf (0.0); +} + +static inline double +poly12 (double z, const double *c) +{ + double z2 = z * z, z4 = z2 * z2; + double c0 = c[0] + z * c[1]; + double c2 = c[2] + z * c[3]; + double c4 = c[4] + z * c[5]; + double c6 = c[6] + z * c[7]; + double c8 = c[8] + z * c[9]; + double c10 = c[10] + z * c[11]; + c0 += c2 * z2; + c4 += c6 * z2; + c8 += z2 * c10; + c0 += z4 * (c4 + z4 * c8); + return c0; +} float -__ieee754_acosf(float x) +__ieee754_acosf (float x) { - float z,p,q,r,w,s,c,df; - int32_t hx,ix; - GET_FLOAT_WORD(hx,x); - ix = hx&0x7fffffff; - if(ix==0x3f800000) { /* |x|==1 */ - if(hx>0) return 0.0; /* acos(1) = 0 */ - else return pi+(float)2.0*pio2_lo; /* acos(-1)= pi */ - } else if(ix>0x3f800000) { /* |x| >= 1 */ - return (x-x)/(x-x); /* acos(|x|>1) is NaN */ - } - if(ix<0x3f000000) { /* |x| < 0.5 */ - if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<=2**-26*/ - z = x*x; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - r = p/q; - return pio2_hi - (x - (pio2_lo-x*r)); - } else if (hx<0) { /* x < -0.5 */ - z = (one+x)*(float)0.5; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - s = sqrtf(z); - r = p/q; - w = r*s-pio2_lo; - return pi - (float)2.0*(s+w); - } else { /* x > 0.5 */ - int32_t idf; - z = (one-x)*(float)0.5; - s = sqrtf(z); - df = s; - GET_FLOAT_WORD(idf,df); - SET_FLOAT_WORD(df,idf&0xfffff000); - c = (z-df*df)/(s+df); - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); - r = p/q; - w = r*s+c; - return (float)2.0*(df+w); - } + const double pi2 = 0x1.921fb54442d18p+0; + static const double o[] = { 0, 0x1.921fb54442d18p+1 }; + double xs = x; + double r; + uint32_t t = asuint (x); + uint32_t ax = t << 1; + if (__glibc_unlikely (ax >= 0x7f<<24)) + return as_special (x); + if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */ + { + static const double b[] = + { + 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4, + 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1, + -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6, + 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9, + -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7, + 0x1.5c2018c0c0105p+5 + }; + /* Avoid spurious underflow exception. */ + if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */ + return (float) pi2; + double z = xs; + double z2 = z * z; + double z4 = z2 * z2; + double z8 = z4 * z4; + double z16 = z8 * z8; + r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3])) + + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7]))) + + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11])) + + z8 + * ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15])))); + float ub = 0x1.921fb54574191p+0 - r; + float lb = 0x1.921fb543118ap+0 - r; + if (ub == lb) + return ub; + } + /* accurate path */ + if (ax < (0x7eu << 24)) + { + static const double c[] = + { + 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, + 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, + 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, + 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 + }; + if (t == 0x328885a3u) + return 0x1.921fb6p+0f + 0x1p-25; + if (t == 0x39826222u) + return 0x1.920f6ap+0f + 0x1p-25; + double x2 = xs * xs; + r = (pi2 - xs) - (xs * x2) * poly12 (x2, c); + } + else + { + static const double c[] = + { + 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, + 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, + 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, + 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 + }; + double bx = fabs (xs); + double z = 1.0 - bx; + double s = copysign (sqrt (z), xs); + r = o[t >> 31] + s * poly12 (z, c); + } + return r; } libm_alias_finite (__ieee754_acosf, __acosf) |