1 files changed, 128 insertions, 67 deletions
diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c
index e3b3bbc..cba0122 100644
--- a/sysdeps/ieee754/flt-32/e_acosf.c
+++ b/sysdeps/ieee754/flt-32/e_acosf.c
@@ -1,78 +1,139 @@
-/* e_acosf.c -- float version of e_acos.c.
- */
+/* Correctly-rounded arc-cosine function for binary32 value.
 
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+Copyright (c) 2023-2024 Alexei Sibidanov.
 
+The original version of this file was copied from the CORE-MATH
+project (file src/binary32/acos/acosf.c, revision 61d7bef).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <errno.h>
 #include <math.h>
 #include <math_private.h>
 #include <libm-alias-finite.h>
+#include "math_config.h"
 
-static const float
-one =  1.0000000000e+00, /* 0x3F800000 */
-pi =  3.1415925026e+00, /* 0x40490fda */
-pio2_hi =  1.5707962513e+00, /* 0x3fc90fda */
-pio2_lo =  7.5497894159e-08, /* 0x33a22168 */
-pS0 =  1.6666667163e-01, /* 0x3e2aaaab */
-pS1 = -3.2556581497e-01, /* 0xbea6b090 */
-pS2 =  2.0121252537e-01, /* 0x3e4e0aa8 */
-pS3 = -4.0055535734e-02, /* 0xbd241146 */
-pS4 =  7.9153501429e-04, /* 0x3a4f7f04 */
-pS5 =  3.4793309169e-05, /* 0x3811ef08 */
-qS1 = -2.4033949375e+00, /* 0xc019d139 */
-qS2 =  2.0209457874e+00, /* 0x4001572d */
-qS3 = -6.8828397989e-01, /* 0xbf303361 */
-qS4 =  7.7038154006e-02; /* 0x3d9dc62e */
+static __attribute__ ((noinline)) float
+as_special (float x)
+{
+  const float pih = 0x1.921fb6p+1;
+  const float pil = -0x1p-24f;
+  uint32_t t = asuint (x);
+  if (t == (0x7fu << 23))
+    return 0.0f; /* x=1 */
+  if (t == (0x17fu << 23))
+    return pih + pil; /* x=-1 */
+  uint32_t ax = t << 1;
+  if (ax > (0xffu << 24))
+    return x + x; /* nan */
+  return __math_invalidf (0.0);
+}
+
+static inline double
+poly12 (double z, const double *c)
+{
+  double z2 = z * z, z4 = z2 * z2;
+  double c0 = c[0] + z * c[1];
+  double c2 = c[2] + z * c[3];
+  double c4 = c[4] + z * c[5];
+  double c6 = c[6] + z * c[7];
+  double c8 = c[8] + z * c[9];
+  double c10 = c[10] + z * c[11];
+  c0 += c2 * z2;
+  c4 += c6 * z2;
+  c8 += z2 * c10;
+  c0 += z4 * (c4 + z4 * c8);
+  return c0;
+}
 
 float
-__ieee754_acosf(float x)
+__ieee754_acosf (float x)
 {
-	float z,p,q,r,w,s,c,df;
-	int32_t hx,ix;
-	GET_FLOAT_WORD(hx,x);
-	ix = hx&0x7fffffff;
-	if(ix==0x3f800000) {		/* |x|==1 */
-	    if(hx>0) return 0.0;	/* acos(1) = 0  */
-	    else return pi+(float)2.0*pio2_lo;	/* acos(-1)= pi */
-	} else if(ix>0x3f800000) {	/* |x| >= 1 */
-	    return (x-x)/(x-x);		/* acos(|x|>1) is NaN */
-	}
-	if(ix<0x3f000000) {	/* |x| < 0.5 */
-	    if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<=2**-26*/
-	    z = x*x;
-	    p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-	    q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-	    r = p/q;
-	    return pio2_hi - (x - (pio2_lo-x*r));
-	} else  if (hx<0) {		/* x < -0.5 */
-	    z = (one+x)*(float)0.5;
-	    p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-	    q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-	    s = sqrtf(z);
-	    r = p/q;
-	    w = r*s-pio2_lo;
-	    return pi - (float)2.0*(s+w);
-	} else {			/* x > 0.5 */
-	    int32_t idf;
-	    z = (one-x)*(float)0.5;
-	    s = sqrtf(z);
-	    df = s;
-	    GET_FLOAT_WORD(idf,df);
-	    SET_FLOAT_WORD(df,idf&0xfffff000);
-	    c  = (z-df*df)/(s+df);
-	    p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
-	    q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-	    r = p/q;
-	    w = r*s+c;
-	    return (float)2.0*(df+w);
-	}
+  const double pi2 = 0x1.921fb54442d18p+0;
+  static const double o[] = { 0, 0x1.921fb54442d18p+1 };
+  double xs = x;
+  double r;
+  uint32_t t = asuint (x);
+  uint32_t ax = t << 1;
+  if (__glibc_unlikely (ax >= 0x7f<<24))
+    return as_special (x);
+  if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */
+    {
+      static const double b[] =
+	{
+	   0x1.fffffffd9ccb8p-1,  0x1.5555c94838007p-3,  0x1.32ded4b7c20fap-4,
+	   0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6,  0x1.56fbb04998344p-1,
+	  -0x1.403d8e4c49f52p+2,  0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6,
+	   0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9,  0x1.a558a59cc19d3p+9,
+	  -0x1.aca4b6a529ffp+9,   0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7,
+	   0x1.5c2018c0c0105p+5
+	};
+      /* Avoid spurious underflow exception.  */
+      if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */
+	return (float) pi2;
+      double z = xs;
+      double z2 = z * z;
+      double z4 = z2 * z2;
+      double z8 = z4 * z4;
+      double z16 = z8 * z8;
+      r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3]))
+		+ z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7])))
+	       + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11]))
+			+ z8
+			* ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15]))));
+      float ub = 0x1.921fb54574191p+0 - r;
+      float lb = 0x1.921fb543118ap+0 - r;
+      if (ub == lb)
+	return ub;
+    }
+  /* accurate path  */
+  if (ax < (0x7eu << 24))
+    {
+      static const double c[] =
+	{
+	  0x1.555555555529cp-3,  0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
+	  0x1.f1c72e13ac306p-6,  0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
+	  0x1.c6dbbcc7c6315p-7,  0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
+	  0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
+	};
+      if (t == 0x328885a3u)
+	return 0x1.921fb6p+0f + 0x1p-25;
+      if (t == 0x39826222u)
+	return 0x1.920f6ap+0f + 0x1p-25;
+      double x2 = xs * xs;
+      r = (pi2 - xs) - (xs * x2) * poly12 (x2, c);
+    }
+  else
+    {
+      static const double c[] =
+	{
+	  0x1.6a09e667f3bcbp+0,   0x1.e2b7dddff2db9p-4,  0x1.b27247ab42dbcp-6,
+	  0x1.02995cc4e0744p-7,   0x1.5ffb0276ec8eap-9,  0x1.033885a928decp-10,
+	  0x1.911f2be23f8c7p-12,  0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
+	  0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
+	};
+      double bx = fabs (xs);
+      double z = 1.0 - bx;
+      double s = copysign (sqrt (z), xs);
+      r = o[t >> 31] + s * poly12 (z, c);
+    }
+  return r;
 }
 libm_alias_finite (__ieee754_acosf, __acosf)