about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog | 15
-rw-r--r-- NEWS | 2
-rw-r--r-- sysdeps/aarch64/libm-test-ulps | 24
-rw-r--r-- sysdeps/ieee754/flt-32/s_cosf.c | 161
-rw-r--r-- sysdeps/ieee754/flt-32/s_sincosf.h | 171
-rw-r--r-- sysdeps/ieee754/flt-32/s_sinf.c | 172
-rw-r--r-- sysdeps/x86_64/fpu/libm-test-ulps | 12
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c | 33
-rw-r--r-- sysdeps/x86_64/fpu/s_sincosf_data.c | 1
9 files changed, 207 insertions, 384 deletions
diff --git a/ChangeLog b/ChangeLog
index 6ed43e0..b3493b9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2018-08-14 Wilco Dijkstra <wdijkstr@arm.com>
+ Szabolcs Nagy <szabolcs.nagy@arm.com>
+
+ * NEWS: Mention sinf, cosf, sincosf.
+ * sysdeps/aarch64/libm-test-ulps: Update ULP for sinf, cosf, sincosf.
+ * sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sinf and cosf.
+ * sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c: Add definitions of
+ constants rather than including generic s_sincosf.h.
+ * sysdeps/x86_64/fpu/s_sincosf_data.c: Remove.
+ * sysdeps/ieee754/flt-32/s_cosf.c (cosf): Rewrite.
+ * sysdeps/ieee754/flt-32/s_sincosf.h (reduced_sin): Remove.
+ (reduced_cos): Remove.
+ (sinf_poly): New function.
+ * sysdeps/ieee754/flt-32/s_sinf.c (sinf): Rewrite.
+
2018-08-14 Florian Weimer <fweimer@redhat.com>
[BZ #23521]
diff --git a/NEWS b/NEWS
index 7868c84..d38b6c4 100644
--- a/NEWS
+++ b/NEWS
@@ -75,6 +75,8 @@ Major new features:
transliteration tables are all updated to Unicode 11.0.0, using
generator scripts contributed by Mike FABIAN (Red Hat).
+* Optimized generic sinf, cosf and sincosf.
+
* <math.h> functions that round their results to a narrower type are added
from TS 18661-1:2014 and TS 18661-3:2015:
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index be06085..6a89028 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1021,9 +1021,9 @@ ldouble: 1
Function: "cos_downward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 3
ldouble: 3
@@ -1037,9 +1037,9 @@ ldouble: 1
Function: "cos_upward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 2
ldouble: 2
@@ -1981,9 +1981,9 @@ ldouble: 1
Function: "sin_downward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 3
ldouble: 3
@@ -1997,9 +1997,9 @@ ldouble: 2
Function: "sin_upward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 3
ldouble: 3
@@ -2013,9 +2013,9 @@ ldouble: 1
Function: "sincos_downward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 3
ldouble: 3
@@ -2029,9 +2029,9 @@ ldouble: 2
Function: "sincos_upward":
double: 1
-float: 2
+float: 1
idouble: 1
-ifloat: 2
+ifloat: 1
ildouble: 3
ldouble: 3
diff --git a/sysdeps/ieee754/flt-32/s_cosf.c b/sysdeps/ieee754/flt-32/s_cosf.c
index 061264d..13b5ffe 100644
--- a/sysdeps/ieee754/flt-32/s_cosf.c
+++ b/sysdeps/ieee754/flt-32/s_cosf.c
@@ -1,5 +1,5 @@
/* Compute cosine of argument.
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <errno.h>
+#include <stdint.h>
#include <math.h>
-#include <math_private.h>
+#include <math-barriers.h>
#include <libm-alias-float.h>
+#include "math_config.h"
#include "s_sincosf.h"
#ifndef COSF
@@ -28,121 +29,57 @@
# define COSF_FUNC COSF
#endif
+/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative
+ error is 0.5303 * 2^-23. A single-step range reduction is used for
+ small values. Large inputs have their range reduced using fast integer
+ arithmetic.
+*/
float
-COSF_FUNC (float x)
+COSF_FUNC (float y)
{
- double theta = x;
- double abstheta = fabs (theta);
- if (isless (abstheta, M_PI_4))
+ double x = y;
+ double s;
+ int n;
+ const sincos_t *p = &__sincosf_table[0];
+
+ if (abstop12 (y) < abstop12 (pio4))
+ {
+ double x2 = x * x;
+
+ if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+ return 1.0f;
+
+ return sinf_poly (x, x2, p, 1);
+ }
+ else if (__glibc_likely (abstop12 (y) < abstop12 (120.0f)))
{
- double cx;
- if (abstheta >= 0x1p-5)
- {
- const double theta2 = theta * theta;
- /* Chebyshev polynomial of the form for cos:
- * 1 + x^2 (C0 + x^2 (C1 + x^2 (C2 + x^2 (C3 + x^2 * C4)))). */
- cx = C3 + theta2 * C4;
- cx = C2 + theta2 * cx;
- cx = C1 + theta2 * cx;
- cx = C0 + theta2 * cx;
- cx = 1. + theta2 * cx;
- return cx;
- }
- else if (abstheta >= 0x1p-27)
- {
- /* A simpler Chebyshev approximation is close enough for this range:
- * 1 + x^2 (CC0 + x^3 * CC1). */
- const double theta2 = theta * theta;
- cx = CC0 + theta * theta2 * CC1;
- cx = 1.0 + theta2 * cx;
- return cx;
- }
- else
- {
- /* For small enough |theta|, this is close enough. */
- return 1.0 - abstheta;
- }
+ x = reduce_fast (x, p, &n);
+
+ /* Setup the signs for sin and cos. */
+ s = p->sign[n & 3];
+
+ if (n & 2)
+ p = &__sincosf_table[1];
+
+ return sinf_poly (x * s, x * x, p, n ^ 1);
}
- else /* |theta| >= Pi/4. */
+ else if (abstop12 (y) < abstop12 (INFINITY))
{
- if (isless (abstheta, 9 * M_PI_4))
- {
- /* There are cases where FE_UPWARD rounding mode can
- produce a result of abstheta * inv_PI_4 == 9,
- where abstheta < 9pi/4, so the domain for
- pio2_table must go to 5 (9 / 2 + 1). */
- unsigned int n = (abstheta * inv_PI_4) + 1;
- theta = abstheta - pio2_table[n / 2];
- return reduced_cos (theta, n);
- }
- else if (isless (abstheta, INFINITY))
- {
- if (abstheta < 0x1p+23)
- {
- unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
- double x = n / 2;
- theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
- /* Argument reduction needed. */
- return reduced_cos (theta, n);
- }
- else /* |theta| >= 2^23. */
- {
- x = fabsf (x);
- int exponent;
- GET_FLOAT_WORD (exponent, x);
- exponent = (exponent >> FLOAT_EXPONENT_SHIFT)
- - FLOAT_EXPONENT_BIAS;
- exponent += 3;
- exponent /= 28;
- double a = invpio4_table[exponent] * x;
- double b = invpio4_table[exponent + 1] * x;
- double c = invpio4_table[exponent + 2] * x;
- double d = invpio4_table[exponent + 3] * x;
- uint64_t l = a;
- l &= ~0x7;
- a -= l;
- double e = a + b;
- l = e;
- e = a - l;
- if (l & 1)
- {
- e -= 1.0;
- e += b;
- e += c;
- e += d;
- e *= M_PI_4;
- return reduced_cos (e, l + 1);
- }
- else
- {
- e += b;
- e += c;
- e += d;
- if (e <= 1.0)
- {
- e *= M_PI_4;
- return reduced_cos (e, l + 1);
- }
- else
- {
- l++;
- e -= 2.0;
- e *= M_PI_4;
- return reduced_cos (e, l + 1);
- }
- }
- }
- }
- else
- {
- int32_t ix;
- GET_FLOAT_WORD (ix, abstheta);
- /* cos(Inf or NaN) is NaN. */
- if (ix == 0x7f800000) /* Inf. */
- __set_errno (EDOM);
- return x - x;
- }
+ uint32_t xi = asuint (y);
+ int sign = xi >> 31;
+
+ x = reduce_large (xi, &n);
+
+ /* Setup signs for sin and cos - include original sign. */
+ s = p->sign[(n + sign) & 3];
+
+ if ((n + sign) & 2)
+ p = &__sincosf_table[1];
+
+ return sinf_poly (x * s, x * x, p, n ^ 1);
}
+ else
+ return __math_invalidf (y);
}
#ifndef COSF
diff --git a/sysdeps/ieee754/flt-32/s_sincosf.h b/sysdeps/ieee754/flt-32/s_sincosf.h
index d3d7b4d..1dcb04f 100644
--- a/sysdeps/ieee754/flt-32/s_sincosf.h
+++ b/sysdeps/ieee754/flt-32/s_sincosf.h
@@ -1,5 +1,5 @@
/* Used by sinf, cosf and sincosf functions.
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -20,145 +20,6 @@
#include <math.h>
#include "math_config.h"
-/* Chebyshev constants for cos, range -PI/4 - PI/4. */
-static const double C0 = -0x1.ffffffffe98aep-2;
-static const double C1 = 0x1.55555545c50c7p-5;
-static const double C2 = -0x1.6c16b348b6874p-10;
-static const double C3 = 0x1.a00eb9ac43ccp-16;
-static const double C4 = -0x1.23c97dd8844d7p-22;
-
-/* Chebyshev constants for sin, range -PI/4 - PI/4. */
-static const double S0 = -0x1.5555555551cd9p-3;
-static const double S1 = 0x1.1111110c2688bp-7;
-static const double S2 = -0x1.a019f8b4bd1f9p-13;
-static const double S3 = 0x1.71d7264e6b5b4p-19;
-static const double S4 = -0x1.a947e1674b58ap-26;
-
-/* Chebyshev constants for sin, range 2^-27 - 2^-5. */
-static const double SS0 = -0x1.555555543d49dp-3;
-static const double SS1 = 0x1.110f475cec8c5p-7;
-
-/* Chebyshev constants for cos, range 2^-27 - 2^-5. */
-static const double CC0 = -0x1.fffffff5cc6fdp-2;
-static const double CC1 = 0x1.55514b178dac5p-5;
-
-/* PI/2 with 98 bits of accuracy. */
-static const double PI_2_hi = 0x1.921fb544p+0;
-static const double PI_2_lo = 0x1.0b4611a626332p-34;
-
-static const double SMALL = 0x1p-50; /* 2^-50. */
-static const double inv_PI_4 = 0x1.45f306dc9c883p+0; /* 4/PI. */
-
-#define FLOAT_EXPONENT_SHIFT 23
-#define FLOAT_EXPONENT_BIAS 127
-
-static const double pio2_table[] = {
- 0 * M_PI_2,
- 1 * M_PI_2,
- 2 * M_PI_2,
- 3 * M_PI_2,
- 4 * M_PI_2,
- 5 * M_PI_2
-};
-
-static const double invpio4_table[] = {
- 0x0p+0,
- 0x1.45f306cp+0,
- 0x1.c9c882ap-28,
- 0x1.4fe13a8p-58,
- 0x1.f47d4dp-85,
- 0x1.bb81b6cp-112,
- 0x1.4acc9ep-142,
- 0x1.0e4107cp-169
-};
-
-static const double ones[] = { 1.0, -1.0 };
-
-/* Compute the sine value using Chebyshev polynomials where
- THETA is the range reduced absolute value of the input
- and it is less than Pi/4,
- N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
- whether a sine or cosine approximation is more accurate and
- SIGNBIT is used to add the correct sign after the Chebyshev
- polynomial is computed. */
-static inline float
-reduced_sin (const double theta, const unsigned int n,
- const unsigned int signbit)
-{
- double sx;
- const double theta2 = theta * theta;
- /* We are operating on |x|, so we need to add back the original
- signbit for sinf. */
- double sign;
- /* Determine positive or negative primary interval. */
- sign = ones[((n >> 2) & 1) ^ signbit];
- /* Are we in the primary interval of sin or cos? */
- if ((n & 2) == 0)
- {
- /* Here sinf() is calculated using sin Chebyshev polynomial:
- x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
- sx = S3 + theta2 * S4; /* S3+x^2*S4. */
- sx = S2 + theta2 * sx; /* S2+x^2*(S3+x^2*S4). */
- sx = S1 + theta2 * sx; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */
- sx = S0 + theta2 * sx; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */
- sx = theta + theta * theta2 * sx;
- }
- else
- {
- /* Here sinf() is calculated using cos Chebyshev polynomial:
- 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */
- sx = C3 + theta2 * C4; /* C3+x^2*C4. */
- sx = C2 + theta2 * sx; /* C2+x^2*(C3+x^2*C4). */
- sx = C1 + theta2 * sx; /* C1+x^2*(C2+x^2*(C3+x^2*C4)). */
- sx = C0 + theta2 * sx; /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))). */
- sx = 1.0 + theta2 * sx;
- }
-
- /* Add in the signbit and assign the result. */
- return sign * sx;
-}
-
-/* Compute the cosine value using Chebyshev polynomials where
- THETA is the range reduced absolute value of the input
- and it is less than Pi/4,
- N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
- whether a sine or cosine approximation is more accurate and
- the sign of the result. */
-static inline float
-reduced_cos (double theta, unsigned int n)
-{
- double sign, cx;
- const double theta2 = theta * theta;
-
- /* Determine positive or negative primary interval. */
- n += 2;
- sign = ones[(n >> 2) & 1];
-
- /* Are we in the primary interval of sin or cos? */
- if ((n & 2) == 0)
- {
- /* Here cosf() is calculated using sin Chebyshev polynomial:
- x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
- cx = S3 + theta2 * S4;
- cx = S2 + theta2 * cx;
- cx = S1 + theta2 * cx;
- cx = S0 + theta2 * cx;
- cx = theta + theta * theta2 * cx;
- }
- else
- {
- /* Here cosf() is calculated using cos Chebyshev polynomial:
- 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */
- cx = C3 + theta2 * C4;
- cx = C2 + theta2 * cx;
- cx = C1 + theta2 * cx;
- cx = C0 + theta2 * cx;
- cx = 1. + theta2 * cx;
- }
- return sign * cx;
-}
-
-
/* 2PI * 2^-64. */
static const double pi63 = 0x1.921FB54442D18p-62;
/* PI / 4. */
@@ -217,6 +78,36 @@ sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
*cosp = c + x6 * c2;
}
+/* Return the sine of inputs X and X2 (X squared) using the polynomial P.
+ N is the quadrant, and if odd the cosine polynomial is used. */
+static inline float
+sinf_poly (double x, double x2, const sincos_t *p, int n)
+{
+ double x3, x4, x6, x7, s, c, c1, c2, s1;
+
+ if ((n & 1) == 0)
+ {
+ x3 = x * x2;
+ s1 = p->s2 + x2 * p->s3;
+
+ x7 = x3 * x2;
+ s = x + x3 * p->s1;
+
+ return s + x7 * s1;
+ }
+ else
+ {
+ x4 = x2 * x2;
+ c2 = p->c3 + x2 * p->c4;
+ c1 = p->c0 + x2 * p->c1;
+
+ x6 = x4 * x2;
+ c = c1 + x4 * p->c2;
+
+ return c + x6 * c2;
+ }
+}
+
/* Fast range reduction using single multiply-subtract. Return the modulo of
X as a value between -PI/4 and PI/4 and store the quadrant in NP.
The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
diff --git a/sysdeps/ieee754/flt-32/s_sinf.c b/sysdeps/ieee754/flt-32/s_sinf.c
index 138e318..f6964e6 100644
--- a/sysdeps/ieee754/flt-32/s_sinf.c
+++ b/sysdeps/ieee754/flt-32/s_sinf.c
@@ -1,5 +1,5 @@
/* Compute sine of argument.
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
+ Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <errno.h>
+#include <stdint.h>
#include <math.h>
-#include <math_private.h>
+#include <math-barriers.h>
#include <libm-alias-float.h>
+#include "math_config.h"
#include "s_sincosf.h"
#ifndef SINF
@@ -28,127 +29,62 @@
# define SINF_FUNC SINF
#endif
+/* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative
+ error is 0.5303 * 2^-23. A single-step range reduction is used for
+ small values. Large inputs have their range reduced using fast integer
+ arithmetic.
+*/
float
-SINF_FUNC (float x)
+SINF_FUNC (float y)
{
- double cx;
- double theta = x;
- double abstheta = fabs (theta);
- /* If |x|< Pi/4. */
- if (isless (abstheta, M_PI_4))
+ double x = y;
+ double s;
+ int n;
+ const sincos_t *p = &__sincosf_table[0];
+
+ if (abstop12 (y) < abstop12 (pio4))
+ {
+ s = x * x;
+
+ if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+ {
+ /* Force underflow for tiny y. */
+ if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+ math_force_eval ((float)s);
+ return y;
+ }
+
+ return sinf_poly (x, s, p, 0);
+ }
+ else if (__glibc_likely (abstop12 (y) < abstop12 (120.0f)))
{
- if (abstheta >= 0x1p-5) /* |x| >= 2^-5. */
- {
- const double theta2 = theta * theta;
- /* Chebyshev polynomial of the form for sin
- x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
- cx = S3 + theta2 * S4;
- cx = S2 + theta2 * cx;
- cx = S1 + theta2 * cx;
- cx = S0 + theta2 * cx;
- cx = theta + theta * theta2 * cx;
- return cx;
- }
- else if (abstheta >= 0x1p-27) /* |x| >= 2^-27. */
- {
- /* A simpler Chebyshev approximation is close enough for this range:
- for sin: x+x^3*(SS0+x^2*SS1). */
- const double theta2 = theta * theta;
- cx = SS0 + theta2 * SS1;
- cx = theta + theta * theta2 * cx;
- return cx;
- }
- else
- {
- /* Handle some special cases. */
- if (theta)
- return theta - (theta * SMALL);
- else
- return theta;
- }
+ x = reduce_fast (x, p, &n);
+
+ /* Setup the signs for sin and cos. */
+ s = p->sign[n & 3];
+
+ if (n & 2)
+ p = &__sincosf_table[1];
+
+ return sinf_poly (x * s, x * x, p, n);
}
- else /* |x| >= Pi/4. */
+ else if (abstop12 (y) < abstop12 (INFINITY))
{
- unsigned int signbit = isless (x, 0);
- if (isless (abstheta, 9 * M_PI_4)) /* |x| < 9*Pi/4. */
- {
- /* There are cases where FE_UPWARD rounding mode can
- produce a result of abstheta * inv_PI_4 == 9,
- where abstheta < 9pi/4, so the domain for
- pio2_table must go to 5 (9 / 2 + 1). */
- unsigned int n = (abstheta * inv_PI_4) + 1;
- theta = abstheta - pio2_table[n / 2];
- return reduced_sin (theta, n, signbit);
- }
- else if (isless (abstheta, INFINITY))
- {
- if (abstheta < 0x1p+23) /* |x| < 2^23. */
- {
- unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
- double x = n / 2;
- theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
- /* Argument reduction needed. */
- return reduced_sin (theta, n, signbit);
- }
- else /* |x| >= 2^23. */
- {
- x = fabsf (x);
- int exponent;
- GET_FLOAT_WORD (exponent, x);
- exponent
- = (exponent >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
- exponent += 3;
- exponent /= 28;
- double a = invpio4_table[exponent] * x;
- double b = invpio4_table[exponent + 1] * x;
- double c = invpio4_table[exponent + 2] * x;
- double d = invpio4_table[exponent + 3] * x;
- uint64_t l = a;
- l &= ~0x7;
- a -= l;
- double e = a + b;
- l = e;
- e = a - l;
- if (l & 1)
- {
- e -= 1.0;
- e += b;
- e += c;
- e += d;
- e *= M_PI_4;
- return reduced_sin (e, l + 1, signbit);
- }
- else
- {
- e += b;
- e += c;
- e += d;
- if (e <= 1.0)
- {
- e *= M_PI_4;
- return reduced_sin (e, l + 1, signbit);
- }
- else
- {
- l++;
- e -= 2.0;
- e *= M_PI_4;
- return reduced_sin (e, l + 1, signbit);
- }
- }
- }
- }
- else
- {
- int32_t ix;
- /* High word of x. */
- GET_FLOAT_WORD (ix, abstheta);
- /* Sin(Inf or NaN) is NaN. */
- if (ix == 0x7f800000)
- __set_errno (EDOM);
- return x - x;
- }
+ uint32_t xi = asuint (y);
+ int sign = xi >> 31;
+
+ x = reduce_large (xi, &n);
+
+ /* Setup signs for sin and cos - include original sign. */
+ s = p->sign[(n + sign) & 3];
+
+ if ((n + sign) & 2)
+ p = &__sincosf_table[1];
+
+ return sinf_poly (x * s, x * x, p, n);
}
+ else
+ return __math_invalidf (y);
}
#ifndef SINF
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 912db31..a51faec 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1271,24 +1271,30 @@ ldouble: 1
Function: "cos_downward":
double: 1
+float: 1
float128: 3
idouble: 1
+ifloat: 1
ifloat128: 3
ildouble: 3
ldouble: 3
Function: "cos_towardzero":
double: 1
+float: 1
float128: 1
idouble: 1
+ifloat: 1
ifloat128: 1
ildouble: 2
ldouble: 2
Function: "cos_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 2
ldouble: 2
@@ -2539,24 +2545,30 @@ ldouble: 1
Function: "sin_downward":
double: 1
+float: 1
float128: 3
idouble: 1
+ifloat: 1
ifloat128: 3
ildouble: 3
ldouble: 3
Function: "sin_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 2
ldouble: 2
Function: "sin_upward":
double: 1
+float: 1
float128: 3
idouble: 1
+ifloat: 1
ifloat128: 3
ildouble: 3
ldouble: 3
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
index 64abe7a..0b80c4f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
@@ -21,7 +21,6 @@
#include <math_private.h>
#include <x86intrin.h>
#include <libm-alias-float.h>
-#include "s_sincosf.h"
#define SINCOSF __sincosf_fma
@@ -31,6 +30,38 @@
# define SINCOSF_FUNC SINCOSF
#endif
+/* PI/2 with 98 bits of accuracy. */
+static const double PI_2_hi = 0x1.921fb544p+0;
+static const double PI_2_lo = 0x1.0b4611a626332p-34;
+
+static const double SMALL = 0x1p-50; /* 2^-50. */
+static const double inv_PI_4 = 0x1.45f306dc9c883p+0; /* 4/PI. */
+
+#define FLOAT_EXPONENT_SHIFT 23
+#define FLOAT_EXPONENT_BIAS 127
+
+static const double pio2_table[] = {
+ 0 * M_PI_2,
+ 1 * M_PI_2,
+ 2 * M_PI_2,
+ 3 * M_PI_2,
+ 4 * M_PI_2,
+ 5 * M_PI_2
+};
+
+static const double invpio4_table[] = {
+ 0x0p+0,
+ 0x1.45f306cp+0,
+ 0x1.c9c882ap-28,
+ 0x1.4fe13a8p-58,
+ 0x1.f47d4dp-85,
+ 0x1.bb81b6cp-112,
+ 0x1.4acc9ep-142,
+ 0x1.0e4107cp-169
+};
+
+static const double ones[] = { 1.0, -1.0 };
+
/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. */
static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};
static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };
diff --git a/sysdeps/x86_64/fpu/s_sincosf_data.c b/sysdeps/x86_64/fpu/s_sincosf_data.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/x86_64/fpu/s_sincosf_data.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed. */