diff options
Diffstat (limited to 'libc/src/math/generic')
-rw-r--r-- | libc/src/math/generic/CMakeLists.txt | 81 | ||||
-rw-r--r-- | libc/src/math/generic/cos.cpp | 155 | ||||
-rw-r--r-- | libc/src/math/generic/fmaxbf16.cpp | 21 | ||||
-rw-r--r-- | libc/src/math/generic/fminbf16.cpp | 21 | ||||
-rw-r--r-- | libc/src/math/generic/range_reduction_double_common.h | 374 | ||||
-rw-r--r-- | libc/src/math/generic/range_reduction_double_fma.h | 346 | ||||
-rw-r--r-- | libc/src/math/generic/range_reduction_double_nofma.h | 347 | ||||
-rw-r--r-- | libc/src/math/generic/sin.cpp | 14 | ||||
-rw-r--r-- | libc/src/math/generic/sincos.cpp | 14 | ||||
-rw-r--r-- | libc/src/math/generic/sincos_eval.h | 138 | ||||
-rw-r--r-- | libc/src/math/generic/tan.cpp | 7 |
11 files changed, 101 insertions, 1417 deletions
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 5aeacc8..c8a8c2b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -292,23 +292,6 @@ add_header_library( ) add_header_library( - range_reduction_double - HDRS - range_reduction_double_common.h - range_reduction_double_fma.h - range_reduction_double_nofma.h - DEPENDS - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.fma - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.common - libc.src.__support.integer_literals -) - -add_header_library( sincosf_utils HDRS sincosf_utils.h @@ -329,18 +312,6 @@ add_header_library( libc.src.__support.common ) -add_header_library( - sincos_eval - HDRS - sincos_eval.h - DEPENDS - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.integer_literals -) - add_entrypoint_object( cos SRCS @@ -348,16 +319,7 @@ add_entrypoint_object( HDRS ../cos.h DEPENDS - .range_reduction_double - .sincos_eval - libc.hdr.errno_macros - libc.src.errno.errno - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.macros.optimization + libc.src.__support.math.cos ) add_entrypoint_object( @@ -436,8 +398,8 @@ add_entrypoint_object( HDRS ../sin.h DEPENDS - .range_reduction_double - .sincos_eval + libc.src.__support.math.range_reduction_double + libc.src.__support.math.sincos_eval libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -496,8 +458,8 @@ add_entrypoint_object( HDRS ../sincos.h DEPENDS - .range_reduction_double - .sincos_eval + libc.src.__support.math.range_reduction_double + libc.src.__support.math.sincos_eval libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -569,7 +531,7 @@ add_entrypoint_object( HDRS ../tan.h DEPENDS - .range_reduction_double + libc.src.__support.math.range_reduction_double libc.hdr.errno_macros libc.src.errno.errno libc.src.__support.FPUtil.double_double @@ -2361,6 +2323,21 @@ add_entrypoint_object( MISC_MATH_BASIC_OPS_OPT ) +add_entrypoint_object( + fminbf16 + SRCS + fminbf16.cpp + HDRS + ../fminbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) add_entrypoint_object( fmax @@ -2421,6 +2398,22 @@ add_entrypoint_object( ) add_entrypoint_object( + fmaxbf16 + SRCS + fmaxbf16.cpp + HDRS + ../fmaxbf16.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types + FLAGS + MISC_MATH_BASIC_OPS_OPT +) + +add_entrypoint_object( fmaximum SRCS fmaximum.cpp diff --git a/libc/src/math/generic/cos.cpp b/libc/src/math/generic/cos.cpp index 5da0f868..aabf3bc 100644 --- a/libc/src/math/generic/cos.cpp +++ b/libc/src/math/generic/cos.cpp @@ -7,161 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cos.h" -#include "hdr/errno_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" -#else -#include "range_reduction_double_nofma.h" -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE +#include "src/__support/math/cos.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = typename fputil::DyadicFloat<128>; - -LLVM_LIBC_FUNCTION(double, cos, (double x)) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - uint16_t x_e = xbits.get_biased_exponent(); - - DoubleDouble y; - unsigned k; - LargeRangeReduction range_reduction_large{}; - - // |x| < 2^16. - if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { - // |x| < 2^-7 - if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) { - // |x| < 2^-27 - if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) { - // Signed zeros. - if (LIBC_UNLIKELY(x == 0.0)) - return 1.0; - - // For |x| < 2^-27, |cos(x) - 1| < |x|^2/2 < 2^-54 = ulp(1 - 2^-53)/2. - return fputil::round_result_slightly_down(1.0); - } - // No range reduction needed. - k = 0; - y.lo = 0.0; - y.hi = x; - } else { - // Small range reduction. - k = range_reduction_small(x, y); - } - } else { - // Inf or NaN - if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - // cos(+-Inf) = NaN - if (xbits.get_mantissa() == 0) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - return x + FPBits::quiet_nan().get_val(); - } - - // Large range reduction. - k = range_reduction_large.fast(x, y); - } - - DoubleDouble sin_y, cos_y; - - [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); - - // Look up sin(k * pi/128) and cos(k * pi/128) -#ifdef LIBC_MATH_HAS_SMALL_TABLES - // Memory saving versions. Use 65-entry table. - auto get_idx_dd = [](unsigned kk) -> DoubleDouble { - unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); - DoubleDouble ans = SIN_K_PI_OVER_128[idx]; - if (kk & 128) { - ans.hi = -ans.hi; - ans.lo = -ans.lo; - } - return ans; - }; - DoubleDouble msin_k = get_idx_dd(k + 128); - DoubleDouble cos_k = get_idx_dd(k + 64); -#else - // Fast look up version, but needs 256-entry table. - // -sin(k * pi/128) = sin((k + 128) * pi/128) - // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). - DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255]; - DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255]; -#endif // LIBC_MATH_HAS_SMALL_TABLES - - // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). - // So k is an integer and -pi / 256 <= y <= pi / 256. - // Then cos(x) = cos((k * pi/128 + y) - // = cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128) - DoubleDouble cos_k_cos_y = fputil::quick_mult(cos_y, cos_k); - DoubleDouble msin_k_sin_y = fputil::quick_mult(sin_y, msin_k); - - DoubleDouble rr = fputil::exact_add<false>(cos_k_cos_y.hi, msin_k_sin_y.hi); - rr.lo += msin_k_sin_y.lo + cos_k_cos_y.lo; - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return rr.hi + rr.lo; -#else - - double rlp = rr.lo + err; - double rlm = rr.lo - err; - - double r_upper = rr.hi + rlp; // (rr.lo + ERR); - double r_lower = rr.hi + rlm; // (rr.lo - ERR); - - // Ziv's rounding test. - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - Float128 u_f128, sin_u, cos_u; - if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) - u_f128 = range_reduction_small_f128(x); - else - u_f128 = range_reduction_large.accurate(); - - generic::sincos_eval(u_f128, sin_u, cos_u); - - auto get_sin_k = [](unsigned kk) -> Float128 { - unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); - Float128 ans = SIN_K_PI_OVER_128_F128[idx]; - if (kk & 128) - ans.sign = Sign::NEG; - return ans; - }; - - // -sin(k * pi/128) = sin((k + 128) * pi/128) - // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). - Float128 msin_k_f128 = get_sin_k(k + 128); - Float128 cos_k_f128 = get_sin_k(k + 64); - - // cos(x) = cos((k * pi/128 + u) - // = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128) - Float128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u), - fputil::quick_mul(msin_k_f128, sin_u)); - - // TODO: Add assertion if Ziv's accuracy tests fail in debug mode. - // https://github.com/llvm/llvm-project/issues/96452. - - return static_cast<double>(r); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS -} +LLVM_LIBC_FUNCTION(double, cos, (double x)) { return math::cos(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fmaxbf16.cpp b/libc/src/math/generic/fmaxbf16.cpp new file mode 100644 index 0000000..01d395b --- /dev/null +++ b/libc/src/math/generic/fmaxbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fmaxbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmaxbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fmaxbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmax(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fminbf16.cpp b/libc/src/math/generic/fminbf16.cpp new file mode 100644 index 0000000..c3e29ee --- /dev/null +++ b/libc/src/math/generic/fminbf16.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of fminbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fminbf16.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fminbf16, (bfloat16 x, bfloat16 y)) { + return fputil::fmin(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h deleted file mode 100644 index a93ee25..0000000 --- a/libc/src/math/generic/range_reduction_double_common.h +++ /dev/null @@ -1,374 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan -*- C++ --------*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H - -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" - -namespace LIBC_NAMESPACE_DECL { - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -static constexpr unsigned SPLIT = fputil::DefaultSplit<double>::VALUE; -#else -// When there is no-FMA instructions, in order to have exact product of 2 double -// precision with directional roundings, we need to lower the precision of the -// constants by at least 1 bit, and use a different splitting constant. -static constexpr unsigned SPLIT = 28; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - -using LIBC_NAMESPACE::fputil::DoubleDouble; -using Float128 = LIBC_NAMESPACE::fputil::DyadicFloat<128>; - -#define FAST_PASS_EXPONENT 16 - -// For 2^-7 < |x| < 2^16, return k and u such that: -// k = round(x * 128/pi) -// x mod pi/128 = x - k * pi/128 ~ u.hi + u.lo -// Error bound: -// |(x - k * pi/128) - (u_hi + u_lo)| <= max(ulp(ulp(u_hi)), 2^-119) -// <= 2^-111. -LIBC_INLINE unsigned range_reduction_small(double x, DoubleDouble &u) { - // Values of -pi/128 used for inputs with absolute value <= 2^16. - // The first 3 parts are generated with (53 - 21 = 32)-bit precision, so that - // the product k * MPI_OVER_128[i] is exact. - // Generated by Sollya with: - // > display = hexadecimal!; - // > a = round(pi/128, 32, RN); - // > b = round(pi/128 - a, 32, RN); - // > c = round(pi/128 - a - b, D, RN); - // > print(-a, ",", -b, ",", -c); - constexpr double MPI_OVER_128[3] = {-0x1.921fb544p-6, -0x1.0b4611a6p-40, - -0x1.3198a2e037073p-75}; - constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5; - double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D; - double kd = fputil::nearest_integer(prod_hi); - - // Let y = x - k * (pi/128) - // Then |y| < pi / 256 - // With extra rounding errors, we can bound |y| < 1.6 * 2^-7. - double y_hi = fputil::multiply_add(kd, MPI_OVER_128[0], x); // Exact - // |u.hi| < 1.6*2^-7 - u.hi = fputil::multiply_add(kd, MPI_OVER_128[1], y_hi); - double u0 = y_hi - u.hi; // Exact - // |u.lo| <= max(ulp(u.hi), |kd * MPI_OVER_128[2]|) - double u1 = fputil::multiply_add(kd, MPI_OVER_128[1], u0); // Exact - u.lo = fputil::multiply_add(kd, MPI_OVER_128[2], u1); - // Error bound: - // |x - k * pi/128| - (u.hi + u.lo) <= ulp(u.lo) - // <= ulp(max(ulp(u.hi), kd*MPI_OVER_128[2])) - // <= 2^(-7 - 104) = 2^-111. - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Digits of 2^(16*i) / pi, generated by Sollya with: -// > procedure ulp(x, n) { return 2^(floor(log2(abs(x))) - n); }; -// > for i from 0 to 63 do { -// if i < 3 then { pi_inv = 0.25 + 2^(16*(i - 3)) / pi; } -// else { pi_inv = 2^(16*(i-3)) / pi; }; -// pn = nearestint(pi_inv); -// pi_frac = pi_inv - pn; -// a = round(pi_frac, 51, RN); -// b = round(pi_frac - a, 51, RN); -// c = round(pi_frac - a - b, 51, RN); -// d = round(pi_frac - a - b - c, D, RN); -// print("{", 2^7 * a, ",", 2^7 * b, ",", 2^7 * c, ",", 2^7 * d, "},"); -// }; -// -// Notice that for [0..2] the leading bit of 2^(16*(i - 3)) / pi is very small, -// so we add 0.25 so that the conditions for the algorithms are still satisfied, -// and one of those conditions guarantees that ulp(0.25 * x_reduced) >= 2, and -// will safely be discarded. - -static constexpr double ONE_TWENTY_EIGHT_OVER_PI[64][4] = { - {0x1.0000000000014p5, 0x1.7cc1b727220a8p-49, 0x1.4fe13abe8fa9cp-101, - -0x1.911f924eb5336p-153}, - {0x1.0000000145f3p5, 0x1.b727220a94fep-49, 0x1.3abe8fa9a6eep-101, - 0x1.b6c52b3278872p-155}, - {0x1.000145f306dc8p5, 0x1.c882a53f84ebp-47, -0x1.70565911f925p-101, - 0x1.4acc9e21c821p-153}, - {0x1.45f306dc9c884p5, -0x1.5ac07b1505c14p-47, -0x1.96447e493ad4cp-99, - -0x1.b0ef1bef806bap-152}, - {-0x1.f246c6efab58p4, -0x1.ec5417056591p-49, -0x1.f924eb53361ep-101, - 0x1.c820ff28b1d5fp-153}, - {0x1.391054a7f09d4p4, 0x1.f47d4d377036cp-48, 0x1.8a5664f10e41p-100, - 0x1.fe5163abdebbcp-154}, - {0x1.529fc2757d1f4p2, 0x1.34ddc0db62958p-50, 0x1.93c439041fe5p-102, - 0x1.63abdebbc561bp-154}, - {-0x1.ec5417056591p-1, -0x1.f924eb53361ep-53, 0x1.c820ff28b1d6p-105, - -0x1.0a21d4f246dc9p-157}, - {-0x1.505c1596447e4p5, -0x1.275a99b0ef1cp-48, 0x1.07f9458eaf7bp-100, - -0x1.0ea79236e4717p-152}, - {-0x1.596447e493ad4p1, -0x1.9b0ef1bef806cp-52, 0x1.63abdebbc561cp-106, - -0x1.1b7238b7b645ap-159}, - {0x1.bb81b6c52b328p5, -0x1.de37df00d74e4p-49, 0x1.5ef5de2b0db94p-101, - -0x1.c8e2ded9169p-153}, - {0x1.b6c52b3278874p5, -0x1.f7c035d38a844p-47, 0x1.778ac36e48dc8p-99, - -0x1.6f6c8b47fe6dbp-152}, - {0x1.2b3278872084p5, -0x1.ae9c5421443a8p-50, -0x1.e48db91c5bdb4p-102, - 0x1.d2e006492eea1p-154}, - {-0x1.8778df7c035d4p5, 0x1.d5ef5de2b0db8p-49, 0x1.2371d2126e97p-101, - 0x1.924bba8274648p-160}, - {-0x1.bef806ba71508p4, -0x1.443a9e48db91cp-50, -0x1.6f6c8b47fe6dcp-104, - 0x1.77504e8c90e7fp-157}, - {-0x1.ae9c5421443a8p-2, -0x1.e48db91c5bdb4p-54, 0x1.d2e006492eeap-106, - 0x1.3a32439fc3bd6p-159}, - {-0x1.38a84288753c8p5, -0x1.1b7238b7b645cp-47, 0x1.c00c925dd413cp-99, - -0x1.cdbc603c429c7p-151}, - {-0x1.0a21d4f246dc8p3, -0x1.c5bdb22d1ff9cp-50, 0x1.25dd413a32438p-103, - 0x1.fc3bd63962535p-155}, - {-0x1.d4f246dc8e2ep3, 0x1.26e9700324978p-49, -0x1.5f62e6de301e4p-102, - 0x1.eb1cb129a73efp-154}, - {-0x1.236e4716f6c8cp4, 0x1.700324977505p-49, -0x1.736f180f10a7p-101, - -0x1.a76b2c608bbeep-153}, - {0x1.b8e909374b8p4, 0x1.924bba8274648p-48, 0x1.cfe1deb1cb128p-102, - 0x1.a73ee88235f53p-154}, - {0x1.09374b801924cp4, -0x1.15f62e6de302p-50, 0x1.deb1cb129a74p-102, - -0x1.177dca0ad144cp-154}, - {-0x1.68ffcdb688afcp3, 0x1.d1921cfe1debp-50, 0x1.cb129a73ee884p-102, - -0x1.ca0ad144bb7b1p-154}, - {0x1.924bba8274648p0, 0x1.cfe1deb1cb128p-54, 0x1.a73ee88235f54p-106, - -0x1.144bb7b16639p-158}, - {-0x1.a22bec5cdbc6p5, -0x1.e214e34ed658cp-50, -0x1.177dca0ad144cp-106, - 0x1.213a671c09ad1p-160}, - {0x1.3a32439fc3bd8p1, -0x1.c69dacb1822fp-51, 0x1.1afa975da2428p-105, - -0x1.6638fd94ba082p-158}, - {-0x1.b78c0788538d4p4, 0x1.29a73ee88236p-50, -0x1.5a28976f62cc8p-103, - 0x1.c09ad17df904ep-156}, - {0x1.fc3bd63962534p5, 0x1.cfba208d7d4bcp-48, -0x1.12edec598e3f8p-100, - 0x1.ad17df904e647p-152}, - {-0x1.4e34ed658c118p2, 0x1.046bea5d7689p-51, 0x1.3a671c09ad17cp-104, - 0x1.f904e64758e61p-156}, - {0x1.62534e7dd1048p5, -0x1.415a28976f62cp-47, -0x1.8e3f652e8207p-100, - 0x1.3991d63983534p-154}, - {-0x1.63045df7282b4p4, -0x1.44bb7b16638fcp-50, -0x1.94ba081bec67p-102, - 0x1.d639835339f4ap-154}, - {0x1.d1046bea5d768p5, 0x1.213a671c09adp-48, 0x1.7df904e64759p-100, - -0x1.9f2b3182d8defp-152}, - {0x1.afa975da24274p3, 0x1.9c7026b45f7e4p-50, 0x1.3991d63983534p-106, - -0x1.82d8dee81d108p-160}, - {-0x1.a28976f62cc7p5, -0x1.fb29741037d8cp-47, -0x1.b8a719f2b3184p-100, - 0x1.272117e2ef7e5p-152}, - {-0x1.76f62cc71fb28p5, -0x1.741037d8cdc54p-47, 0x1.cc1a99cfa4e44p-101, - -0x1.d03a21036be27p-153}, - {0x1.d338e04d68bfp5, -0x1.bec66e29c67ccp-50, 0x1.339f49c845f8cp-102, - -0x1.081b5f13801dap-156}, - {0x1.c09ad17df905p4, -0x1.9b8a719f2b318p-48, -0x1.6c6f740e8840cp-103, - -0x1.af89c00ed0004p-155}, - {0x1.68befc827323cp5, -0x1.38cf9598c16c8p-47, 0x1.08bf177bf2508p-99, - -0x1.3801da00087eap-152}, - {-0x1.037d8cdc538dp5, 0x1.a99cfa4e422fcp-49, 0x1.77bf250763ffp-103, - 0x1.2fffbc0b301fep-155}, - {-0x1.8cdc538cf9598p5, -0x1.82d8dee81d108p-48, -0x1.b5f13801dap-104, - -0x1.0fd33f8086877p-157}, - {-0x1.4e33e566305bp3, -0x1.bdd03a21036cp-49, 0x1.d8ffc4bffef04p-101, - -0x1.33f80868773a5p-153}, - {-0x1.f2b3182d8dee8p4, -0x1.d1081b5f138p-52, -0x1.da00087e99fcp-104, - -0x1.0d0ee74a5f593p-158}, - {-0x1.8c16c6f740e88p5, -0x1.036be27003b4p-49, -0x1.0fd33f8086878p-109, - 0x1.8b5a0a6d1f6d3p-162}, - {0x1.3908bf177bf24p5, 0x1.0763ff12fffbcp-47, 0x1.6603fbcbc462cp-104, - 0x1.6829b47db4dap-156}, - {0x1.7e2ef7e4a0ec8p4, -0x1.da00087e99fcp-56, -0x1.0d0ee74a5f594p-110, - 0x1.1f6d367ecf27dp-162}, - {-0x1.081b5f13801dcp4, 0x1.fff7816603fbcp-48, 0x1.788c5ad05369p-101, - -0x1.25930261b069fp-155}, - {-0x1.af89c00ed0004p5, -0x1.fa67f010d0ee8p-50, 0x1.6b414da3eda6cp-103, - 0x1.fb3c9f2c26dd4p-156}, - {-0x1.c00ed00043f4cp5, -0x1.fc04343b9d298p-48, 0x1.4da3eda6cfdap-103, - -0x1.b069ec9161738p-155}, - {0x1.2fffbc0b301fcp5, 0x1.e5e2316b414dcp-47, -0x1.c125930261b08p-99, - 0x1.6136e9e8c7ecdp-151}, - {-0x1.0fd33f8086878p3, 0x1.8b5a0a6d1f6d4p-50, -0x1.30261b069ec9p-103, - -0x1.61738132c3403p-155}, - {-0x1.9fc04343b9d28p4, -0x1.7d64b824b2604p-48, -0x1.86c1a7b24585cp-101, - -0x1.c09961a015d29p-154}, - {-0x1.0d0ee74a5f594p2, 0x1.1f6d367ecf27cp-50, 0x1.6136e9e8c7eccp-103, - 0x1.3cbfd45aea4f7p-155}, - {-0x1.dce94beb25c14p5, 0x1.a6cfd9e4f9614p-47, -0x1.22c2e70265868p-100, - -0x1.5d28ad8453814p-158}, - {-0x1.4beb25c12593p5, -0x1.30d834f648b0cp-50, 0x1.8fd9a797fa8b4p-104, - 0x1.d49eeb1faf97cp-156}, - {0x1.b47db4d9fb3c8p4, 0x1.f2c26dd3d18fcp-48, 0x1.9a797fa8b5d48p-100, - 0x1.eeb1faf97c5edp-152}, - {-0x1.25930261b06ap5, 0x1.36e9e8c7ecd3cp-47, 0x1.7fa8b5d49eebp-100, - 0x1.faf97c5ecf41dp-152}, - {0x1.fb3c9f2c26dd4p4, -0x1.738132c3402bcp-51, 0x1.aea4f758fd7ccp-103, - -0x1.d0985f18c10ebp-159}, - {-0x1.b069ec9161738p5, -0x1.32c3402ba515cp-51, 0x1.eeb1faf97c5ecp-104, - 0x1.e839cfbc52949p-157}, - {-0x1.ec9161738132cp5, -0x1.a015d28ad8454p-50, 0x1.faf97c5ecf41cp-104, - 0x1.cfbc529497536p-157}, - {-0x1.61738132c3404p5, 0x1.45aea4f758fd8p-47, -0x1.a0e84c2f8c608p-102, - -0x1.d6b5b45650128p-156}, - {0x1.fb34f2ff516bcp3, -0x1.6c229c0a0d074p-49, -0x1.30be31821d6b4p-104, - -0x1.b4565012813b8p-156}, - {0x1.3cbfd45aea4f8p5, -0x1.4e050683a130cp-48, 0x1.ce7de294a4ba8p-104, - 0x1.afed7ec47e357p-156}, - {-0x1.5d28ad8453814p2, -0x1.a0e84c2f8c608p-54, -0x1.d6b5b45650128p-108, - -0x1.3b81ca8bdea7fp-164}, - {-0x1.15b08a702834p5, -0x1.d0985f18c10ecp-47, 0x1.4a4ba9afed7ecp-100, - 0x1.1f8d5d0856033p-154}, -}; - -// For large range |x| >= 2^16, we perform the range reduction computations as: -// u = x - k * pi/128 = (pi/128) * (x * (128/pi) - k). -// We use the exponent of x to find 4 double-chunks of 128/pi: -// c_hi, c_mid, c_lo, c_lo_2 such that: -// 1) ulp(round(x * c_hi, D, RN)) >= 2^8 = 256, -// 2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then -// min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53. -// This will allow us to drop the high part ph_hi and the addition: -// (ph_lo + pm_hi) mod 1 -// can be exactly representable in a double precision. -// This will allow us to do split the computations as: -// (x * 256/pi) ~ x * (c_hi + c_mid + c_lo + c_lo_2) (mod 256) -// ~ (ph_lo + pm_hi) + (pm_lo + x * c_lo) + x * c_lo_2. -// Then, -// round(x * 128/pi) = round(ph_lo + pm_hi) (mod 256) -// And the high part of fractional part of (x * 128/pi) can simply be: -// {x * 128/pi}_hi = {ph_lo + pm_hi}. -// To prevent overflow when x is very large, we simply scale up -// (c_hi, c_mid, c_lo, c_lo_2) by a fixed power of 2 (based on the index) and -// scale down x by the same amount. - -struct LargeRangeReduction { - - // To be implemented in range_reduction_double_fma.h and - // range_reduction_double_nofma.h. - unsigned fast(double x, DoubleDouble &u); - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - LIBC_INLINE Float128 accurate() const { - constexpr Float128 PI_OVER_128_F128 = { - Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; - - // y_lo = x * c_lo + pm.lo - Float128 y_lo_0(x_reduced * ONE_TWENTY_EIGHT_OVER_PI[idx][3]); - Float128 y_lo_1 = fputil::quick_add(Float128(y_lo), y_lo_0); - Float128 y_mid_f128 = fputil::quick_add(Float128(y_mid.lo), y_lo_1); - Float128 y_hi_f128 = fputil::quick_add(Float128(y_hi), Float128(y_mid.hi)); - Float128 y = fputil::quick_add(y_hi_f128, y_mid_f128); - - return fputil::quick_mul(y, PI_OVER_128_F128); - } -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -private: - // Index of x in the look-up table ONE_TWENTY_EIGHT_OVER_PI. - unsigned idx; - // x scaled down by 2^(-16 *(idx - 3))). - double x_reduced; - // Parts of (x * 128/pi) mod 1. - double y_hi, y_lo; - DoubleDouble y_mid; -}; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -LIBC_INLINE static Float128 range_reduction_small_f128(double x) { - constexpr Float128 PI_OVER_128_F128 = { - Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; - constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5; - double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI_D; - double kd = fputil::nearest_integer(prod_hi); - - Float128 mk_f128(-kd); - Float128 x_f128(x); - Float128 p_hi = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][0])); - Float128 p_mid = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][1])); - Float128 p_lo = - fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][2])); - Float128 s_hi = fputil::quick_add(p_hi, mk_f128); - Float128 s_lo = fputil::quick_add(p_mid, p_lo); - Float128 y = fputil::quick_add(s_hi, s_lo); - - return fputil::quick_mul(y, PI_OVER_128_F128); -} - -static constexpr Float128 SIN_K_PI_OVER_128_F128[65] = { - {Sign::POS, 0, 0}, - {Sign::POS, -133, 0xc90a'afbd'1b33'efc9'c539'edcb'fda0'cf2c_u128}, - {Sign::POS, -132, 0xc8fb'2f88'6ec0'9f37'6a17'954b'2b7c'5171_u128}, - {Sign::POS, -131, 0x96a9'0496'70cf'ae65'f775'7409'4d3c'35c4_u128}, - {Sign::POS, -131, 0xc8bd'35e1'4da1'5f0e'c739'6c89'4bbf'7389_u128}, - {Sign::POS, -131, 0xfab2'72b5'4b98'71a2'7047'29ae'56d7'8a37_u128}, - {Sign::POS, -130, 0x9640'8374'7309'd113'000a'89a1'1e07'c1fe_u128}, - {Sign::POS, -130, 0xaf10'a224'59fe'32a6'3fee'f3bb'58b1'f10d_u128}, - {Sign::POS, -130, 0xc7c5'c1e3'4d30'55b2'5cc8'c00e'4fcc'd850_u128}, - {Sign::POS, -130, 0xe05c'1353'f27b'17e5'0ebc'61ad'e6ca'83cd_u128}, - {Sign::POS, -130, 0xf8cf'cbd9'0af8'd57a'4221'dc4b'a772'598d_u128}, - {Sign::POS, -129, 0x888e'9315'8fb3'bb04'9841'56f5'5334'4306_u128}, - {Sign::POS, -129, 0x94a0'3176'acf8'2d45'ae4b'a773'da6b'f754_u128}, - {Sign::POS, -129, 0xa09a'e4a0'bb30'0a19'2f89'5f44'a303'cc0b_u128}, - {Sign::POS, -129, 0xac7c'd3ad'58fe'e7f0'811f'9539'84ef'f83e_u128}, - {Sign::POS, -129, 0xb844'2987'd22c'f576'9cc3'ef36'746d'e3b8_u128}, - {Sign::POS, -129, 0xc3ef'1535'754b'168d'3122'c2a5'9efd'dc37_u128}, - {Sign::POS, -129, 0xcf7b'ca1d'476c'516d'a812'90bd'baad'62e4_u128}, - {Sign::POS, -129, 0xdae8'804f'0ae6'015b'362c'b974'182e'3030_u128}, - {Sign::POS, -129, 0xe633'74c9'8e22'f0b4'2872'ce1b'fc7a'd1cd_u128}, - {Sign::POS, -129, 0xf15a'e9c0'37b1'd8f0'6c48'e9e3'420b'0f1e_u128}, - {Sign::POS, -129, 0xfc5d'26df'c4d5'cfda'27c0'7c91'1290'b8d1_u128}, - {Sign::POS, -128, 0x839c'3cc9'17ff'6cb4'bfd7'9717'f288'0abf_u128}, - {Sign::POS, -128, 0x88f5'9aa0'da59'1421'b892'ca83'61d8'c84c_u128}, - {Sign::POS, -128, 0x8e39'd9cd'7346'4364'bba4'cfec'bff5'4867_u128}, - {Sign::POS, -128, 0x9368'2a66'e896'f544'b178'2191'1e71'c16e_u128}, - {Sign::POS, -128, 0x987f'bfe7'0b81'a708'19ce'c845'ac87'a5c6_u128}, - {Sign::POS, -128, 0x9d7f'd149'0285'c9e3'e25e'3954'9638'ae68_u128}, - {Sign::POS, -128, 0xa267'9928'48ee'b0c0'3b51'67ee'359a'234e_u128}, - {Sign::POS, -128, 0xa736'55df'1f2f'489e'149f'6e75'9934'68a3_u128}, - {Sign::POS, -128, 0xabeb'49a4'6764'fd15'1bec'da80'89c1'a94c_u128}, - {Sign::POS, -128, 0xb085'baa8'e966'f6da'e4ca'd00d'5c94'bcd2_u128}, - {Sign::POS, -128, 0xb504'f333'f9de'6484'597d'89b3'754a'be9f_u128}, - {Sign::POS, -128, 0xb968'41bf'7ffc'b21a'9de1'e3b2'2b8b'f4db_u128}, - {Sign::POS, -128, 0xbdae'f913'557d'76f0'ac85'320f'528d'6d5d_u128}, - {Sign::POS, -128, 0xc1d8'705f'fcbb'6e90'bdf0'715c'b8b2'0bd7_u128}, - {Sign::POS, -128, 0xc5e4'0358'a8ba'05a7'43da'25d9'9267'326b_u128}, - {Sign::POS, -128, 0xc9d1'124c'931f'da7a'8335'241b'e169'3225_u128}, - {Sign::POS, -128, 0xcd9f'023f'9c3a'059e'23af'31db'7179'a4aa_u128}, - {Sign::POS, -128, 0xd14d'3d02'313c'0eed'744f'ea20'e8ab'ef92_u128}, - {Sign::POS, -128, 0xd4db'3148'750d'1819'f630'e8b6'dac8'3e69_u128}, - {Sign::POS, -128, 0xd848'52c0'a80f'fcdb'24b9'fe00'6635'74a4_u128}, - {Sign::POS, -128, 0xdb94'1a28'cb71'ec87'2c19'b632'53da'43fc_u128}, - {Sign::POS, -128, 0xdebe'0563'7ca9'4cfb'4b19'aa71'fec3'ae6d_u128}, - {Sign::POS, -128, 0xe1c5'978c'05ed'8691'f4e8'a837'2f8c'5810_u128}, - {Sign::POS, -128, 0xe4aa'5909'a08f'a7b4'1227'85ae'67f5'515d_u128}, - {Sign::POS, -128, 0xe76b'd7a1'e63b'9786'1251'2952'9d48'a92f_u128}, - {Sign::POS, -128, 0xea09'a68a'6e49'cd62'15ad'45b4'a1b5'e823_u128}, - {Sign::POS, -128, 0xec83'5e79'946a'3145'7e61'0231'ac1d'6181_u128}, - {Sign::POS, -128, 0xeed8'9db6'6611'e307'86f8'c20f'b664'b01b_u128}, - {Sign::POS, -128, 0xf109'0827'b437'25fd'6712'7db3'5b28'7316_u128}, - {Sign::POS, -128, 0xf314'4762'4708'8f74'a548'6bdc'455d'56a2_u128}, - {Sign::POS, -128, 0xf4fa'0ab6'316e'd2ec'163c'5c7f'03b7'18c5_u128}, - {Sign::POS, -128, 0xf6ba'073b'424b'19e8'2c79'1f59'cc1f'fc23_u128}, - {Sign::POS, -128, 0xf853'f7dc'9186'b952'c7ad'c6b4'9888'91bb_u128}, - {Sign::POS, -128, 0xf9c7'9d63'272c'4628'4504'ae08'd19b'2980_u128}, - {Sign::POS, -128, 0xfb14'be7f'bae5'8156'2172'a361'fd2a'722f_u128}, - {Sign::POS, -128, 0xfc3b'27d3'8a5d'49ab'2567'78ff'cb5c'1769_u128}, - {Sign::POS, -128, 0xfd3a'abf8'4528'b50b'eae6'bd95'1c1d'abbe_u128}, - {Sign::POS, -128, 0xfe13'2387'0cfe'9a3d'90cd'1d95'9db6'74ef_u128}, - {Sign::POS, -128, 0xfec4'6d1e'8929'2cf0'4139'0efd'c726'e9ef_u128}, - {Sign::POS, -128, 0xff4e'6d68'0c41'd0a9'0f66'8633'f1ab'858a_u128}, - {Sign::POS, -128, 0xffb1'0f1b'cb6b'ef1d'421e'8eda'af59'453e_u128}, - {Sign::POS, -128, 0xffec'4304'2668'65d9'5657'5523'6696'1732_u128}, - {Sign::POS, 0, 1}, -}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H diff --git a/libc/src/math/generic/range_reduction_double_fma.h b/libc/src/math/generic/range_reduction_double_fma.h deleted file mode 100644 index 160fb24..0000000 --- a/libc/src/math/generic/range_reduction_double_fma.h +++ /dev/null @@ -1,346 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan w/ FMA -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/math/generic/range_reduction_double_common.h" - -namespace LIBC_NAMESPACE_DECL { - -using LIBC_NAMESPACE::fputil::DoubleDouble; - -LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62); - idx = static_cast<unsigned>((x_e_m62 >> 4) + 3); - // Scale x down by 2^(-(16 * (idx - 3)) - xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62); - // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 - x_reduced = xbits.get_val(); - // x * c_hi = ph.hi + ph.lo exactly. - DoubleDouble ph = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]); - // x * c_mid = pm.hi + pm.lo exactly. - DoubleDouble pm = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]); - // x * c_lo = pl.hi + pl.lo exactly. - DoubleDouble pl = fputil::exact_mult<double, SPLIT>( - x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]); - // Extract integral parts and fractional parts of (ph.lo + pm.hi). - double sum_hi = ph.lo + pm.hi; - double kd = fputil::nearest_integer(sum_hi); - - // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo - y_hi = (ph.lo - kd) + pm.hi; // Exact - y_mid = fputil::exact_add(pm.lo, pl.hi); - y_lo = pl.lo; - - // y_l = x * c_lo_2 + pl.lo - double y_l = - fputil::multiply_add(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][3], y_lo); - DoubleDouble y = fputil::exact_add(y_hi, y_mid.hi); - y.lo += (y_mid.lo + y_l); - - // Digits of pi/128, generated by Sollya with: - // > a = round(pi/128, D, RN); - // > b = round(pi/128 - a, D, RN); - constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60, - 0x1.921fb54442d18p-6}; - - // Error bound: with {a} denote the fractional part of a, i.e.: - // {a} = a - round(a) - // Then, - // | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105 - // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110 - u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD); - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Lookup table for sin(k * pi / 128) with k = 0, ..., 255. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 255 do { -// a = D(sin(k * pi/128)); }; -// b = D(sin(k * pi/128) - a); -// print("{", b, ",", a, "},"); -// }; -LIBC_INLINE constexpr DoubleDouble SIN_K_PI_OVER_128[] = { - {0, 0}, - {-0x1.b1d63091a013p-64, 0x1.92155f7a3667ep-6}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {-0x1.9a088a8bf6b2cp-59, 0x1.2d52092ce19f6p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {0x1.a2704729ae56dp-59, 0x1.f564e56a9730ep-4}, - {0x1.13000a89a11ep-58, 0x1.2c8106e8e613ap-3}, - {0x1.531ff779ddac6p-57, 0x1.5e214448b3fc6p-3}, - {-0x1.26d19b9ff8d82p-57, 0x1.8f8b83c69a60bp-3}, - {-0x1.af1439e521935p-62, 0x1.c0b826a7e4f63p-3}, - {-0x1.42deef11da2c4p-57, 0x1.f19f97b215f1bp-3}, - {0x1.824c20ab7aa9ap-56, 0x1.111d262b1f677p-2}, - {-0x1.5d28da2c4612dp-56, 0x1.294062ed59f06p-2}, - {0x1.0c97c4afa2518p-56, 0x1.4135c94176601p-2}, - {-0x1.efdc0d58cf62p-62, 0x1.58f9a75ab1fddp-2}, - {-0x1.44b19e0864c5dp-56, 0x1.7088530fa459fp-2}, - {-0x1.72cedd3d5a61p-57, 0x1.87de2a6aea963p-2}, - {0x1.6da81290bdbabp-57, 0x1.9ef7943a8ed8ap-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {0x1.6850e59c37f8fp-58, 0x1.cc66e9931c45ep-2}, - {0x1.e0d891d3c6841p-58, 0x1.e2b5d3806f63bp-2}, - {-0x1.2ec1fc1b776b8p-60, 0x1.f8ba4dbf89abap-2}, - {-0x1.a5a014347406cp-55, 0x1.073879922ffeep-1}, - {-0x1.ef23b69abe4f1p-55, 0x1.11eb3541b4b23p-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.5da743ef3770cp-55, 0x1.26d054cdd12dfp-1}, - {-0x1.efcc626f74a6fp-57, 0x1.30ff7fce17035p-1}, - {0x1.e3e25e3954964p-56, 0x1.3affa292050b9p-1}, - {0x1.8076a2cfdc6b3p-57, 0x1.44cf325091dd6p-1}, - {0x1.3c293edceb327p-57, 0x1.4e6cabbe3e5e9p-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {-0x1.251b352ff2a37p-56, 0x1.610b7551d2cdfp-1}, - {-0x1.bdd3413b26456p-55, 0x1.6a09e667f3bcdp-1}, - {0x1.0d4ef0f1d915cp-55, 0x1.72d0837efff96p-1}, - {-0x1.0f537acdf0ad7p-56, 0x1.7b5df226aafafp-1}, - {-0x1.6f420f8ea3475p-56, 0x1.83b0e0bff976ep-1}, - {-0x1.2c5e12ed1336dp-55, 0x1.8bc806b151741p-1}, - {0x1.3d419a920df0bp-55, 0x1.93a22499263fbp-1}, - {-0x1.30ee286712474p-55, 0x1.9b3e047f38741p-1}, - {-0x1.128bb015df175p-56, 0x1.a29a7a0462782p-1}, - {0x1.9f630e8b6dac8p-60, 0x1.a9b66290ea1a3p-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {-0x1.bc69f324e6d61p-55, 0x1.b728345196e3ep-1}, - {-0x1.825a732ac700ap-55, 0x1.bd7c0ac6f952ap-1}, - {-0x1.6e0b1757c8d07p-56, 0x1.c38b2f180bdb1p-1}, - {-0x1.2fb761e946603p-58, 0x1.c954b213411f5p-1}, - {-0x1.e7b6bb5ab58aep-58, 0x1.ced7af43cc773p-1}, - {-0x1.4ef5295d25af2p-55, 0x1.d4134d14dc93ap-1}, - {0x1.457e610231ac2p-56, 0x1.d906bcf328d46p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {-0x1.014c76c126527p-55, 0x1.e212104f686e5p-1}, - {-0x1.16b56f2847754p-57, 0x1.e6288ec48e112p-1}, - {0x1.760b1e2e3f81ep-55, 0x1.e9f4156c62ddap-1}, - {0x1.e82c791f59cc2p-56, 0x1.ed740e7684963p-1}, - {0x1.52c7adc6b4989p-56, 0x1.f0a7efb9230d7p-1}, - {-0x1.d7bafb51f72e6p-56, 0x1.f38f3ac64e589p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.ab256778ffcb6p-56, 0x1.f8764fa714ba9p-1}, - {-0x1.7a0a8ca13571fp-55, 0x1.fa7557f08a517p-1}, - {0x1.1ec8668ecaceep-55, 0x1.fc26470e19fd3p-1}, - {-0x1.87df6378811c7p-55, 0x1.fd88da3d12526p-1}, - {0x1.521ecd0c67e35p-57, 0x1.fe9cdad01883ap-1}, - {-0x1.c57bc2e24aa15p-57, 0x1.ff621e3796d7ep-1}, - {-0x1.1354d4556e4cbp-55, 0x1.ffd886084cd0dp-1}, - {0, 1}, -#ifndef LIBC_MATH_HAS_SMALL_TABLES - {-0x1.1354d4556e4cbp-55, 0x1.ffd886084cd0dp-1}, - {-0x1.c57bc2e24aa15p-57, 0x1.ff621e3796d7ep-1}, - {0x1.521ecd0c67e35p-57, 0x1.fe9cdad01883ap-1}, - {-0x1.87df6378811c7p-55, 0x1.fd88da3d12526p-1}, - {0x1.1ec8668ecaceep-55, 0x1.fc26470e19fd3p-1}, - {-0x1.7a0a8ca13571fp-55, 0x1.fa7557f08a517p-1}, - {0x1.ab256778ffcb6p-56, 0x1.f8764fa714ba9p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {-0x1.d7bafb51f72e6p-56, 0x1.f38f3ac64e589p-1}, - {0x1.52c7adc6b4989p-56, 0x1.f0a7efb9230d7p-1}, - {0x1.e82c791f59cc2p-56, 0x1.ed740e7684963p-1}, - {0x1.760b1e2e3f81ep-55, 0x1.e9f4156c62ddap-1}, - {-0x1.16b56f2847754p-57, 0x1.e6288ec48e112p-1}, - {-0x1.014c76c126527p-55, 0x1.e212104f686e5p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {0x1.457e610231ac2p-56, 0x1.d906bcf328d46p-1}, - {-0x1.4ef5295d25af2p-55, 0x1.d4134d14dc93ap-1}, - {-0x1.e7b6bb5ab58aep-58, 0x1.ced7af43cc773p-1}, - {-0x1.2fb761e946603p-58, 0x1.c954b213411f5p-1}, - {-0x1.6e0b1757c8d07p-56, 0x1.c38b2f180bdb1p-1}, - {-0x1.825a732ac700ap-55, 0x1.bd7c0ac6f952ap-1}, - {-0x1.bc69f324e6d61p-55, 0x1.b728345196e3ep-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {0x1.9f630e8b6dac8p-60, 0x1.a9b66290ea1a3p-1}, - {-0x1.128bb015df175p-56, 0x1.a29a7a0462782p-1}, - {-0x1.30ee286712474p-55, 0x1.9b3e047f38741p-1}, - {0x1.3d419a920df0bp-55, 0x1.93a22499263fbp-1}, - {-0x1.2c5e12ed1336dp-55, 0x1.8bc806b151741p-1}, - {-0x1.6f420f8ea3475p-56, 0x1.83b0e0bff976ep-1}, - {-0x1.0f537acdf0ad7p-56, 0x1.7b5df226aafafp-1}, - {0x1.0d4ef0f1d915cp-55, 0x1.72d0837efff96p-1}, - {-0x1.bdd3413b26456p-55, 0x1.6a09e667f3bcdp-1}, - {-0x1.251b352ff2a37p-56, 0x1.610b7551d2cdfp-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {0x1.3c293edceb327p-57, 0x1.4e6cabbe3e5e9p-1}, - {0x1.8076a2cfdc6b3p-57, 0x1.44cf325091dd6p-1}, - {0x1.e3e25e3954964p-56, 0x1.3affa292050b9p-1}, - {-0x1.efcc626f74a6fp-57, 0x1.30ff7fce17035p-1}, - {-0x1.5da743ef3770cp-55, 0x1.26d054cdd12dfp-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.ef23b69abe4f1p-55, 0x1.11eb3541b4b23p-1}, - {-0x1.a5a014347406cp-55, 0x1.073879922ffeep-1}, - {-0x1.2ec1fc1b776b8p-60, 0x1.f8ba4dbf89abap-2}, - {0x1.e0d891d3c6841p-58, 0x1.e2b5d3806f63bp-2}, - {0x1.6850e59c37f8fp-58, 0x1.cc66e9931c45ep-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {0x1.6da81290bdbabp-57, 0x1.9ef7943a8ed8ap-2}, - {-0x1.72cedd3d5a61p-57, 0x1.87de2a6aea963p-2}, - {-0x1.44b19e0864c5dp-56, 0x1.7088530fa459fp-2}, - {-0x1.efdc0d58cf62p-62, 0x1.58f9a75ab1fddp-2}, - {0x1.0c97c4afa2518p-56, 0x1.4135c94176601p-2}, - {-0x1.5d28da2c4612dp-56, 0x1.294062ed59f06p-2}, - {0x1.824c20ab7aa9ap-56, 0x1.111d262b1f677p-2}, - {-0x1.42deef11da2c4p-57, 0x1.f19f97b215f1bp-3}, - {-0x1.af1439e521935p-62, 0x1.c0b826a7e4f63p-3}, - {-0x1.26d19b9ff8d82p-57, 0x1.8f8b83c69a60bp-3}, - {0x1.531ff779ddac6p-57, 0x1.5e214448b3fc6p-3}, - {0x1.13000a89a11ep-58, 0x1.2c8106e8e613ap-3}, - {0x1.a2704729ae56dp-59, 0x1.f564e56a9730ep-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {-0x1.9a088a8bf6b2cp-59, 0x1.2d52092ce19f6p-4}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {-0x1.b1d63091a013p-64, 0x1.92155f7a3667ep-6}, - {0, 0}, - {0x1.b1d63091a013p-64, -0x1.92155f7a3667ep-6}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {0x1.9a088a8bf6b2cp-59, -0x1.2d52092ce19f6p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {-0x1.a2704729ae56dp-59, -0x1.f564e56a9730ep-4}, - {-0x1.13000a89a11ep-58, -0x1.2c8106e8e613ap-3}, - {-0x1.531ff779ddac6p-57, -0x1.5e214448b3fc6p-3}, - {0x1.26d19b9ff8d82p-57, -0x1.8f8b83c69a60bp-3}, - {0x1.af1439e521935p-62, -0x1.c0b826a7e4f63p-3}, - {0x1.42deef11da2c4p-57, -0x1.f19f97b215f1bp-3}, - {-0x1.824c20ab7aa9ap-56, -0x1.111d262b1f677p-2}, - {0x1.5d28da2c4612dp-56, -0x1.294062ed59f06p-2}, - {-0x1.0c97c4afa2518p-56, -0x1.4135c94176601p-2}, - {0x1.efdc0d58cf62p-62, -0x1.58f9a75ab1fddp-2}, - {0x1.44b19e0864c5dp-56, -0x1.7088530fa459fp-2}, - {0x1.72cedd3d5a61p-57, -0x1.87de2a6aea963p-2}, - {-0x1.6da81290bdbabp-57, -0x1.9ef7943a8ed8ap-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {-0x1.6850e59c37f8fp-58, -0x1.cc66e9931c45ep-2}, - {-0x1.e0d891d3c6841p-58, -0x1.e2b5d3806f63bp-2}, - {0x1.2ec1fc1b776b8p-60, -0x1.f8ba4dbf89abap-2}, - {0x1.a5a014347406cp-55, -0x1.073879922ffeep-1}, - {0x1.ef23b69abe4f1p-55, -0x1.11eb3541b4b23p-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.5da743ef3770cp-55, -0x1.26d054cdd12dfp-1}, - {0x1.efcc626f74a6fp-57, -0x1.30ff7fce17035p-1}, - {-0x1.e3e25e3954964p-56, -0x1.3affa292050b9p-1}, - {-0x1.8076a2cfdc6b3p-57, -0x1.44cf325091dd6p-1}, - {-0x1.3c293edceb327p-57, -0x1.4e6cabbe3e5e9p-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {0x1.251b352ff2a37p-56, -0x1.610b7551d2cdfp-1}, - {0x1.bdd3413b26456p-55, -0x1.6a09e667f3bcdp-1}, - {-0x1.0d4ef0f1d915cp-55, -0x1.72d0837efff96p-1}, - {0x1.0f537acdf0ad7p-56, -0x1.7b5df226aafafp-1}, - {0x1.6f420f8ea3475p-56, -0x1.83b0e0bff976ep-1}, - {0x1.2c5e12ed1336dp-55, -0x1.8bc806b151741p-1}, - {-0x1.3d419a920df0bp-55, -0x1.93a22499263fbp-1}, - {0x1.30ee286712474p-55, -0x1.9b3e047f38741p-1}, - {0x1.128bb015df175p-56, -0x1.a29a7a0462782p-1}, - {-0x1.9f630e8b6dac8p-60, -0x1.a9b66290ea1a3p-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {0x1.bc69f324e6d61p-55, -0x1.b728345196e3ep-1}, - {0x1.825a732ac700ap-55, -0x1.bd7c0ac6f952ap-1}, - {0x1.6e0b1757c8d07p-56, -0x1.c38b2f180bdb1p-1}, - {0x1.2fb761e946603p-58, -0x1.c954b213411f5p-1}, - {0x1.e7b6bb5ab58aep-58, -0x1.ced7af43cc773p-1}, - {0x1.4ef5295d25af2p-55, -0x1.d4134d14dc93ap-1}, - {-0x1.457e610231ac2p-56, -0x1.d906bcf328d46p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {0x1.014c76c126527p-55, -0x1.e212104f686e5p-1}, - {0x1.16b56f2847754p-57, -0x1.e6288ec48e112p-1}, - {-0x1.760b1e2e3f81ep-55, -0x1.e9f4156c62ddap-1}, - {-0x1.e82c791f59cc2p-56, -0x1.ed740e7684963p-1}, - {-0x1.52c7adc6b4989p-56, -0x1.f0a7efb9230d7p-1}, - {0x1.d7bafb51f72e6p-56, -0x1.f38f3ac64e589p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.ab256778ffcb6p-56, -0x1.f8764fa714ba9p-1}, - {0x1.7a0a8ca13571fp-55, -0x1.fa7557f08a517p-1}, - {-0x1.1ec8668ecaceep-55, -0x1.fc26470e19fd3p-1}, - {0x1.87df6378811c7p-55, -0x1.fd88da3d12526p-1}, - {-0x1.521ecd0c67e35p-57, -0x1.fe9cdad01883ap-1}, - {0x1.c57bc2e24aa15p-57, -0x1.ff621e3796d7ep-1}, - {0x1.1354d4556e4cbp-55, -0x1.ffd886084cd0dp-1}, - {0, -1}, - {0x1.1354d4556e4cbp-55, -0x1.ffd886084cd0dp-1}, - {0x1.c57bc2e24aa15p-57, -0x1.ff621e3796d7ep-1}, - {-0x1.521ecd0c67e35p-57, -0x1.fe9cdad01883ap-1}, - {0x1.87df6378811c7p-55, -0x1.fd88da3d12526p-1}, - {-0x1.1ec8668ecaceep-55, -0x1.fc26470e19fd3p-1}, - {0x1.7a0a8ca13571fp-55, -0x1.fa7557f08a517p-1}, - {-0x1.ab256778ffcb6p-56, -0x1.f8764fa714ba9p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {0x1.d7bafb51f72e6p-56, -0x1.f38f3ac64e589p-1}, - {-0x1.52c7adc6b4989p-56, -0x1.f0a7efb9230d7p-1}, - {-0x1.e82c791f59cc2p-56, -0x1.ed740e7684963p-1}, - {-0x1.760b1e2e3f81ep-55, -0x1.e9f4156c62ddap-1}, - {0x1.16b56f2847754p-57, -0x1.e6288ec48e112p-1}, - {0x1.014c76c126527p-55, -0x1.e212104f686e5p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {-0x1.457e610231ac2p-56, -0x1.d906bcf328d46p-1}, - {0x1.4ef5295d25af2p-55, -0x1.d4134d14dc93ap-1}, - {0x1.e7b6bb5ab58aep-58, -0x1.ced7af43cc773p-1}, - {0x1.2fb761e946603p-58, -0x1.c954b213411f5p-1}, - {0x1.6e0b1757c8d07p-56, -0x1.c38b2f180bdb1p-1}, - {0x1.825a732ac700ap-55, -0x1.bd7c0ac6f952ap-1}, - {0x1.bc69f324e6d61p-55, -0x1.b728345196e3ep-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {-0x1.9f630e8b6dac8p-60, -0x1.a9b66290ea1a3p-1}, - {0x1.128bb015df175p-56, -0x1.a29a7a0462782p-1}, - {0x1.30ee286712474p-55, -0x1.9b3e047f38741p-1}, - {-0x1.3d419a920df0bp-55, -0x1.93a22499263fbp-1}, - {0x1.2c5e12ed1336dp-55, -0x1.8bc806b151741p-1}, - {0x1.6f420f8ea3475p-56, -0x1.83b0e0bff976ep-1}, - {0x1.0f537acdf0ad7p-56, -0x1.7b5df226aafafp-1}, - {-0x1.0d4ef0f1d915cp-55, -0x1.72d0837efff96p-1}, - {0x1.bdd3413b26456p-55, -0x1.6a09e667f3bcdp-1}, - {0x1.251b352ff2a37p-56, -0x1.610b7551d2cdfp-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {-0x1.3c293edceb327p-57, -0x1.4e6cabbe3e5e9p-1}, - {-0x1.8076a2cfdc6b3p-57, -0x1.44cf325091dd6p-1}, - {-0x1.e3e25e3954964p-56, -0x1.3affa292050b9p-1}, - {0x1.efcc626f74a6fp-57, -0x1.30ff7fce17035p-1}, - {0x1.5da743ef3770cp-55, -0x1.26d054cdd12dfp-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.ef23b69abe4f1p-55, -0x1.11eb3541b4b23p-1}, - {0x1.a5a014347406cp-55, -0x1.073879922ffeep-1}, - {0x1.2ec1fc1b776b8p-60, -0x1.f8ba4dbf89abap-2}, - {-0x1.e0d891d3c6841p-58, -0x1.e2b5d3806f63bp-2}, - {-0x1.6850e59c37f8fp-58, -0x1.cc66e9931c45ep-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {-0x1.6da81290bdbabp-57, -0x1.9ef7943a8ed8ap-2}, - {0x1.72cedd3d5a61p-57, -0x1.87de2a6aea963p-2}, - {0x1.44b19e0864c5dp-56, -0x1.7088530fa459fp-2}, - {0x1.efdc0d58cf62p-62, -0x1.58f9a75ab1fddp-2}, - {-0x1.0c97c4afa2518p-56, -0x1.4135c94176601p-2}, - {0x1.5d28da2c4612dp-56, -0x1.294062ed59f06p-2}, - {-0x1.824c20ab7aa9ap-56, -0x1.111d262b1f677p-2}, - {0x1.42deef11da2c4p-57, -0x1.f19f97b215f1bp-3}, - {0x1.af1439e521935p-62, -0x1.c0b826a7e4f63p-3}, - {0x1.26d19b9ff8d82p-57, -0x1.8f8b83c69a60bp-3}, - {-0x1.531ff779ddac6p-57, -0x1.5e214448b3fc6p-3}, - {-0x1.13000a89a11ep-58, -0x1.2c8106e8e613ap-3}, - {-0x1.a2704729ae56dp-59, -0x1.f564e56a9730ep-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {0x1.9a088a8bf6b2cp-59, -0x1.2d52092ce19f6p-4}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {0x1.b1d63091a013p-64, -0x1.92155f7a3667ep-6}, -#endif // !LIBC_MATH_HAS_SMALL_TABLES -}; - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_FMA_H diff --git a/libc/src/math/generic/range_reduction_double_nofma.h b/libc/src/math/generic/range_reduction_double_nofma.h deleted file mode 100644 index 9d13d24..0000000 --- a/libc/src/math/generic/range_reduction_double_nofma.h +++ /dev/null @@ -1,347 +0,0 @@ -//===-- Range reduction for double precision sin/cos/tan --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H -#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H - -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/math/generic/range_reduction_double_common.h" - -namespace LIBC_NAMESPACE_DECL { - -using fputil::DoubleDouble; - -LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) { - using FPBits = typename fputil::FPBits<double>; - FPBits xbits(x); - - int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62); - idx = static_cast<unsigned>((x_e_m62 >> 4) + 3); - // Scale x down by 2^(-(16 * (idx - 3)) - xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62); - // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 - x_reduced = xbits.get_val(); - // x * c_hi = ph.hi + ph.lo exactly. - DoubleDouble x_split = fputil::split(x_reduced); - DoubleDouble ph = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]); - // x * c_mid = pm.hi + pm.lo exactly. - DoubleDouble pm = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]); - // x * c_lo = pl.hi + pl.lo exactly. - DoubleDouble pl = fputil::exact_mult<double, SPLIT>( - x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]); - // Extract integral parts and fractional parts of (ph.lo + pm.hi). - double sum_hi = ph.lo + pm.hi; - double kd = fputil::nearest_integer(sum_hi); - - // x * 128/pi mod 1 ~ y_hi + y_mid + y_lo - y_hi = (ph.lo - kd) + pm.hi; // Exact - y_mid = fputil::exact_add(pm.lo, pl.hi); - y_lo = pl.lo; - - // y_l = x * c_lo_2 + pl.lo - double y_l = - fputil::multiply_add(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][3], y_lo); - DoubleDouble y = fputil::exact_add(y_hi, y_mid.hi); - y.lo += (y_mid.lo + y_l); - - // Digits of pi/128, generated by Sollya with: - // > a = round(pi/128, D, RN); - // > b = round(pi/128 - a, D, RN); - constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60, - 0x1.921fb54442d18p-6}; - - // Error bound: with {a} denote the fractional part of a, i.e.: - // {a} = a - round(a) - // Then, - // | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105 - // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110 - u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD); - - return static_cast<unsigned>(static_cast<int64_t>(kd)); -} - -// Lookup table for sin(k * pi / 128) with k = 0, ..., 255. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 255 do { -// a = round(sin(k * pi/128), 51, RN); -// b = round(sin(k * pi/128) - a, D, RN); -// print("{", b, ",", a, "},"); -// }; -LIBC_INLINE constexpr DoubleDouble SIN_K_PI_OVER_128[] = { - {0, 0}, - {0x1.f938a73db97fbp-58, 0x1.92155f7a3667cp-6}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {0x1.ccbeeeae8129ap-56, 0x1.2d52092ce19f4p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {-0x1.cbb1f71aca352p-56, 0x1.f564e56a9731p-4}, - {-0x1.dd9ffeaecbdc4p-55, 0x1.2c8106e8e613cp-3}, - {-0x1.ab3802218894fp-55, 0x1.5e214448b3fc8p-3}, - {-0x1.49b466e7fe36p-55, 0x1.8f8b83c69a60cp-3}, - {-0x1.035e2873ca432p-55, 0x1.c0b826a7e4f64p-3}, - {-0x1.50b7bbc4768b1p-55, 0x1.f19f97b215f1cp-3}, - {-0x1.3ed9efaa42ab3p-55, 0x1.111d262b1f678p-2}, - {0x1.a8b5c974ee7b5p-54, 0x1.294062ed59f04p-2}, - {0x1.4325f12be8946p-54, 0x1.4135c941766p-2}, - {0x1.fc2047e54e614p-55, 0x1.58f9a75ab1fdcp-2}, - {-0x1.512c678219317p-54, 0x1.7088530fa45ap-2}, - {-0x1.2e59dba7ab4c2p-54, 0x1.87de2a6aea964p-2}, - {-0x1.d24afdade848bp-54, 0x1.9ef7943a8ed8cp-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {-0x1.e97af1a63c807p-54, 0x1.cc66e9931c46p-2}, - {-0x1.c3e4edc5872f8p-55, 0x1.e2b5d3806f63cp-2}, - {0x1.fb44f80f92225p-54, 0x1.f8ba4dbf89ab8p-2}, - {0x1.9697faf2e2fe5p-53, 0x1.073879922ffecp-1}, - {-0x1.7bc8eda6af93cp-53, 0x1.11eb3541b4b24p-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.5769d0fbcddc3p-53, 0x1.26d054cdd12ep-1}, - {0x1.c20673b2116b2p-54, 0x1.30ff7fce17034p-1}, - {0x1.3c7c4bc72a92cp-53, 0x1.3affa292050b8p-1}, - {-0x1.e7f895d302395p-53, 0x1.44cf325091dd8p-1}, - {0x1.13c293edceb32p-53, 0x1.4e6cabbe3e5e8p-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {-0x1.24a366a5fe547p-53, 0x1.610b7551d2cep-1}, - {0x1.21165f626cdd5p-54, 0x1.6a09e667f3bccp-1}, - {-0x1.bcac43c389ba9p-53, 0x1.72d0837efff98p-1}, - {-0x1.21ea6f59be15bp-53, 0x1.7b5df226aafbp-1}, - {0x1.d217be0e2b971p-53, 0x1.83b0e0bff976cp-1}, - {0x1.69d0f6897664ap-54, 0x1.8bc806b15174p-1}, - {-0x1.615f32b6f907ap-54, 0x1.93a22499263fcp-1}, - {0x1.6788ebcc76dc6p-54, 0x1.9b3e047f3874p-1}, - {0x1.ddae89fd441d1p-53, 0x1.a29a7a046278p-1}, - {-0x1.f98273c5d2495p-54, 0x1.a9b66290ea1a4p-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {0x1.90e58336c64a8p-53, 0x1.b728345196e3cp-1}, - {0x1.9f6963354e3fep-53, 0x1.bd7c0ac6f9528p-1}, - {0x1.a47d3a2a0dcbep-54, 0x1.c38b2f180bdbp-1}, - {0x1.ed0489e16b9ap-54, 0x1.c954b213411f4p-1}, - {-0x1.0f3db5dad5ac5p-53, 0x1.ced7af43cc774p-1}, - {0x1.ac42b5a8b6943p-53, 0x1.d4134d14dc938p-1}, - {-0x1.d75033dfb9ca8p-53, 0x1.d906bcf328d48p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {0x1.7f59c49f6cd6dp-54, 0x1.e212104f686e4p-1}, - {0x1.ee94a90d7b88bp-53, 0x1.e6288ec48e11p-1}, - {-0x1.a27d3874701f9p-53, 0x1.e9f4156c62ddcp-1}, - {-0x1.85f4e1b8298dp-54, 0x1.ed740e7684964p-1}, - {-0x1.ab4e148e52d9ep-54, 0x1.f0a7efb9230d8p-1}, - {0x1.8a11412b82346p-54, 0x1.f38f3ac64e588p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.3564acef1ff97p-53, 0x1.f8764fa714ba8p-1}, - {-0x1.5e82a3284d5c8p-53, 0x1.fa7557f08a518p-1}, - {-0x1.709bccb89a989p-54, 0x1.fc26470e19fd4p-1}, - {0x1.9e082721dfb8ep-53, 0x1.fd88da3d12524p-1}, - {-0x1.eade132f3981dp-53, 0x1.fe9cdad01883cp-1}, - {0x1.e3a843d1db55fp-53, 0x1.ff621e3796d7cp-1}, - {0x1.765595d548d9ap-54, 0x1.ffd886084cd0cp-1}, - {0, 1}, -#ifndef LIBC_MATH_HAS_SMALL_TABLES - {0x1.765595d548d9ap-54, 0x1.ffd886084cd0cp-1}, - {0x1.e3a843d1db55fp-53, 0x1.ff621e3796d7cp-1}, - {-0x1.eade132f3981dp-53, 0x1.fe9cdad01883cp-1}, - {0x1.9e082721dfb8ep-53, 0x1.fd88da3d12524p-1}, - {-0x1.709bccb89a989p-54, 0x1.fc26470e19fd4p-1}, - {-0x1.5e82a3284d5c8p-53, 0x1.fa7557f08a518p-1}, - {0x1.3564acef1ff97p-53, 0x1.f8764fa714ba8p-1}, - {0x1.562172a361fd3p-56, 0x1.f6297cff75cbp-1}, - {0x1.8a11412b82346p-54, 0x1.f38f3ac64e588p-1}, - {-0x1.ab4e148e52d9ep-54, 0x1.f0a7efb9230d8p-1}, - {-0x1.85f4e1b8298dp-54, 0x1.ed740e7684964p-1}, - {-0x1.a27d3874701f9p-53, 0x1.e9f4156c62ddcp-1}, - {0x1.ee94a90d7b88bp-53, 0x1.e6288ec48e11p-1}, - {0x1.7f59c49f6cd6dp-54, 0x1.e212104f686e4p-1}, - {0x1.83c37c6107db3p-55, 0x1.ddb13b6ccc23cp-1}, - {-0x1.d75033dfb9ca8p-53, 0x1.d906bcf328d48p-1}, - {0x1.ac42b5a8b6943p-53, 0x1.d4134d14dc938p-1}, - {-0x1.0f3db5dad5ac5p-53, 0x1.ced7af43cc774p-1}, - {0x1.ed0489e16b9ap-54, 0x1.c954b213411f4p-1}, - {0x1.a47d3a2a0dcbep-54, 0x1.c38b2f180bdbp-1}, - {0x1.9f6963354e3fep-53, 0x1.bd7c0ac6f9528p-1}, - {0x1.90e58336c64a8p-53, 0x1.b728345196e3cp-1}, - {-0x1.926da300ffccep-55, 0x1.b090a581502p-1}, - {-0x1.f98273c5d2495p-54, 0x1.a9b66290ea1a4p-1}, - {0x1.ddae89fd441d1p-53, 0x1.a29a7a046278p-1}, - {0x1.6788ebcc76dc6p-54, 0x1.9b3e047f3874p-1}, - {-0x1.615f32b6f907ap-54, 0x1.93a22499263fcp-1}, - {0x1.69d0f6897664ap-54, 0x1.8bc806b15174p-1}, - {0x1.d217be0e2b971p-53, 0x1.83b0e0bff976cp-1}, - {-0x1.21ea6f59be15bp-53, 0x1.7b5df226aafbp-1}, - {-0x1.bcac43c389ba9p-53, 0x1.72d0837efff98p-1}, - {0x1.21165f626cdd5p-54, 0x1.6a09e667f3bccp-1}, - {-0x1.24a366a5fe547p-53, 0x1.610b7551d2cep-1}, - {-0x1.75720992bfbb2p-55, 0x1.57d69348cecap-1}, - {0x1.13c293edceb32p-53, 0x1.4e6cabbe3e5e8p-1}, - {-0x1.e7f895d302395p-53, 0x1.44cf325091dd8p-1}, - {0x1.3c7c4bc72a92cp-53, 0x1.3affa292050b8p-1}, - {0x1.c20673b2116b2p-54, 0x1.30ff7fce17034p-1}, - {-0x1.5769d0fbcddc3p-53, 0x1.26d054cdd12ep-1}, - {0x1.b25dd267f66p-55, 0x1.1c73b39ae68c8p-1}, - {-0x1.7bc8eda6af93cp-53, 0x1.11eb3541b4b24p-1}, - {0x1.9697faf2e2fe5p-53, 0x1.073879922ffecp-1}, - {0x1.fb44f80f92225p-54, 0x1.f8ba4dbf89ab8p-2}, - {-0x1.c3e4edc5872f8p-55, 0x1.e2b5d3806f63cp-2}, - {-0x1.e97af1a63c807p-54, 0x1.cc66e9931c46p-2}, - {0x1.5b362cb974183p-57, 0x1.b5d1009e15ccp-2}, - {-0x1.d24afdade848bp-54, 0x1.9ef7943a8ed8cp-2}, - {-0x1.2e59dba7ab4c2p-54, 0x1.87de2a6aea964p-2}, - {-0x1.512c678219317p-54, 0x1.7088530fa45ap-2}, - {0x1.fc2047e54e614p-55, 0x1.58f9a75ab1fdcp-2}, - {0x1.4325f12be8946p-54, 0x1.4135c941766p-2}, - {0x1.a8b5c974ee7b5p-54, 0x1.294062ed59f04p-2}, - {-0x1.3ed9efaa42ab3p-55, 0x1.111d262b1f678p-2}, - {-0x1.50b7bbc4768b1p-55, 0x1.f19f97b215f1cp-3}, - {-0x1.035e2873ca432p-55, 0x1.c0b826a7e4f64p-3}, - {-0x1.49b466e7fe36p-55, 0x1.8f8b83c69a60cp-3}, - {-0x1.ab3802218894fp-55, 0x1.5e214448b3fc8p-3}, - {-0x1.dd9ffeaecbdc4p-55, 0x1.2c8106e8e613cp-3}, - {-0x1.cbb1f71aca352p-56, 0x1.f564e56a9731p-4}, - {-0x1.e2718d26ed688p-60, 0x1.917a6bc29b42cp-4}, - {0x1.ccbeeeae8129ap-56, 0x1.2d52092ce19f4p-4}, - {-0x1.912bd0d569a9p-61, 0x1.91f65f10dd814p-5}, - {0x1.f938a73db97fbp-58, 0x1.92155f7a3667cp-6}, - {0, 0}, - {-0x1.f938a73db97fbp-58, -0x1.92155f7a3667cp-6}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {-0x1.ccbeeeae8129ap-56, -0x1.2d52092ce19f4p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {0x1.cbb1f71aca352p-56, -0x1.f564e56a9731p-4}, - {0x1.dd9ffeaecbdc4p-55, -0x1.2c8106e8e613cp-3}, - {0x1.ab3802218894fp-55, -0x1.5e214448b3fc8p-3}, - {0x1.49b466e7fe36p-55, -0x1.8f8b83c69a60cp-3}, - {0x1.035e2873ca432p-55, -0x1.c0b826a7e4f64p-3}, - {0x1.50b7bbc4768b1p-55, -0x1.f19f97b215f1cp-3}, - {0x1.3ed9efaa42ab3p-55, -0x1.111d262b1f678p-2}, - {-0x1.a8b5c974ee7b5p-54, -0x1.294062ed59f04p-2}, - {-0x1.4325f12be8946p-54, -0x1.4135c941766p-2}, - {-0x1.fc2047e54e614p-55, -0x1.58f9a75ab1fdcp-2}, - {0x1.512c678219317p-54, -0x1.7088530fa45ap-2}, - {0x1.2e59dba7ab4c2p-54, -0x1.87de2a6aea964p-2}, - {0x1.d24afdade848bp-54, -0x1.9ef7943a8ed8cp-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {0x1.e97af1a63c807p-54, -0x1.cc66e9931c46p-2}, - {0x1.c3e4edc5872f8p-55, -0x1.e2b5d3806f63cp-2}, - {-0x1.fb44f80f92225p-54, -0x1.f8ba4dbf89ab8p-2}, - {-0x1.9697faf2e2fe5p-53, -0x1.073879922ffecp-1}, - {0x1.7bc8eda6af93cp-53, -0x1.11eb3541b4b24p-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.5769d0fbcddc3p-53, -0x1.26d054cdd12ep-1}, - {-0x1.c20673b2116b2p-54, -0x1.30ff7fce17034p-1}, - {-0x1.3c7c4bc72a92cp-53, -0x1.3affa292050b8p-1}, - {0x1.e7f895d302395p-53, -0x1.44cf325091dd8p-1}, - {-0x1.13c293edceb32p-53, -0x1.4e6cabbe3e5e8p-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {0x1.24a366a5fe547p-53, -0x1.610b7551d2cep-1}, - {-0x1.21165f626cdd5p-54, -0x1.6a09e667f3bccp-1}, - {0x1.bcac43c389ba9p-53, -0x1.72d0837efff98p-1}, - {0x1.21ea6f59be15bp-53, -0x1.7b5df226aafbp-1}, - {-0x1.d217be0e2b971p-53, -0x1.83b0e0bff976cp-1}, - {-0x1.69d0f6897664ap-54, -0x1.8bc806b15174p-1}, - {0x1.615f32b6f907ap-54, -0x1.93a22499263fcp-1}, - {-0x1.6788ebcc76dc6p-54, -0x1.9b3e047f3874p-1}, - {-0x1.ddae89fd441d1p-53, -0x1.a29a7a046278p-1}, - {0x1.f98273c5d2495p-54, -0x1.a9b66290ea1a4p-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {-0x1.90e58336c64a8p-53, -0x1.b728345196e3cp-1}, - {-0x1.9f6963354e3fep-53, -0x1.bd7c0ac6f9528p-1}, - {-0x1.a47d3a2a0dcbep-54, -0x1.c38b2f180bdbp-1}, - {-0x1.ed0489e16b9ap-54, -0x1.c954b213411f4p-1}, - {0x1.0f3db5dad5ac5p-53, -0x1.ced7af43cc774p-1}, - {-0x1.ac42b5a8b6943p-53, -0x1.d4134d14dc938p-1}, - {0x1.d75033dfb9ca8p-53, -0x1.d906bcf328d48p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {-0x1.7f59c49f6cd6dp-54, -0x1.e212104f686e4p-1}, - {-0x1.ee94a90d7b88bp-53, -0x1.e6288ec48e11p-1}, - {0x1.a27d3874701f9p-53, -0x1.e9f4156c62ddcp-1}, - {0x1.85f4e1b8298dp-54, -0x1.ed740e7684964p-1}, - {0x1.ab4e148e52d9ep-54, -0x1.f0a7efb9230d8p-1}, - {-0x1.8a11412b82346p-54, -0x1.f38f3ac64e588p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.3564acef1ff97p-53, -0x1.f8764fa714ba8p-1}, - {0x1.5e82a3284d5c8p-53, -0x1.fa7557f08a518p-1}, - {0x1.709bccb89a989p-54, -0x1.fc26470e19fd4p-1}, - {-0x1.9e082721dfb8ep-53, -0x1.fd88da3d12524p-1}, - {0x1.eade132f3981dp-53, -0x1.fe9cdad01883cp-1}, - {-0x1.e3a843d1db55fp-53, -0x1.ff621e3796d7cp-1}, - {-0x1.765595d548d9ap-54, -0x1.ffd886084cd0cp-1}, - {0, -1}, - {-0x1.765595d548d9ap-54, -0x1.ffd886084cd0cp-1}, - {-0x1.e3a843d1db55fp-53, -0x1.ff621e3796d7cp-1}, - {0x1.eade132f3981dp-53, -0x1.fe9cdad01883cp-1}, - {-0x1.9e082721dfb8ep-53, -0x1.fd88da3d12524p-1}, - {0x1.709bccb89a989p-54, -0x1.fc26470e19fd4p-1}, - {0x1.5e82a3284d5c8p-53, -0x1.fa7557f08a518p-1}, - {-0x1.3564acef1ff97p-53, -0x1.f8764fa714ba8p-1}, - {-0x1.562172a361fd3p-56, -0x1.f6297cff75cbp-1}, - {-0x1.8a11412b82346p-54, -0x1.f38f3ac64e588p-1}, - {0x1.ab4e148e52d9ep-54, -0x1.f0a7efb9230d8p-1}, - {0x1.85f4e1b8298dp-54, -0x1.ed740e7684964p-1}, - {0x1.a27d3874701f9p-53, -0x1.e9f4156c62ddcp-1}, - {-0x1.ee94a90d7b88bp-53, -0x1.e6288ec48e11p-1}, - {-0x1.7f59c49f6cd6dp-54, -0x1.e212104f686e4p-1}, - {-0x1.83c37c6107db3p-55, -0x1.ddb13b6ccc23cp-1}, - {0x1.d75033dfb9ca8p-53, -0x1.d906bcf328d48p-1}, - {-0x1.ac42b5a8b6943p-53, -0x1.d4134d14dc938p-1}, - {0x1.0f3db5dad5ac5p-53, -0x1.ced7af43cc774p-1}, - {-0x1.ed0489e16b9ap-54, -0x1.c954b213411f4p-1}, - {-0x1.a47d3a2a0dcbep-54, -0x1.c38b2f180bdbp-1}, - {-0x1.9f6963354e3fep-53, -0x1.bd7c0ac6f9528p-1}, - {-0x1.90e58336c64a8p-53, -0x1.b728345196e3cp-1}, - {0x1.926da300ffccep-55, -0x1.b090a581502p-1}, - {0x1.f98273c5d2495p-54, -0x1.a9b66290ea1a4p-1}, - {-0x1.ddae89fd441d1p-53, -0x1.a29a7a046278p-1}, - {-0x1.6788ebcc76dc6p-54, -0x1.9b3e047f3874p-1}, - {0x1.615f32b6f907ap-54, -0x1.93a22499263fcp-1}, - {-0x1.69d0f6897664ap-54, -0x1.8bc806b15174p-1}, - {-0x1.d217be0e2b971p-53, -0x1.83b0e0bff976cp-1}, - {0x1.21ea6f59be15bp-53, -0x1.7b5df226aafbp-1}, - {0x1.bcac43c389ba9p-53, -0x1.72d0837efff98p-1}, - {-0x1.21165f626cdd5p-54, -0x1.6a09e667f3bccp-1}, - {0x1.24a366a5fe547p-53, -0x1.610b7551d2cep-1}, - {0x1.75720992bfbb2p-55, -0x1.57d69348cecap-1}, - {-0x1.13c293edceb32p-53, -0x1.4e6cabbe3e5e8p-1}, - {0x1.e7f895d302395p-53, -0x1.44cf325091dd8p-1}, - {-0x1.3c7c4bc72a92cp-53, -0x1.3affa292050b8p-1}, - {-0x1.c20673b2116b2p-54, -0x1.30ff7fce17034p-1}, - {0x1.5769d0fbcddc3p-53, -0x1.26d054cdd12ep-1}, - {-0x1.b25dd267f66p-55, -0x1.1c73b39ae68c8p-1}, - {0x1.7bc8eda6af93cp-53, -0x1.11eb3541b4b24p-1}, - {-0x1.9697faf2e2fe5p-53, -0x1.073879922ffecp-1}, - {-0x1.fb44f80f92225p-54, -0x1.f8ba4dbf89ab8p-2}, - {0x1.c3e4edc5872f8p-55, -0x1.e2b5d3806f63cp-2}, - {0x1.e97af1a63c807p-54, -0x1.cc66e9931c46p-2}, - {-0x1.5b362cb974183p-57, -0x1.b5d1009e15ccp-2}, - {0x1.d24afdade848bp-54, -0x1.9ef7943a8ed8cp-2}, - {0x1.2e59dba7ab4c2p-54, -0x1.87de2a6aea964p-2}, - {0x1.512c678219317p-54, -0x1.7088530fa45ap-2}, - {-0x1.fc2047e54e614p-55, -0x1.58f9a75ab1fdcp-2}, - {-0x1.4325f12be8946p-54, -0x1.4135c941766p-2}, - {-0x1.a8b5c974ee7b5p-54, -0x1.294062ed59f04p-2}, - {0x1.3ed9efaa42ab3p-55, -0x1.111d262b1f678p-2}, - {0x1.50b7bbc4768b1p-55, -0x1.f19f97b215f1cp-3}, - {0x1.035e2873ca432p-55, -0x1.c0b826a7e4f64p-3}, - {0x1.49b466e7fe36p-55, -0x1.8f8b83c69a60cp-3}, - {0x1.ab3802218894fp-55, -0x1.5e214448b3fc8p-3}, - {0x1.dd9ffeaecbdc4p-55, -0x1.2c8106e8e613cp-3}, - {0x1.cbb1f71aca352p-56, -0x1.f564e56a9731p-4}, - {0x1.e2718d26ed688p-60, -0x1.917a6bc29b42cp-4}, - {-0x1.ccbeeeae8129ap-56, -0x1.2d52092ce19f4p-4}, - {0x1.912bd0d569a9p-61, -0x1.91f65f10dd814p-5}, - {-0x1.f938a73db97fbp-58, -0x1.92155f7a3667cp-6}, -#endif // !LIBC_MATH_HAS_SMALL_TABLES -}; - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_NOFMA_H diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp index a614427b..1b6310f 100644 --- a/libc/src/math/generic/sin.cpp +++ b/libc/src/math/generic/sin.cpp @@ -18,13 +18,13 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" +#include "src/__support/math/range_reduction_double_common.h" +#include "src/__support/math/sincos_eval.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -33,6 +33,7 @@ using DoubleDouble = fputil::DoubleDouble; using Float128 = typename fputil::DyadicFloat<128>; LLVM_LIBC_FUNCTION(double, sin, (double x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -95,7 +96,8 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { DoubleDouble sin_y, cos_y; - [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); + [[maybe_unused]] double err = + math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y); // Look up sin(k * pi/128) and cos(k * pi/128) #ifdef LIBC_MATH_HAS_SMALL_TABLES @@ -149,7 +151,7 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { else u_f128 = range_reduction_large.accurate(); - generic::sincos_eval(u_f128, sin_u, cos_u); + math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u); auto get_sin_k = [](unsigned kk) -> Float128 { unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); diff --git a/libc/src/math/generic/sincos.cpp b/libc/src/math/generic/sincos.cpp index 08c8a82..38661de 100644 --- a/libc/src/math/generic/sincos.cpp +++ b/libc/src/math/generic/sincos.cpp @@ -19,13 +19,13 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" -#include "src/math/generic/sincos_eval.h" +#include "src/__support/math/range_reduction_double_common.h" +#include "src/__support/math/sincos_eval.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -34,6 +34,7 @@ using DoubleDouble = fputil::DoubleDouble; using Float128 = typename fputil::DyadicFloat<128>; LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -106,7 +107,8 @@ LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { DoubleDouble sin_y, cos_y; - [[maybe_unused]] double err = generic::sincos_eval(y, sin_y, cos_y); + [[maybe_unused]] double err = + math::sincos_eval_internal::sincos_eval(y, sin_y, cos_y); // Look up sin(k * pi/128) and cos(k * pi/128) #ifdef LIBC_MATH_HAS_SMALL_TABLES @@ -179,7 +181,7 @@ LLVM_LIBC_FUNCTION(void, sincos, (double x, double *sin_x, double *cos_x)) { else u_f128 = range_reduction_large.accurate(); - generic::sincos_eval(u_f128, sin_u, cos_u); + math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u); auto get_sin_k = [](unsigned kk) -> Float128 { unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); diff --git a/libc/src/math/generic/sincos_eval.h b/libc/src/math/generic/sincos_eval.h deleted file mode 100644 index 41a4c75..0000000 --- a/libc/src/math/generic/sincos_eval.h +++ /dev/null @@ -1,138 +0,0 @@ -//===-- Compute sin + cos for small angles ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOS_EVAL_H -#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOS_EVAL_H - -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -namespace generic { - -using fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - -LIBC_INLINE double sincos_eval(const DoubleDouble &u, DoubleDouble &sin_u, - DoubleDouble &cos_u) { - // Evaluate sin(y) = sin(x - k * (pi/128)) - // We use the degree-7 Taylor approximation: - // sin(y) ~ y - y^3/3! + y^5/5! - y^7/7! - // Then the error is bounded by: - // |sin(y) - (y - y^3/3! + y^5/5! - y^7/7!)| < |y|^9/9! < 2^-54/9! < 2^-72. - // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms - // < ulp(u_hi^3) gives us: - // y - y^3/3! + y^5/5! - y^7/7! = ... - // ~ u_hi + u_hi^3 * (-1/6 + u_hi^2 * (1/120 - u_hi^2 * 1/5040)) + - // + u_lo (1 + u_hi^2 * (-1/2 + u_hi^2 / 24)) - double u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58. - // p1 ~ 1/120 + u_hi^2 / 5040. - double p1 = fputil::multiply_add(u_hi_sq, -0x1.a01a01a01a01ap-13, - 0x1.1111111111111p-7); - // q1 ~ -1/2 + u_hi^2 / 24. - double q1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-5, -0x1.0p-1); - double u_hi_3 = u_hi_sq * u.hi; - // p2 ~ -1/6 + u_hi^2 (1/120 - u_hi^2 * 1/5040) - double p2 = fputil::multiply_add(u_hi_sq, p1, -0x1.5555555555555p-3); - // q2 ~ 1 + u_hi^2 (-1/2 + u_hi^2 / 24) - double q2 = fputil::multiply_add(u_hi_sq, q1, 1.0); - double sin_lo = fputil::multiply_add(u_hi_3, p2, u.lo * q2); - // Overall, |sin(y) - (u_hi + sin_lo)| < 2*ulp(u_hi^3) < 2^-69. - - // Evaluate cos(y) = cos(x - k * (pi/128)) - // We use the degree-8 Taylor approximation: - // cos(y) ~ 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! - // Then the error is bounded by: - // |cos(y) - (...)| < |y|^10/10! < 2^-81 - // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms - // < ulp(u_hi^3) gives us: - // 1 - y^2/2 + y^4/4! - y^6/6! + y^8/8! = ... - // ~ 1 - u_hi^2/2 + u_hi^4(1/24 + u_hi^2 (-1/720 + u_hi^2/40320)) + - // + u_hi u_lo (-1 + u_hi^2/6) - // We compute 1 - u_hi^2 accurately: - // v_hi + v_lo ~ 1 - u_hi^2/2 - // with error <= 2^-105. - double u_hi_neg_half = (-0.5) * u.hi; - DoubleDouble v; - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - v.hi = fputil::multiply_add(u.hi, u_hi_neg_half, 1.0); - v.lo = 1.0 - v.hi; // Exact - v.lo = fputil::multiply_add(u.hi, u_hi_neg_half, v.lo); -#else - DoubleDouble u_hi_sq_neg_half = fputil::exact_mult(u.hi, u_hi_neg_half); - v = fputil::exact_add(1.0, u_hi_sq_neg_half.hi); - v.lo += u_hi_sq_neg_half.lo; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - // r1 ~ -1/720 + u_hi^2 / 40320 - double r1 = fputil::multiply_add(u_hi_sq, 0x1.a01a01a01a01ap-16, - -0x1.6c16c16c16c17p-10); - // s1 ~ -1 + u_hi^2 / 6 - double s1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-3, -1.0); - double u_hi_4 = u_hi_sq * u_hi_sq; - double u_hi_u_lo = u.hi * u.lo; - // r2 ~ 1/24 + u_hi^2 (-1/720 + u_hi^2 / 40320) - double r2 = fputil::multiply_add(u_hi_sq, r1, 0x1.5555555555555p-5); - // s2 ~ v_lo + u_hi * u_lo * (-1 + u_hi^2 / 6) - double s2 = fputil::multiply_add(u_hi_u_lo, s1, v.lo); - double cos_lo = fputil::multiply_add(u_hi_4, r2, s2); - // Overall, |cos(y) - (v_hi + cos_lo)| < 2*ulp(u_hi^4) < 2^-75. - - sin_u = fputil::exact_add(u.hi, sin_lo); - cos_u = fputil::exact_add(v.hi, cos_lo); - - return fputil::multiply_add(fputil::FPBits<double>(u_hi_3).abs().get_val(), - 0x1.0p-51, 0x1.0p-105); -} - -LIBC_INLINE void sincos_eval(const Float128 &u, Float128 &sin_u, - Float128 &cos_u) { - Float128 u_sq = fputil::quick_mul(u, u); - - // sin(u) ~ x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - x^11/11! + x^13/13! - constexpr Float128 SIN_COEFFS[] = { - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1 - {Sign::NEG, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // -1/3! - {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/5! - {Sign::NEG, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // -1/7! - {Sign::POS, -146, 0xb8ef1d2a'b6399c7d'560e4472'800b8ef2_u128}, // 1/9! - {Sign::NEG, -153, 0xd7322b3f'aa271c7f'3a3f25c1'bee38f10_u128}, // -1/11! - {Sign::POS, -160, 0xb092309d'43684be5'1c198e91'd7b4269e_u128}, // 1/13! - }; - - // cos(u) ~ 1 - x^2/2 + x^4/4! - x^6/6! + x^8/8! - x^10/10! + x^12/12! - constexpr Float128 COS_COEFFS[] = { - {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 - {Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128}, // 1/2 - {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/4! - {Sign::NEG, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/6! - {Sign::POS, -143, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/8! - {Sign::NEG, -149, 0x93f27dbb'c4fae397'780b69f5'333c725b_u128}, // 1/10! - {Sign::POS, -156, 0x8f76c77f'c6c4bdaa'26d4c3d6'7f425f60_u128}, // 1/12! - }; - - sin_u = fputil::quick_mul(u, fputil::polyeval(u_sq, SIN_COEFFS[0], - SIN_COEFFS[1], SIN_COEFFS[2], - SIN_COEFFS[3], SIN_COEFFS[4], - SIN_COEFFS[5], SIN_COEFFS[6])); - cos_u = fputil::polyeval(u_sq, COS_COEFFS[0], COS_COEFFS[1], COS_COEFFS[2], - COS_COEFFS[3], COS_COEFFS[4], COS_COEFFS[5], - COS_COEFFS[6]); -} - -} // namespace generic - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_EVAL_H diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp index 89b812c..7ea40c9 100644 --- a/libc/src/math/generic/tan.cpp +++ b/libc/src/math/generic/tan.cpp @@ -20,12 +20,12 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/math/generic/range_reduction_double_common.h" +#include "src/__support/math/range_reduction_double_common.h" #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#include "range_reduction_double_fma.h" +#include "src/__support/math/range_reduction_double_fma.h" #else -#include "range_reduction_double_nofma.h" +#include "src/__support/math/range_reduction_double_nofma.h" #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE namespace LIBC_NAMESPACE_DECL { @@ -121,6 +121,7 @@ LIBC_INLINE double tan_eval(const DoubleDouble &u, DoubleDouble &result) { } // anonymous namespace LLVM_LIBC_FUNCTION(double, tan, (double x)) { + using namespace math::range_reduction_double_internal; using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); |