diff options
Diffstat (limited to 'libc/src/math/generic/range_reduction_double_common.h')
-rw-r--r-- | libc/src/math/generic/range_reduction_double_common.h | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h new file mode 100644 index 0000000..0e9edf8 --- /dev/null +++ b/libc/src/math/generic/range_reduction_double_common.h @@ -0,0 +1,162 @@ +//===-- Range reduction for double precision sin/cos/tan -*- C++ --------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H +#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/common.h" +#include "src/__support/integer_literals.h" + +namespace LIBC_NAMESPACE { + +namespace generic { + +using LIBC_NAMESPACE::fputil::DoubleDouble; +using Float128 = LIBC_NAMESPACE::fputil::DyadicFloat<128>; + +LIBC_INLINE constexpr Float128 PI_OVER_128_F128 = { + Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; + +// Note: The look-up tables ONE_TWENTY_EIGHT_OVER_PI is selected to be either +// from fma:: or nofma:: namespace. + +// For large range |x| >= 2^32, we use the exponent of x to find 3 double-chunks +// of 128/pi c_hi, c_mid, c_lo such that: +// 1) ulp(round(x * c_hi, D, RN)) >= 256, +// 2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then +// min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53. +// 3) ulp(round(x * c_lo, D, RN)) <= 2^-7x. +// This will allow us to do quick computations as: +// (x * 256/pi) ~ x * (c_hi + c_mid + c_lo) (mod 256) +// ~ ph_lo + pm_hi + pm_lo + (x * c_lo) +// Then, +// round(x * 128/pi) = round(ph_lo + pm_hi) (mod 256) +// And the high part of fractional part of (x * 128/pi) can simply be: +// {x * 128/pi}_hi = {ph_lo + pm_hi}. +// To prevent overflow when x is very large, we simply scale up +// (c_hi, c_mid, c_lo) by a fixed power of 2 (based on the index) and scale down +// x by the same amount. + +template <bool NO_FMA> struct LargeRangeReduction { + // Calculate the high part of the range reduction exactly. + LIBC_INLINE unsigned compute_high_part(double x) { + using FPBits = typename fputil::FPBits<double>; + FPBits xbits(x); + + // TODO: The extra exponent gap of 62 below can be reduced a bit for non-FMA + // with a more careful analysis, which in turn will reduce the error bound + // for non-FMA + int x_e_m62 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 62); + idx = static_cast<unsigned>((x_e_m62 >> 4) + 3); + // Scale x down by 2^(-(16 * (idx - 3)) + xbits.set_biased_exponent((x_e_m62 & 15) + FPBits::EXP_BIAS + 62); + // 2^62 <= |x_reduced| < 2^(62 + 16) = 2^78 + x_reduced = xbits.get_val(); + // x * c_hi = ph.hi + ph.lo exactly. + DoubleDouble ph = + fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]); + // x * c_mid = pm.hi + pm.lo exactly. + DoubleDouble pm = + fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]); + // Extract integral parts and fractional parts of (ph.lo + pm.hi). + double kh = fputil::nearest_integer(ph.lo); + double ph_lo_frac = ph.lo - kh; // Exact + double km = fputil::nearest_integer(pm.hi + ph_lo_frac); + double pm_hi_frac = pm.hi - km; // Exact + // x * 128/pi mod 1 ~ y_hi + y_lo + y_hi = ph_lo_frac + pm_hi_frac; // Exact + pm_lo = pm.lo; + return static_cast<unsigned>(static_cast<int64_t>(kh) + + static_cast<int64_t>(km)); + } + + LIBC_INLINE DoubleDouble fast() const { + // y_lo = x * c_lo + pm.lo + double y_lo = fputil::multiply_add(x_reduced, + ONE_TWENTY_EIGHT_OVER_PI[idx][2], pm_lo); + DoubleDouble y = fputil::exact_add(y_hi, y_lo); + + // Digits of pi/128, generated by Sollya with: + // > a = round(pi/128, D, RN); + // > b = round(pi/128 - a, D, RN); + constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60, + 0x1.921fb54442d18p-6}; + + // Error bound: with {a} denote the fractional part of a, i.e.: + // {a} = a - round(a) + // Then, + // | {x * 128/pi} - (y_hi + y_lo) | < 2 * ulp(x_reduced * + // * ONE_TWENTY_EIGHT_OVER_PI[idx][2]) + // For FMA: + // | {x * 128/pi} - (y_hi + y_lo) | <= 2 * 2^77 * 2^-103 * 2^-52 + // = 2^-77. + // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-77. + // = 2^-82. + // For non-FMA: + // | {x * 128/pi} - (y_hi + y_lo) | <= 2 * 2^77 * 2^-99 * 2^-52 + // = 2^-73. + // | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-73. + // = 2^-78. + return fputil::quick_mult<NO_FMA>(y, PI_OVER_128_DD); + } + + LIBC_INLINE Float128 accurate() const { + // y_lo = x * c_lo + pm.lo + Float128 y_lo_0(x_reduced * ONE_TWENTY_EIGHT_OVER_PI[idx][3]); + Float128 y_lo_1 = fputil::quick_mul( + Float128(x_reduced), Float128(ONE_TWENTY_EIGHT_OVER_PI[idx][2])); + Float128 y_lo_2(pm_lo); + Float128 y_hi_f128(y_hi); + + Float128 y = fputil::quick_add( + y_hi_f128, + fputil::quick_add(y_lo_2, fputil::quick_add(y_lo_1, y_lo_0))); + + return fputil::quick_mul(y, PI_OVER_128_F128); + } + +private: + // Index of x in the look-up table ONE_TWENTY_EIGHT_OVER_PI. + unsigned idx; + // x scaled down by 2^(-16 *(idx - 3))). + double x_reduced; + // High part of (x * 128/pi) mod 1. + double y_hi; + // Low part of x * ONE_TWENTY_EIGHT_OVER_PI[idx][1]. + double pm_lo; +}; + +LIBC_INLINE Float128 range_reduction_small_f128(double x) { + double prod_hi = x * ONE_TWENTY_EIGHT_OVER_PI[3][0]; + double kd = fputil::nearest_integer(prod_hi); + + Float128 mk_f128(-kd); + Float128 x_f128(x); + Float128 p_hi = + fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][0])); + Float128 p_mid = + fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][1])); + Float128 p_lo = + fputil::quick_mul(x_f128, Float128(ONE_TWENTY_EIGHT_OVER_PI[3][2])); + Float128 s_hi = fputil::quick_add(p_hi, mk_f128); + Float128 s_lo = fputil::quick_add(p_mid, p_lo); + Float128 y = fputil::quick_add(s_hi, s_lo); + + return fputil::quick_mul(y, PI_OVER_128_F128); +} + +} // namespace generic + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H |