diff options
author | lntue <35648136+lntue@users.noreply.github.com> | 2024-06-24 17:57:08 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-24 17:57:08 -0400 |
commit | 16903ace180755b7558234ff2b2e8d89b00dcb88 (patch) | |
tree | 1158c0aef94cca9903d1238e0eef7abdf3e4cb91 /libc/src/__support/FPUtil/dyadic_float.h | |
parent | 5ae50698a0d6a3022af2e79d405a7eb6c8c790f0 (diff) | |
download | llvm-16903ace180755b7558234ff2b2e8d89b00dcb88.zip llvm-16903ace180755b7558234ff2b2e8d89b00dcb88.tar.gz llvm-16903ace180755b7558234ff2b2e8d89b00dcb88.tar.bz2 |
[libc][math] Implement double precision sin correctly rounded to all rounding modes. (#95736)
- Algorithm:
- Step 1 - Range reduction: for a double precision input `x`, return `k`
and `u` such that
- k is an integer
- u = x - k * pi / 128, and |u| < pi/256
- Step 2 - Calculate `sin(u)` and `cos(u)` in double-double using Taylor
polynomials with errors < 2^-70 with FMA or < 2^-66 w/o FMA.
- Step 3 - Calculate `sin(x) = sin(k*pi/128) * cos(u) + cos(k*pi/128) *
sin(u)` using look-up table for `sin(k*pi/128)` and `cos(k*pi/128)`.
- Step 4 - Use Ziv's rounding test to decide if the result is correctly
rounded.
- Step 4' - If Ziv's rounding test fails, redo steps 1-3 using
128-bit precision.
- Currently, without FMA instructions, the large range reduction only
works correctly for the default rounding mode (FE_TONEAREST).
- Provide `LIBC_MATH` flag so that users can set `LIBC_MATH =
LIBC_MATH_SKIP_ACCURATE_PASS` to build the `sin` function without step 4
and 4'.
Diffstat (limited to 'libc/src/__support/FPUtil/dyadic_float.h')
-rw-r--r-- | libc/src/__support/FPUtil/dyadic_float.h | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 63cb983..76786a2 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -278,11 +278,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
 // don't need to normalize the inputs again in this function. If the inputs are
 // not normalized, the results might lose precision significantly.
 template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
-                                                  DyadicFloat<Bits> b) {
+LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
+                                                  const DyadicFloat<Bits> &b) {
   DyadicFloat<Bits> result;
   result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
-  result.exponent = a.exponent + b.exponent + int(Bits);
+  result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);
 
   if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
     result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
@@ -309,7 +309,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
 // Simple exponentiation implementation for printf. Only handles positive
 // exponents, since division isn't implemented.
 template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
                                               uint32_t power) {
   DyadicFloat<Bits> result = 1.0;
   DyadicFloat<Bits> cur_power = a;
@@ -325,7 +325,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
 }
 
 template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
                                                   int32_t pow_2) {
   DyadicFloat<Bits> result = a;
   result.exponent += pow_2;