aboutsummaryrefslogtreecommitdiff
path: root/libc/src/__support/FPUtil/dyadic_float.h
diff options
context:
space:
mode:
authorlntue <35648136+lntue@users.noreply.github.com>2024-06-24 17:57:08 -0400
committerGitHub <noreply@github.com>2024-06-24 17:57:08 -0400
commit16903ace180755b7558234ff2b2e8d89b00dcb88 (patch)
tree1158c0aef94cca9903d1238e0eef7abdf3e4cb91 /libc/src/__support/FPUtil/dyadic_float.h
parent5ae50698a0d6a3022af2e79d405a7eb6c8c790f0 (diff)
downloadllvm-16903ace180755b7558234ff2b2e8d89b00dcb88.zip
llvm-16903ace180755b7558234ff2b2e8d89b00dcb88.tar.gz
llvm-16903ace180755b7558234ff2b2e8d89b00dcb88.tar.bz2
[libc][math] Implement double precision sin correctly rounded to all rounding modes. (#95736)
- Algorithm: - Step 1 - Range reduction: for a double precision input `x`, return `k` and `u` such that - k is an integer - u = x - k * pi / 128, and |u| < pi/256 - Step 2 - Calculate `sin(u)` and `cos(u)` in double-double using Taylor polynomials with errors < 2^-70 with FMA or < 2^-66 w/o FMA. - Step 3 - Calculate `sin(x) = sin(k*pi/128) * cos(u) + cos(k*pi/128) * sin(u)` using look-up table for `sin(k*pi/128)` and `cos(k*pi/128)`. - Step 4 - Use Ziv's rounding test to decide if the result is correctly rounded. - Step 4' - If the Ziv's rounding test failed, redo step 1-3 using 128-bit precision. - Currently, without FMA instructions, the large range reduction only works correctly for the default rounding mode (FE_TONEAREST). - Provide `LIBC_MATH` flag so that users can set `LIBC_MATH = LIBC_MATH_SKIP_ACCURATE_PASS` to build the `sin` function without step 4 and 4'.
Diffstat (limited to 'libc/src/__support/FPUtil/dyadic_float.h')
-rw-r--r--libc/src/__support/FPUtil/dyadic_float.h10
1 files changed, 5 insertions, 5 deletions
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 63cb983..76786a2 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -278,11 +278,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
// don't need to normalize the inputs again in this function. If the inputs are
// not normalized, the results might lose precision significantly.
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
- DyadicFloat<Bits> b) {
+LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
+ const DyadicFloat<Bits> &b) {
DyadicFloat<Bits> result;
result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
- result.exponent = a.exponent + b.exponent + int(Bits);
+ result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);
if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
@@ -309,7 +309,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
// Simple exponentiation implementation for printf. Only handles positive
// exponents, since division isn't implemented.
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
uint32_t power) {
DyadicFloat<Bits> result = 1.0;
DyadicFloat<Bits> cur_power = a;
@@ -325,7 +325,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
}
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
int32_t pow_2) {
DyadicFloat<Bits> result = a;
result.exponent += pow_2;