diff options
Diffstat (limited to 'libc/src/__support/FPUtil/double_double.h')
-rw-r--r-- | libc/src/__support/FPUtil/double_double.h | 52 |
1 files changed, 41 insertions, 11 deletions
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h index b9490b5..3d16a3c 100644 --- a/libc/src/__support/FPUtil/double_double.h +++ b/libc/src/__support/FPUtil/double_double.h @@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>; // The output of Dekker's FastTwoSum algorithm is correct, i.e.: // r.hi + r.lo = a + b exactly // and |r.lo| < eps(r.lo) -// if ssumption: |a| >= |b|, or a = 0. +// Assumption: |a| >= |b|, or a = 0. +template <bool FAST2SUM = true> LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) { DoubleDouble r{0.0, 0.0}; - r.hi = a + b; - double t = r.hi - a; - r.lo = b - t; + if constexpr (FAST2SUM) { + r.hi = a + b; + double t = r.hi - a; + r.lo = b - t; + } else { + r.hi = a + b; + double t1 = r.hi - a; + double t2 = r.hi - t1; + double t3 = b - t1; + double t4 = a - t2; + r.lo = t3 + t4; + } return r; } @@ -40,15 +50,20 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, // Assumption: |a.hi| >= |b| LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) { - DoubleDouble r = exact_add(a.hi, b); + DoubleDouble r = exact_add<false>(a.hi, b); return exact_add(r.hi, r.lo + a.lo); } -// Velkamp's Splitting for double precision. -LIBC_INLINE constexpr DoubleDouble split(double a) { +// Veltkamp's Splitting for double precision. +// Note: This is proved to be correct for all rounding modes: +// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed +// Roundings," https://inria.hal.science/hal-04480440. +// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1. +template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) { DoubleDouble r{0.0, 0.0}; - // Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1. - constexpr double C = 0x1.0p27 + 1.0; + // CN = 2^N. + constexpr double CN = static_cast<double>(1 << N); + constexpr double C = CN + 1.0; double t1 = C * a; double t2 = a - t1; r.hi = t1 + t2; @@ -56,6 +71,14 @@ LIBC_INLINE constexpr DoubleDouble split(double a) { return r; } +// Note: When FMA instruction is not available, the `exact_mult` function is +// only correct for round-to-nearest mode. See: +// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed +// Roundings," https://inria.hal.science/hal-04480440. +// Using Theorem 1 in the paper above, without FMA instruction, if we restrict +// the generated constants to precision <= 51, and splitting it by 2^28 + 1, +// then a * b = r.hi + r.lo is exact for all rounding modes. +template <bool NO_FMA_ALL_ROUNDINGS = false> LIBC_INLINE DoubleDouble exact_mult(double a, double b) { DoubleDouble r{0.0, 0.0}; @@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) { #else // Dekker's Product. DoubleDouble as = split(a); - DoubleDouble bs = split(b); + DoubleDouble bs; + + if constexpr (NO_FMA_ALL_ROUNDINGS) + bs = split<28>(b); + else + bs = split(b); + r.hi = a * b; double t1 = as.hi * bs.hi - r.hi; double t2 = as.hi * bs.lo + t1; @@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) { return r; } +template <bool NO_FMA_ALL_ROUNDINGS = false> LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a, const DoubleDouble &b) { - DoubleDouble r = exact_mult(a.hi, b.hi); + DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi); double t1 = multiply_add(a.hi, b.lo, r.lo); double t2 = multiply_add(a.lo, b.hi, t1); r.lo = t2; |