aboutsummaryrefslogtreecommitdiff
path: root/libc/src/__support/FPUtil/double_double.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support/FPUtil/double_double.h')
-rw-r--r--libc/src/__support/FPUtil/double_double.h52
1 files changed, 41 insertions, 11 deletions
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index b9490b5..3d16a3c 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
// r.hi + r.lo = a + b exactly
// and |r.lo| < eps(r.lo)
-// if ssumption: |a| >= |b|, or a = 0.
+// Assumption: |a| >= |b|, or a = 0.
+template <bool FAST2SUM = true>
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
DoubleDouble r{0.0, 0.0};
- r.hi = a + b;
- double t = r.hi - a;
- r.lo = b - t;
+ if constexpr (FAST2SUM) {
+ r.hi = a + b;
+ double t = r.hi - a;
+ r.lo = b - t;
+ } else {
+ r.hi = a + b;
+ double t1 = r.hi - a;
+ double t2 = r.hi - t1;
+ double t3 = b - t1;
+ double t4 = a - t2;
+ r.lo = t3 + t4;
+ }
return r;
}
@@ -40,15 +50,20 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
// Assumption: |a.hi| >= |b|
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
- DoubleDouble r = exact_add(a.hi, b);
+ DoubleDouble r = exact_add<false>(a.hi, b);
return exact_add(r.hi, r.lo + a.lo);
}
-// Velkamp's Splitting for double precision.
-LIBC_INLINE constexpr DoubleDouble split(double a) {
+// Veltkamp's Splitting for double precision.
+// Note: This is proved to be correct for all rounding modes:
+// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
+// Roundings," https://inria.hal.science/hal-04480440.
+// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
+template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
DoubleDouble r{0.0, 0.0};
- // Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
- constexpr double C = 0x1.0p27 + 1.0;
+ // CN = 2^N.
+ constexpr double CN = static_cast<double>(1 << N);
+ constexpr double C = CN + 1.0;
double t1 = C * a;
double t2 = a - t1;
r.hi = t1 + t2;
@@ -56,6 +71,14 @@ LIBC_INLINE constexpr DoubleDouble split(double a) {
return r;
}
+// Note: When FMA instruction is not available, the `exact_mult` function is
+// only correct for round-to-nearest mode. See:
+// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
+// Roundings," https://inria.hal.science/hal-04480440.
+// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
+// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
+// then a * b = r.hi + r.lo is exact for all rounding modes.
+template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
DoubleDouble r{0.0, 0.0};
@@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
#else
// Dekker's Product.
DoubleDouble as = split(a);
- DoubleDouble bs = split(b);
+ DoubleDouble bs;
+
+ if constexpr (NO_FMA_ALL_ROUNDINGS)
+ bs = split<28>(b);
+ else
+ bs = split(b);
+
r.hi = a * b;
double t1 = as.hi * bs.hi - r.hi;
double t2 = as.hi * bs.lo + t1;
@@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
return r;
}
+template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
const DoubleDouble &b) {
- DoubleDouble r = exact_mult(a.hi, b.hi);
+ DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi);
double t1 = multiply_add(a.hi, b.lo, r.lo);
double t2 = multiply_add(a.lo, b.hi, t1);
r.lo = t2;