diff options
Diffstat (limited to 'libc/src/__support')
-rw-r--r-- | libc/src/__support/FPUtil/double_double.h | 52 | ||||
-rw-r--r-- | libc/src/__support/FPUtil/dyadic_float.h | 10 | ||||
-rw-r--r-- | libc/src/__support/macros/optimization.h | 14 |
3 files changed, 60 insertions, 16 deletions
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h index b9490b5..3d16a3c 100644 --- a/libc/src/__support/FPUtil/double_double.h +++ b/libc/src/__support/FPUtil/double_double.h @@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>; // The output of Dekker's FastTwoSum algorithm is correct, i.e.: // r.hi + r.lo = a + b exactly // and |r.lo| < eps(r.lo) -// if ssumption: |a| >= |b|, or a = 0. +// Assumption: |a| >= |b|, or a = 0. +template <bool FAST2SUM = true> LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) { DoubleDouble r{0.0, 0.0}; - r.hi = a + b; - double t = r.hi - a; - r.lo = b - t; + if constexpr (FAST2SUM) { + r.hi = a + b; + double t = r.hi - a; + r.lo = b - t; + } else { + r.hi = a + b; + double t1 = r.hi - a; + double t2 = r.hi - t1; + double t3 = b - t1; + double t4 = a - t2; + r.lo = t3 + t4; + } return r; } @@ -40,15 +50,20 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, // Assumption: |a.hi| >= |b| LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) { - DoubleDouble r = exact_add(a.hi, b); + DoubleDouble r = exact_add<false>(a.hi, b); return exact_add(r.hi, r.lo + a.lo); } -// Velkamp's Splitting for double precision. -LIBC_INLINE constexpr DoubleDouble split(double a) { +// Veltkamp's Splitting for double precision. +// Note: This is proved to be correct for all rounding modes: +// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed +// Roundings," https://inria.hal.science/hal-04480440. +// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1. +template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) { DoubleDouble r{0.0, 0.0}; - // Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1. - constexpr double C = 0x1.0p27 + 1.0; + // CN = 2^N. + constexpr double CN = static_cast<double>(1 << N); + constexpr double C = CN + 1.0; double t1 = C * a; double t2 = a - t1; r.hi = t1 + t2; @@ -56,6 +71,14 @@ LIBC_INLINE constexpr DoubleDouble split(double a) { return r; } +// Note: When FMA instruction is not available, the `exact_mult` function is +// only correct for round-to-nearest mode. See: +// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed +// Roundings," https://inria.hal.science/hal-04480440. +// Using Theorem 1 in the paper above, without FMA instruction, if we restrict +// the generated constants to precision <= 51, and splitting it by 2^28 + 1, +// then a * b = r.hi + r.lo is exact for all rounding modes. +template <bool NO_FMA_ALL_ROUNDINGS = false> LIBC_INLINE DoubleDouble exact_mult(double a, double b) { DoubleDouble r{0.0, 0.0}; @@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) { #else // Dekker's Product. DoubleDouble as = split(a); - DoubleDouble bs = split(b); + DoubleDouble bs; + + if constexpr (NO_FMA_ALL_ROUNDINGS) + bs = split<28>(b); + else + bs = split(b); + r.hi = a * b; double t1 = as.hi * bs.hi - r.hi; double t2 = as.hi * bs.lo + t1; @@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) { return r; } +template <bool NO_FMA_ALL_ROUNDINGS = false> LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a, const DoubleDouble &b) { - DoubleDouble r = exact_mult(a.hi, b.hi); + DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi); double t1 = multiply_add(a.hi, b.lo, r.lo); double t2 = multiply_add(a.lo, b.hi, t1); r.lo = t2; diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 63cb983..76786a2 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -278,11 +278,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a, // don't need to normalize the inputs again in this function. If the inputs are // not normalized, the results might lose precision significantly. template <size_t Bits> -LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a, - DyadicFloat<Bits> b) { +LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a, + const DyadicFloat<Bits> &b) { DyadicFloat<Bits> result; result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS; - result.exponent = a.exponent + b.exponent + int(Bits); + result.exponent = a.exponent + b.exponent + static_cast<int>(Bits); if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) { result.mantissa = a.mantissa.quick_mul_hi(b.mantissa); @@ -309,7 +309,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b, // Simple exponentiation implementation for printf. Only handles positive // exponents, since division isn't implemented. template <size_t Bits> -LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a, +LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a, uint32_t power) { DyadicFloat<Bits> result = 1.0; DyadicFloat<Bits> cur_power = a; @@ -325,7 +325,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a, } template <size_t Bits> -LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a, +LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a, int32_t pow_2) { DyadicFloat<Bits> result = a; result.exponent += pow_2; diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h index 59886ca..05a47791 100644 --- a/libc/src/__support/macros/optimization.h +++ b/libc/src/__support/macros/optimization.h @@ -33,4 +33,18 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) { #error "Unhandled compiler" #endif +// Defining optimization options for math functions. +// TODO: Exporting this to public generated headers? +#define LIBC_MATH_SKIP_ACCURATE_PASS 0x01 +#define LIBC_MATH_SMALL_TABLES 0x02 +#define LIBC_MATH_NO_ERRNO 0x04 +#define LIBC_MATH_NO_EXCEPT 0x08 +#define LIBC_MATH_FAST \ + (LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \ + LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT) + +#ifndef LIBC_MATH +#define LIBC_MATH 0 +#endif // LIBC_MATH + #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H |