aboutsummaryrefslogtreecommitdiff
path: root/libc/src/__support
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support')
-rw-r--r--libc/src/__support/FPUtil/double_double.h52
-rw-r--r--libc/src/__support/FPUtil/dyadic_float.h10
-rw-r--r--libc/src/__support/macros/optimization.h14
3 files changed, 60 insertions, 16 deletions
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index b9490b5..3d16a3c 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -21,12 +21,22 @@ using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
// r.hi + r.lo = a + b exactly
// and |r.lo| < eps(r.lo)
-// if ssumption: |a| >= |b|, or a = 0.
+// Assumption: |a| >= |b|, or a = 0.
+template <bool FAST2SUM = true>
LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
DoubleDouble r{0.0, 0.0};
- r.hi = a + b;
- double t = r.hi - a;
- r.lo = b - t;
+ if constexpr (FAST2SUM) {
+ r.hi = a + b;
+ double t = r.hi - a;
+ r.lo = b - t;
+ } else {
+ r.hi = a + b;
+ double t1 = r.hi - a;
+ double t2 = r.hi - t1;
+ double t3 = b - t1;
+ double t4 = a - t2;
+ r.lo = t3 + t4;
+ }
return r;
}
@@ -40,15 +50,20 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
// Assumption: |a.hi| >= |b|
LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
- DoubleDouble r = exact_add(a.hi, b);
+ DoubleDouble r = exact_add<false>(a.hi, b);
return exact_add(r.hi, r.lo + a.lo);
}
-// Velkamp's Splitting for double precision.
-LIBC_INLINE constexpr DoubleDouble split(double a) {
+// Veltkamp's Splitting for double precision.
+// Note: This is proved to be correct for all rounding modes:
+// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
+// Roundings," https://inria.hal.science/hal-04480440.
+// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
+template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
DoubleDouble r{0.0, 0.0};
- // Splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
- constexpr double C = 0x1.0p27 + 1.0;
+ // CN = 2^N.
+ constexpr double CN = static_cast<double>(1 << N);
+ constexpr double C = CN + 1.0;
double t1 = C * a;
double t2 = a - t1;
r.hi = t1 + t2;
@@ -56,6 +71,14 @@ LIBC_INLINE constexpr DoubleDouble split(double a) {
return r;
}
+// Note: When FMA instruction is not available, the `exact_mult` function is
+// only correct for round-to-nearest mode. See:
+// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
+// Roundings," https://inria.hal.science/hal-04480440.
+// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
+// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
+// then a * b = r.hi + r.lo is exact for all rounding modes.
+template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
DoubleDouble r{0.0, 0.0};
@@ -65,7 +88,13 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
#else
// Dekker's Product.
DoubleDouble as = split(a);
- DoubleDouble bs = split(b);
+ DoubleDouble bs;
+
+ if constexpr (NO_FMA_ALL_ROUNDINGS)
+ bs = split<28>(b);
+ else
+ bs = split(b);
+
r.hi = a * b;
double t1 = as.hi * bs.hi - r.hi;
double t2 = as.hi * bs.lo + t1;
@@ -82,9 +111,10 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
return r;
}
+template <bool NO_FMA_ALL_ROUNDINGS = false>
LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
const DoubleDouble &b) {
- DoubleDouble r = exact_mult(a.hi, b.hi);
+ DoubleDouble r = exact_mult<NO_FMA_ALL_ROUNDINGS>(a.hi, b.hi);
double t1 = multiply_add(a.hi, b.lo, r.lo);
double t2 = multiply_add(a.lo, b.hi, t1);
r.lo = t2;
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 63cb983..76786a2 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -278,11 +278,11 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
// don't need to normalize the inputs again in this function. If the inputs are
// not normalized, the results might lose precision significantly.
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
- DyadicFloat<Bits> b) {
+LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
+ const DyadicFloat<Bits> &b) {
DyadicFloat<Bits> result;
result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
- result.exponent = a.exponent + b.exponent + int(Bits);
+ result.exponent = a.exponent + b.exponent + static_cast<int>(Bits);
if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
@@ -309,7 +309,7 @@ multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
// Simple exponentiation implementation for printf. Only handles positive
// exponents, since division isn't implemented.
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(const DyadicFloat<Bits> &a,
uint32_t power) {
DyadicFloat<Bits> result = 1.0;
DyadicFloat<Bits> cur_power = a;
@@ -325,7 +325,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
}
template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
+LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(const DyadicFloat<Bits> &a,
int32_t pow_2) {
DyadicFloat<Bits> result = a;
result.exponent += pow_2;
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index 59886ca..05a47791 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -33,4 +33,18 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#error "Unhandled compiler"
#endif
+// Defining optimization options for math functions.
+// TODO: Exporting this to public generated headers?
+#define LIBC_MATH_SKIP_ACCURATE_PASS 0x01
+#define LIBC_MATH_SMALL_TABLES 0x02
+#define LIBC_MATH_NO_ERRNO 0x04
+#define LIBC_MATH_NO_EXCEPT 0x08
+#define LIBC_MATH_FAST \
+ (LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | \
+ LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
+
+#ifndef LIBC_MATH
+#define LIBC_MATH 0
+#endif // LIBC_MATH
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H