diff options
Diffstat (limited to 'libc/src')
-rw-r--r-- | libc/src/__support/macros/properties/architectures.h | 2 | ||||
-rw-r--r-- | libc/src/__support/macros/properties/cpu_features.h | 2 | ||||
-rw-r--r-- | libc/src/__support/math/CMakeLists.txt | 67 | ||||
-rw-r--r-- | libc/src/__support/math/acos.h | 285 | ||||
-rw-r--r-- | libc/src/__support/math/asin_utils.h (renamed from libc/src/math/generic/asin_utils.h) | 34 | ||||
-rw-r--r-- | libc/src/__support/math/exp.h | 19 | ||||
-rw-r--r-- | libc/src/__support/math/exp10.h | 19 | ||||
-rw-r--r-- | libc/src/__support/math/exp10_float16_constants.h | 43 | ||||
-rw-r--r-- | libc/src/__support/math/exp10f16.h | 141 | ||||
-rw-r--r-- | libc/src/__support/math/exp10f16_utils.h | 64 | ||||
-rw-r--r-- | libc/src/__support/math/exp10f_utils.h | 6 | ||||
-rw-r--r-- | libc/src/math/generic/CMakeLists.txt | 49 | ||||
-rw-r--r-- | libc/src/math/generic/acos.cpp | 266 | ||||
-rw-r--r-- | libc/src/math/generic/asin.cpp | 2 | ||||
-rw-r--r-- | libc/src/math/generic/exp10f16.cpp | 122 | ||||
-rw-r--r-- | libc/src/math/generic/exp10m1f16.cpp | 2 | ||||
-rw-r--r-- | libc/src/math/generic/expxf16.h | 56 | ||||
-rw-r--r-- | libc/src/sys/time/linux/setitimer.cpp | 4 | ||||
-rw-r--r-- | libc/src/sys/time/linux/utimes.cpp | 6 |
19 files changed, 662 insertions, 527 deletions
diff --git a/libc/src/__support/macros/properties/architectures.h b/libc/src/__support/macros/properties/architectures.h index c88956f..ecc9319 100644 --- a/libc/src/__support/macros/properties/architectures.h +++ b/libc/src/__support/macros/properties/architectures.h @@ -21,7 +21,7 @@ #define LIBC_TARGET_ARCH_IS_GPU #endif -#if defined(__pnacl__) || defined(__CLR_VER) || defined(LIBC_TARGET_ARCH_IS_GPU) +#if defined(__CLR_VER) || defined(LIBC_TARGET_ARCH_IS_GPU) #define LIBC_TARGET_ARCH_IS_VM #endif diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index cdb2df9..fde30ea 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -81,7 +81,7 @@ #endif #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \ - defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA) + defined(__NVPTX__) || defined(__AMDGPU__) || defined(__riscv_flen) #define LIBC_TARGET_CPU_HAS_FMA // Provide a more fine-grained control of FMA instruction for ARM targets. #if defined(LIBC_TARGET_CPU_HAS_FPU_HALF) diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index ad36679..4a29c29 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -1,4 +1,37 @@ add_header_library( + acos + HDRS + acos.h + DEPENDS + .asin_utils + libc.src.__support.math.asin_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types + libc.src.__support.macros.properties.cpu_features +) + +add_header_library( + asin_utils + HDRS + asin_utils.h + DEPENDS + libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.optimization +) + +add_header_library( exp_float_constants HDRS exp_float_constants.h @@ -198,3 +231,37 @@ add_header_library( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization ) + +add_header_library( + exp10_float16_constants + HDRS + exp10_float16_constants.h + DEPENDS + libc.src.__support.CPP.array +) + +add_header_library( + exp10f16_utils + HDRS + exp10f16_utils.h + DEPENDS + .expf16_utils + .exp10_float16_constants + libc.src.__support.FPUtil.fp_bits +) + +add_header_library( + exp10f16 + HDRS + exp10f16.h + DEPENDS + .exp10f16_utils + libc.src.__support.FPUtil.fp_bits + src.__support.FPUtil.FEnvImpl + src.__support.FPUtil.FPBits + src.__support.FPUtil.cast + src.__support.FPUtil.rounding_mode + src.__support.FPUtil.except_value_utils + src.__support.macros.optimization + src.__support.macros.properties.cpu_features +) diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h new file mode 100644 index 0000000..a7287f1 --- /dev/null +++ b/libc/src/__support/math/acos.h @@ -0,0 +1,285 @@ +//===-- Implementation header for acos --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H + +#include "asin_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +using DoubleDouble = fputil::DoubleDouble; +using Float128 = fputil::DyadicFloat<128>; + +static constexpr double acos(double x) { + using FPBits = fputil::FPBits<double>; + + FPBits xbits(x); + int x_exp = xbits.get_biased_exponent(); + + // |x| < 0.5. + if (x_exp < FPBits::EXP_BIAS - 1) { + // |x| < 2^-55. + if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 55)) { + // When |x| < 2^-55, acos(x) = pi/2 +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) + return PI_OVER_TWO.hi; +#else + // Force the evaluation and prevent constant propagation so that it + // is rounded correctly for FE_UPWARD rounding mode. + return (xbits.abs().get_val() + 0x1.0p-160) + PI_OVER_TWO.hi; +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // acos(x) = pi/2 - asin(x) + // = pi/2 - x * P(x^2) + double p = asin_eval(x * x); + return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo); +#else + unsigned idx = 0; + DoubleDouble x_sq = fputil::exact_mult(x, x); + double err = xbits.abs().get_val() * 0x1.0p-51; + // Polynomial approximation: + // p ~ asin(x)/x + DoubleDouble p = asin_eval(x_sq, idx, err); + // asin(x) ~ x * p + DoubleDouble r0 = fputil::exact_mult(x, p.hi); + // acos(x) = pi/2 - asin(x) + // ~ pi/2 - x * p + // = pi/2 - x * (p.hi + p.lo) + double r_hi = fputil::multiply_add(-x, p.hi, PI_OVER_TWO.hi); + // Use Dekker's 2SUM algorithm to compute the lower part. + double r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo; + r_lo = fputil::multiply_add(-x, p.lo, r_lo + PI_OVER_TWO.lo); + + // Ziv's accuracy test. + + double r_upper = r_hi + (r_lo + err); + double r_lower = r_hi + (r_lo - err); + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, perform 128-bit calculation. + + // Recalculate mod 1/64. + idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6)); + + // Get x^2 - idx/64 exactly. When FMA is available, double-double + // multiplication will be correct for all rounding modes. Otherwise we use + // Float128 directly. + Float128 x_f128(x); + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // u = x^2 - idx/64 + Float128 u_hi( + fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi)); + Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo)); +#else + Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128); + Float128 u = fputil::quick_add( + x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6))); +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + Float128 p_f128 = asin_eval(u, idx); + // Flip the sign of x_f128 to perform subtraction. + x_f128.sign = x_f128.sign.negate(); + Float128 r = + fputil::quick_add(PI_OVER_TWO_F128, fputil::quick_mul(x_f128, p_f128)); + + return static_cast<double>(r); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + // |x| >= 0.5 + + double x_abs = xbits.abs().get_val(); + + // Maintaining the sign: + constexpr double SIGN[2] = {1.0, -1.0}; + double x_sign = SIGN[xbits.is_neg()]; + // |x| >= 1 + if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) { + // x = +-1, asin(x) = +- pi/2 + if (x_abs == 1.0) { + // x = 1, acos(x) = 0, + // x = -1, acos(x) = pi + return x == 1.0 ? 0.0 : fputil::multiply_add(-x_sign, PI.hi, PI.lo); + } + // |x| > 1, return NaN. + if (xbits.is_quiet_nan()) + return x; + + // Set domain error for non-NaN input. + if (!xbits.is_nan()) + fputil::set_errno_if_required(EDOM); + + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // When |x| >= 0.5, we perform range reduction as follow: + // + // When 0.5 <= x < 1, let: + // y = acos(x) + // We will use the double angle formula: + // cos(2y) = 1 - 2 sin^2(y) + // and the complement angle identity: + // x = cos(y) = 1 - 2 sin^2 (y/2) + // So: + // sin(y/2) = sqrt( (1 - x)/2 ) + // And hence: + // y/2 = asin( sqrt( (1 - x)/2 ) ) + // Equivalently: + // acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) ) + // Let u = (1 - x)/2, then: + // acos(x) = 2 * asin( sqrt(u) ) + // Moreover, since 0.5 <= x < 1: + // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5, + // And hence we can reuse the same polynomial approximation of asin(x) when + // |x| <= 0.5: + // acos(x) ~ 2 * sqrt(u) * P(u). + // + // When -1 < x <= -0.5, we reduce to the previous case using the formula: + // acos(x) = pi - acos(-x) + // = pi - 2 * asin ( sqrt( (1 + x)/2 ) ) + // ~ pi - 2 * sqrt(u) * P(u), + // where u = (1 - |x|)/2. + + // u = (1 - |x|)/2 + double u = fputil::multiply_add(x_abs, -0.5, 0.5); + // v_hi + v_lo ~ sqrt(u). + // Let: + // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi) + // Then: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // ~ v_hi + h / (2 * v_hi) + // So we can use: + // v_lo = h / (2 * v_hi). + double v_hi = fputil::sqrt<double>(u); + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI}; + DoubleDouble const_term = CONST_TERM[xbits.is_neg()]; + + double p = asin_eval(u); + double scale = x_sign * 2.0 * v_hi; + double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo); + return r; +#else + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double h = fputil::multiply_add(v_hi, -v_hi, u); +#else + DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi); + double h = (u - v_hi_sq.hi) - v_hi_sq.lo; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + // Scale v_lo and v_hi by 2 from the formula: + // vh = v_hi * 2 + // vl = 2*v_lo = h / v_hi. + double vh = v_hi * 2.0; + double vl = h / v_hi; + + // Polynomial approximation: + // p ~ asin(sqrt(u))/sqrt(u) + unsigned idx = 0; + double err = vh * 0x1.0p-51; + + DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err); + + // Perform computations in double-double arithmetic: + // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p) + DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p); + + double r_hi = 0, r_lo = 0; + if (xbits.is_pos()) { + r_hi = r0.hi; + r_lo = r0.lo; + } else { + DoubleDouble r = fputil::exact_add(PI.hi, -r0.hi); + r_hi = r.hi; + r_lo = (PI.lo - r0.lo) + r.lo; + } + + // Ziv's accuracy test. + + double r_upper = r_hi + (r_lo + err); + double r_lower = r_hi + (r_lo - err); + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, we redo the computations in Float128. + // Recalculate mod 1/64. + idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6)); + + // After the first step of Newton-Raphson approximating v = sqrt(u), we have + // that: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // v_lo = h / (2 * v_hi) + // With error: + // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) ) + // = -h^2 / (2*v * (sqrt(u) + v)^2). + // Since: + // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u, + // we can add another correction term to (v_hi + v_lo) that is: + // v_ll = -h^2 / (2*v_hi * 4u) + // = -v_lo * (h / 4u) + // = -vl * (h / 8u), + // making the errors: + // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3) + // well beyond 128-bit precision needed. + + // Get the rounding error of vl = 2 * v_lo ~ h / vh + // Get full product of vh * vl +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi; +#else + DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl); + double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // vll = 2*v_ll = -vl * (h / (4u)). + double t = h * (-0.25) / u; + double vll = fputil::multiply_add(vl, t, vl_lo); + // m_v = -(v_hi + v_lo + v_ll). + Float128 m_v = fputil::quick_add( + Float128(vh), fputil::quick_add(Float128(vl), Float128(vll))); + m_v.sign = xbits.sign(); + + // Perform computations in Float128: + // acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1, + // = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5. + Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u)); + + Float128 p_f128 = asin_eval(y_f128, idx); + Float128 r_f128 = fputil::quick_mul(m_v, p_f128); + + if (xbits.is_neg()) + r_f128 = fputil::quick_add(PI_F128, r_f128); + + return static_cast<double>(r_f128); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H diff --git a/libc/src/math/generic/asin_utils.h b/libc/src/__support/math/asin_utils.h index 44913d5..3146444 100644 --- a/libc/src/math/generic/asin_utils.h +++ b/libc/src/__support/math/asin_utils.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/double_double.h" @@ -16,7 +16,6 @@ #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/integer_literals.h" #include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE_DECL { @@ -25,10 +24,10 @@ namespace { using DoubleDouble = fputil::DoubleDouble; using Float128 = fputil::DyadicFloat<128>; -constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p1}; +static constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p1}; -constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, - 0x1.921fb54442d18p0}; +static constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, + 0x1.921fb54442d18p0}; #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS @@ -39,14 +38,14 @@ constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, // > dirtyinfnorm(asin(x)/x - P, [0, 0.5]); // 0x1.1a71ef0a0f26a9fb7ed7e41dee788b13d1770db3dp-52 -constexpr double ASIN_COEFFS[12] = { +static constexpr double ASIN_COEFFS[12] = { 0x1.0000000000000p0, 0x1.5555555556dcfp-3, 0x1.3333333082e11p-4, 0x1.6db6dd14099edp-5, 0x1.f1c69b35bf81fp-6, 0x1.6e97194225a67p-6, 0x1.1babddb82ce12p-6, 0x1.d55bd078600d6p-7, 0x1.33328959e63d6p-7, 0x1.2b5993bda1d9bp-6, -0x1.806aff270bf25p-7, 0x1.02614e5ed3936p-5, }; -LIBC_INLINE double asin_eval(double u) { +LIBC_INLINE static constexpr double asin_eval(double u) { double u2 = u * u; double c0 = fputil::multiply_add(u, ASIN_COEFFS[1], ASIN_COEFFS[0]); double c1 = fputil::multiply_add(u, ASIN_COEFFS[3], ASIN_COEFFS[2]); @@ -124,7 +123,7 @@ LIBC_INLINE double asin_eval(double u) { // > dirtyinfnorm(asin(x)/x - P, [-1/64, 1/64]); // 0x1.999075402cafp-83 -constexpr double ASIN_COEFFS[9][12] = { +static constexpr double ASIN_COEFFS[9][12] = { {1.0, 0.0, 0x1.5555555555555p-3, 0x1.5555555555555p-57, 0x1.3333333333333p-4, 0x1.6db6db6db6db7p-5, 0x1.f1c71c71c71c7p-6, 0x1.6e8ba2e8ba2e9p-6, 0x1.1c4ec4ec4ec4fp-6, 0x1.c99999999999ap-7, @@ -164,8 +163,8 @@ constexpr double ASIN_COEFFS[9][12] = { }; // We calculate the lower part of the approximation P(u). -LIBC_INLINE DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, - double &err) { +LIBC_INLINE static DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, + double &err) { using fputil::multiply_add; // k = round(u * 32). double k = fputil::nearest_integer(u.hi * 0x1.0p5); @@ -239,7 +238,7 @@ LIBC_INLINE DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx, // + (676039 x^24)/104857600 + (1300075 x^26)/226492416 + // + (5014575 x^28)/973078528 + (9694845 x^30)/2080374784. -constexpr Float128 ASIN_COEFFS_F128[17][16] = { +static constexpr Float128 ASIN_COEFFS_F128[17][16] = { { {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, @@ -548,13 +547,14 @@ constexpr Float128 ASIN_COEFFS_F128[17][16] = { }, }; -constexpr Float128 PI_OVER_TWO_F128 = { +static constexpr Float128 PI_OVER_TWO_F128 = { Sign::POS, -127, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -constexpr Float128 PI_F128 = {Sign::POS, -126, - 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; +static constexpr Float128 PI_F128 = { + Sign::POS, -126, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -LIBC_INLINE Float128 asin_eval(const Float128 &u, unsigned idx) { +LIBC_INLINE static constexpr Float128 asin_eval(const Float128 &u, + unsigned idx) { return fputil::polyeval(u, ASIN_COEFFS_F128[idx][0], ASIN_COEFFS_F128[idx][1], ASIN_COEFFS_F128[idx][2], ASIN_COEFFS_F128[idx][3], ASIN_COEFFS_F128[idx][4], ASIN_COEFFS_F128[idx][5], @@ -571,4 +571,4 @@ LIBC_INLINE Float128 asin_eval(const Float128 &u, unsigned idx) { } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_UTILS_H diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h index a538df1..ff59ff7 100644 --- a/libc/src/__support/math/exp.h +++ b/libc/src/__support/math/exp.h @@ -40,11 +40,11 @@ static constexpr double LOG2_E = 0x1.71547652b82fep+0; // Error bounds: // Errors when using double precision. -static constexpr double ERR_D = 0x1.8p-63; +static constexpr double EXP_ERR_D = 0x1.8p-63; #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS // Errors when using double-double precision. -static constexpr double ERR_DD = 0x1.0p-99; +static constexpr double EXP_ERR_DD = 0x1.0p-99; #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS // -2^-12 * log(2) @@ -387,7 +387,8 @@ static double exp(double x) { #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS if (LIBC_UNLIKELY(denorm)) { - return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D) + return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, + EXP_ERR_D) .value(); } else { // to multiply by 2^hi, a fast way is to simply add hi to the exponent @@ -399,12 +400,12 @@ static double exp(double x) { } #else if (LIBC_UNLIKELY(denorm)) { - if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D); + if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, EXP_ERR_D); LIBC_LIKELY(r.has_value())) return r.value(); } else { - double upper = exp_mid.hi + (lo + ERR_D); - double lower = exp_mid.hi + (lo - ERR_D); + double upper = exp_mid.hi + (lo + EXP_ERR_D); + double lower = exp_mid.hi + (lo - EXP_ERR_D); if (LIBC_LIKELY(upper == lower)) { // to multiply by 2^hi, a fast way is to simply add hi to the exponent @@ -419,12 +420,12 @@ static double exp(double x) { DoubleDouble r_dd = exp_double_double(x, kd, exp_mid); if (LIBC_UNLIKELY(denorm)) { - if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, ERR_DD); + if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, EXP_ERR_DD); LIBC_LIKELY(r.has_value())) return r.value(); } else { - double upper_dd = r_dd.hi + (r_dd.lo + ERR_DD); - double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD); + double upper_dd = r_dd.hi + (r_dd.lo + EXP_ERR_DD); + double lower_dd = r_dd.hi + (r_dd.lo - EXP_ERR_DD); if (LIBC_LIKELY(upper_dd == lower_dd)) { int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN; diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h index 8874852..fa60e40c 100644 --- a/libc/src/__support/math/exp10.h +++ b/libc/src/__support/math/exp10.h @@ -54,11 +54,11 @@ static constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87; // Error bounds: // Errors when using double precision. -constexpr double ERR_D = 0x1.8p-63; +constexpr double EXP10_ERR_D = 0x1.8p-63; #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS // Errors when using double-double precision. -static constexpr double ERR_DD = 0x1.8p-99; +static constexpr double EXP10_ERR_DD = 0x1.8p-99; #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS // Polynomial approximations with double precision. Generated by Sollya with: @@ -207,17 +207,18 @@ static double exp10_denorm(double x) { double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo); #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D) + return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, + EXP10_ERR_D) .value(); #else - if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D); + if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, EXP10_ERR_D); LIBC_LIKELY(r.has_value())) return r.value(); // Use double-double DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid); - if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, ERR_DD); + if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, EXP10_ERR_DD); LIBC_LIKELY(r.has_value())) return r.value(); @@ -409,8 +410,8 @@ static constexpr double exp10(double x) { cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo)); return r; #else - double upper = exp_mid.hi + (lo + ERR_D); - double lower = exp_mid.hi + (lo - ERR_D); + double upper = exp_mid.hi + (lo + EXP10_ERR_D); + double lower = exp_mid.hi + (lo - EXP10_ERR_D); if (LIBC_LIKELY(upper == lower)) { // To multiply by 2^hi, a fast way is to simply add hi to the exponent @@ -476,8 +477,8 @@ static constexpr double exp10(double x) { // Use double-double DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid); - double upper_dd = r_dd.hi + (r_dd.lo + ERR_DD); - double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD); + double upper_dd = r_dd.hi + (r_dd.lo + EXP10_ERR_DD); + double lower_dd = r_dd.hi + (r_dd.lo - EXP10_ERR_DD); if (LIBC_LIKELY(upper_dd == lower_dd)) { // To multiply by 2^hi, a fast way is to simply add hi to the exponent diff --git a/libc/src/__support/math/exp10_float16_constants.h b/libc/src/__support/math/exp10_float16_constants.h new file mode 100644 index 0000000..f5928db --- /dev/null +++ b/libc/src/__support/math/exp10_float16_constants.h @@ -0,0 +1,43 @@ +//===-- Constants for exp10f16 function -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include <stdint.h> + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/CPP/array.h" + +namespace LIBC_NAMESPACE_DECL { + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN)); +static constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = { + 0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U, + 0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U, +}; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log2(10), SG, RN); +static constexpr float LOG2F_10 = 0x1.a934fp+1f; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log10(2), SG, RN); +static constexpr float LOG10F_2 = 0x1.344136p-2f; + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H diff --git a/libc/src/__support/math/exp10f16.h b/libc/src/__support/math/exp10f16.h new file mode 100644 index 0000000..0d8b125 --- /dev/null +++ b/libc/src/__support/math/exp10f16.h @@ -0,0 +1,141 @@ +//===-- Implementation header for exp10f16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "exp10f16_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/cpu_features.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT +static constexpr size_t N_EXP10F16_EXCEPTS = 5; +#else +static constexpr size_t N_EXP10F16_EXCEPTS = 8; +#endif + +static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS> + EXP10F16_EXCEPTS = {{ + // x = 0x1.8f4p-2, exp10f16(x) = 0x1.3ap+1 (RZ) + {0x363dU, 0x40e8U, 1U, 0U, 1U}, + // x = 0x1.95cp-2, exp10f16(x) = 0x1.3ecp+1 (RZ) + {0x3657U, 0x40fbU, 1U, 0U, 0U}, + // x = -0x1.018p-4, exp10f16(x) = 0x1.bbp-1 (RZ) + {0xac06U, 0x3aecU, 1U, 0U, 0U}, + // x = -0x1.c28p+0, exp10f16(x) = 0x1.1ccp-6 (RZ) + {0xbf0aU, 0x2473U, 1U, 0U, 0U}, + // x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ) + {0xc387U, 0x09a5U, 1U, 0U, 0U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ) + {0x4030U, 0x57c1U, 1U, 0U, 1U}, + // x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ) + {0x406eU, 0x591fU, 1U, 0U, 1U}, + // x = 0x1.1b8p+2, exp10f16(x) = 0x1.a4p+14 (RZ) + {0x446eU, 0x7690U, 1U, 0U, 1U}, +#endif + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + +static constexpr float16 exp10f16(float16 x) { + using FPBits = fputil::FPBits<float16>; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| >= 5, or x is NaN. + if (LIBC_UNLIKELY(x_abs >= 0x4500U)) { + // exp10(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 5. + if (x_bits.is_pos()) { + // exp10(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x <= -8. + if (x_u >= 0xc800U) { + // exp10(-inf) = +0 + if (x_bits.is_inf()) + return FPBits::zero().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); + + if (fputil::fenv_is_round_up()) + return FPBits::min_subnormal().get_val(); + return FPBits::zero().get_val(); + } + } + + // When x is 1, 2, 3, or 4. These are hard-to-round cases with exact results. + if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { + switch (x_u) { + case 0x3c00U: // x = 1.0f16 + return fputil::cast<float16>(10.0); + case 0x4000U: // x = 2.0f16 + return fputil::cast<float16>(100.0); + case 0x4200U: // x = 3.0f16 + return fputil::cast<float16>(1'000.0); + case 0x4400U: // x = 4.0f16 + return fputil::cast<float16>(10'000.0); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP10F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // 10^x = 2^((hi + mid) * log2(10)) * 10^lo + auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); + return fputil::cast<float16>(exp2_hi_mid * exp10_lo); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H diff --git a/libc/src/__support/math/exp10f16_utils.h b/libc/src/__support/math/exp10f16_utils.h new file mode 100644 index 0000000..bffb81b --- /dev/null +++ b/libc/src/__support/math/exp10f16_utils.h @@ -0,0 +1,64 @@ +//===-- Common utils for exp10f16 -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "exp10_float16_constants.h" +#include "expf16_utils.h" +#include "src/__support/FPUtil/FPBits.h" + +namespace LIBC_NAMESPACE_DECL { + +LIBC_INLINE static constexpr ExpRangeReduction +exp10_range_reduction(float16 x) { + // For -8 < x < 5, to compute 10^x, we perform the following range reduction: + // find hi, mid, lo, such that: + // x = (hi + mid) * log2(10) + lo, in which + // hi is an integer, + // mid * 2^3 is an integer, + // -2^(-4) <= lo < 2^(-4). + // In particular, + // hi + mid = round(x * 2^3) * 2^(-3). + // Then, + // 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo + // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid + // by adding hi to the exponent field of 2^mid. 10^lo is computed using a + // degree-4 minimax polynomial generated by Sollya. + + float xf = x; + float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f)); + int x_hi_mid = static_cast<int>(kf); + unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3; + unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7; + // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x + float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf); + + uint32_t exp2_hi_mid_bits = + EXP2_MID_BITS[x_mid] + + static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN); + float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val(); + // Degree-4 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]); + // > 1 + x * P; + float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f, + 0x1.04b434p+1f, 0x1.2bcf9ep+0f); + return {exp2_hi_mid, exp10_lo}; +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H diff --git a/libc/src/__support/math/exp10f_utils.h b/libc/src/__support/math/exp10f_utils.h index 0493e1b..c30def9 100644 --- a/libc/src/__support/math/exp10f_utils.h +++ b/libc/src/__support/math/exp10f_utils.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H -#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_UTILS_H #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -154,4 +154,4 @@ LIBC_INLINE static constexpr exp_b_reduc_t exp_b_range_reduc(float x) { } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_UTILS_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 99db743..7e6a32b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1477,20 +1477,8 @@ add_entrypoint_object( HDRS ../exp10f16.h DEPENDS - .expxf16 - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.CPP.array - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.exp10f16 + libc.src.errno.errno ) add_entrypoint_object( @@ -1519,7 +1507,6 @@ add_entrypoint_object( HDRS ../exp10m1f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -1531,6 +1518,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.exp10f16_utils ) add_entrypoint_object( @@ -4028,20 +4016,6 @@ add_entrypoint_object( libc.src.__support.macros.properties.types ) -add_header_library( - asin_utils - HDRS - atan_utils.h - DEPENDS - libc.src.__support.integer_literals - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization -) - add_entrypoint_object( asin SRCS @@ -4049,7 +4023,7 @@ add_entrypoint_object( HDRS ../asin.h DEPENDS - .asin_utils + libc.src.__support.math.asin_utils libc.src.__support.FPUtil.double_double libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl @@ -4104,17 +4078,7 @@ add_entrypoint_object( HDRS ../acos.h DEPENDS - .asin_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types - libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.acos ) add_entrypoint_object( @@ -5023,10 +4987,11 @@ add_header_library( HDRS expxf16.h DEPENDS - libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.nearest_integer libc.src.__support.macros.attributes libc.src.__support.math.expf16_utils + libc.src.__support.math.exp10_float16_constants ) diff --git a/libc/src/math/generic/acos.cpp b/libc/src/math/generic/acos.cpp index c14721f..3a59642 100644 --- a/libc/src/math/generic/acos.cpp +++ b/libc/src/math/generic/acos.cpp @@ -7,272 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/acos.h" -#include "asin_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/acos.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - -LLVM_LIBC_FUNCTION(double, acos, (double x)) { - using FPBits = fputil::FPBits<double>; - - FPBits xbits(x); - int x_exp = xbits.get_biased_exponent(); - - // |x| < 0.5. - if (x_exp < FPBits::EXP_BIAS - 1) { - // |x| < 2^-55. - if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 55)) { - // When |x| < 2^-55, acos(x) = pi/2 -#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) - return PI_OVER_TWO.hi; -#else - // Force the evaluation and prevent constant propagation so that it - // is rounded correctly for FE_UPWARD rounding mode. - return (xbits.abs().get_val() + 0x1.0p-160) + PI_OVER_TWO.hi; -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // acos(x) = pi/2 - asin(x) - // = pi/2 - x * P(x^2) - double p = asin_eval(x * x); - return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo); -#else - unsigned idx; - DoubleDouble x_sq = fputil::exact_mult(x, x); - double err = xbits.abs().get_val() * 0x1.0p-51; - // Polynomial approximation: - // p ~ asin(x)/x - DoubleDouble p = asin_eval(x_sq, idx, err); - // asin(x) ~ x * p - DoubleDouble r0 = fputil::exact_mult(x, p.hi); - // acos(x) = pi/2 - asin(x) - // ~ pi/2 - x * p - // = pi/2 - x * (p.hi + p.lo) - double r_hi = fputil::multiply_add(-x, p.hi, PI_OVER_TWO.hi); - // Use Dekker's 2SUM algorithm to compute the lower part. - double r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo; - r_lo = fputil::multiply_add(-x, p.lo, r_lo + PI_OVER_TWO.lo); - - // Ziv's accuracy test. - - double r_upper = r_hi + (r_lo + err); - double r_lower = r_hi + (r_lo - err); - - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - // Ziv's accuracy test failed, perform 128-bit calculation. - - // Recalculate mod 1/64. - idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6)); - - // Get x^2 - idx/64 exactly. When FMA is available, double-double - // multiplication will be correct for all rounding modes. Otherwise we use - // Float128 directly. - Float128 x_f128(x); - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - // u = x^2 - idx/64 - Float128 u_hi( - fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi)); - Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo)); -#else - Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128); - Float128 u = fputil::quick_add( - x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6))); -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - Float128 p_f128 = asin_eval(u, idx); - // Flip the sign of x_f128 to perform subtraction. - x_f128.sign = x_f128.sign.negate(); - Float128 r = - fputil::quick_add(PI_OVER_TWO_F128, fputil::quick_mul(x_f128, p_f128)); - - return static_cast<double>(r); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - // |x| >= 0.5 - - double x_abs = xbits.abs().get_val(); - - // Maintaining the sign: - constexpr double SIGN[2] = {1.0, -1.0}; - double x_sign = SIGN[xbits.is_neg()]; - // |x| >= 1 - if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) { - // x = +-1, asin(x) = +- pi/2 - if (x_abs == 1.0) { - // x = 1, acos(x) = 0, - // x = -1, acos(x) = pi - return x == 1.0 ? 0.0 : fputil::multiply_add(-x_sign, PI.hi, PI.lo); - } - // |x| > 1, return NaN. - if (xbits.is_quiet_nan()) - return x; - - // Set domain error for non-NaN input. - if (!xbits.is_nan()) - fputil::set_errno_if_required(EDOM); - - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - // When |x| >= 0.5, we perform range reduction as follow: - // - // When 0.5 <= x < 1, let: - // y = acos(x) - // We will use the double angle formula: - // cos(2y) = 1 - 2 sin^2(y) - // and the complement angle identity: - // x = cos(y) = 1 - 2 sin^2 (y/2) - // So: - // sin(y/2) = sqrt( (1 - x)/2 ) - // And hence: - // y/2 = asin( sqrt( (1 - x)/2 ) ) - // Equivalently: - // acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) ) - // Let u = (1 - x)/2, then: - // acos(x) = 2 * asin( sqrt(u) ) - // Moreover, since 0.5 <= x < 1: - // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5, - // And hence we can reuse the same polynomial approximation of asin(x) when - // |x| <= 0.5: - // acos(x) ~ 2 * sqrt(u) * P(u). - // - // When -1 < x <= -0.5, we reduce to the previous case using the formula: - // acos(x) = pi - acos(-x) - // = pi - 2 * asin ( sqrt( (1 + x)/2 ) ) - // ~ pi - 2 * sqrt(u) * P(u), - // where u = (1 - |x|)/2. - - // u = (1 - |x|)/2 - double u = fputil::multiply_add(x_abs, -0.5, 0.5); - // v_hi + v_lo ~ sqrt(u). - // Let: - // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi) - // Then: - // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) - // ~ v_hi + h / (2 * v_hi) - // So we can use: - // v_lo = h / (2 * v_hi). - double v_hi = fputil::sqrt<double>(u); - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI}; - DoubleDouble const_term = CONST_TERM[xbits.is_neg()]; - - double p = asin_eval(u); - double scale = x_sign * 2.0 * v_hi; - double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo); - return r; -#else - -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - double h = fputil::multiply_add(v_hi, -v_hi, u); -#else - DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi); - double h = (u - v_hi_sq.hi) - v_hi_sq.lo; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - // Scale v_lo and v_hi by 2 from the formula: - // vh = v_hi * 2 - // vl = 2*v_lo = h / v_hi. - double vh = v_hi * 2.0; - double vl = h / v_hi; - - // Polynomial approximation: - // p ~ asin(sqrt(u))/sqrt(u) - unsigned idx; - double err = vh * 0x1.0p-51; - - DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err); - - // Perform computations in double-double arithmetic: - // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p) - DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p); - - double r_hi, r_lo; - if (xbits.is_pos()) { - r_hi = r0.hi; - r_lo = r0.lo; - } else { - DoubleDouble r = fputil::exact_add(PI.hi, -r0.hi); - r_hi = r.hi; - r_lo = (PI.lo - r0.lo) + r.lo; - } - - // Ziv's accuracy test. - - double r_upper = r_hi + (r_lo + err); - double r_lower = r_hi + (r_lo - err); - - if (LIBC_LIKELY(r_upper == r_lower)) - return r_upper; - - // Ziv's accuracy test failed, we redo the computations in Float128. - // Recalculate mod 1/64. - idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6)); - - // After the first step of Newton-Raphson approximating v = sqrt(u), we have - // that: - // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) - // v_lo = h / (2 * v_hi) - // With error: - // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) ) - // = -h^2 / (2*v * (sqrt(u) + v)^2). - // Since: - // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u, - // we can add another correction term to (v_hi + v_lo) that is: - // v_ll = -h^2 / (2*v_hi * 4u) - // = -v_lo * (h / 4u) - // = -vl * (h / 8u), - // making the errors: - // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3) - // well beyond 128-bit precision needed. - - // Get the rounding error of vl = 2 * v_lo ~ h / vh - // Get full product of vh * vl -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi; -#else - DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl); - double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - // vll = 2*v_ll = -vl * (h / (4u)). - double t = h * (-0.25) / u; - double vll = fputil::multiply_add(vl, t, vl_lo); - // m_v = -(v_hi + v_lo + v_ll). - Float128 m_v = fputil::quick_add( - Float128(vh), fputil::quick_add(Float128(vl), Float128(vll))); - m_v.sign = xbits.sign(); - - // Perform computations in Float128: - // acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1, - // = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5. - Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u)); - - Float128 p_f128 = asin_eval(y_f128, idx); - Float128 r_f128 = fputil::quick_mul(m_v, p_f128); - - if (xbits.is_neg()) - r_f128 = fputil::quick_add(PI_F128, r_f128); - - return static_cast<double>(r_f128); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS -} +LLVM_LIBC_FUNCTION(double, acos, (double x)) { return math::acos(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp index ad77683..c033597 100644 --- a/libc/src/math/generic/asin.cpp +++ b/libc/src/math/generic/asin.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/asin.h" -#include "asin_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -18,6 +17,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/asin_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/exp10f16.cpp b/libc/src/math/generic/exp10f16.cpp index 31abf3b..cb3c859 100644 --- a/libc/src/math/generic/exp10f16.cpp +++ b/libc/src/math/generic/exp10f16.cpp @@ -7,128 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/exp10f16.h" -#include "expxf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/CPP/array.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/exp10f16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT -static constexpr size_t N_EXP10F16_EXCEPTS = 5; -#else -static constexpr size_t N_EXP10F16_EXCEPTS = 8; -#endif - -static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS> - EXP10F16_EXCEPTS = {{ - // x = 0x1.8f4p-2, exp10f16(x) = 0x1.3ap+1 (RZ) - {0x363dU, 0x40e8U, 1U, 0U, 1U}, - // x = 0x1.95cp-2, exp10f16(x) = 0x1.3ecp+1 (RZ) - {0x3657U, 0x40fbU, 1U, 0U, 0U}, - // x = -0x1.018p-4, exp10f16(x) = 0x1.bbp-1 (RZ) - {0xac06U, 0x3aecU, 1U, 0U, 0U}, - // x = -0x1.c28p+0, exp10f16(x) = 0x1.1ccp-6 (RZ) - {0xbf0aU, 0x2473U, 1U, 0U, 0U}, - // x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ) - {0xc387U, 0x09a5U, 1U, 0U, 0U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ) - {0x4030U, 0x57c1U, 1U, 0U, 1U}, - // x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ) - {0x406eU, 0x591fU, 1U, 0U, 1U}, - // x = 0x1.1b8p+2, exp10f16(x) = 0x1.a4p+14 (RZ) - {0x446eU, 0x7690U, 1U, 0U, 1U}, -#endif - }}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, exp10f16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| >= 5, or x is NaN. - if (LIBC_UNLIKELY(x_abs >= 0x4500U)) { - // exp10(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 5. - if (x_bits.is_pos()) { - // exp10(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x <= -8. - if (x_u >= 0xc800U) { - // exp10(-inf) = +0 - if (x_bits.is_inf()) - return FPBits::zero().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); - - if (fputil::fenv_is_round_up()) - return FPBits::min_subnormal().get_val(); - return FPBits::zero().get_val(); - } - } - - // When x is 1, 2, 3, or 4. These are hard-to-round cases with exact results. - if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { - switch (x_u) { - case 0x3c00U: // x = 1.0f16 - return fputil::cast<float16>(10.0); - case 0x4000U: // x = 2.0f16 - return fputil::cast<float16>(100.0); - case 0x4200U: // x = 3.0f16 - return fputil::cast<float16>(1'000.0); - case 0x4400U: // x = 4.0f16 - return fputil::cast<float16>(10'000.0); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP10F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // 10^x = 2^((hi + mid) * log2(10)) * 10^lo - auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); - return fputil::cast<float16>(exp2_hi_mid * exp10_lo); -} +LLVM_LIBC_FUNCTION(float16, exp10f16, (float16 x)) { return math::exp10f16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/exp10m1f16.cpp b/libc/src/math/generic/exp10m1f16.cpp index 545c479..6c2fdbe 100644 --- a/libc/src/math/generic/exp10m1f16.cpp +++ b/libc/src/math/generic/exp10m1f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/exp10m1f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -21,6 +20,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/exp10f16_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index 05ac95d..b17b14f 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -17,18 +17,11 @@ #include "src/__support/macros/config.h" #include <stdint.h> +#include "src/__support/math/exp10_float16_constants.h" #include "src/__support/math/expf16_utils.h" namespace LIBC_NAMESPACE_DECL { -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN)); -constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = { - 0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U, - 0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U, -}; - LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { // For -25 < x < 16, to compute 2^x, we perform the following range reduction: // find hi, mid, lo, such that: @@ -68,53 +61,6 @@ LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { // Generated by Sollya with the following commands: // > display = hexadecimal; -// > round(log2(10), SG, RN); -static constexpr float LOG2F_10 = 0x1.a934fp+1f; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > round(log10(2), SG, RN); -static constexpr float LOG10F_2 = 0x1.344136p-2f; - -LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) { - // For -8 < x < 5, to compute 10^x, we perform the following range reduction: - // find hi, mid, lo, such that: - // x = (hi + mid) * log2(10) + lo, in which - // hi is an integer, - // mid * 2^3 is an integer, - // -2^(-4) <= lo < 2^(-4). - // In particular, - // hi + mid = round(x * 2^3) * 2^(-3). - // Then, - // 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo - // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid - // by adding hi to the exponent field of 2^mid. 10^lo is computed using a - // degree-4 minimax polynomial generated by Sollya. - - float xf = x; - float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f)); - int x_hi_mid = static_cast<int>(kf); - unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3; - unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7; - // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x - float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf); - - uint32_t exp2_hi_mid_bits = - EXP2_MID_BITS[x_mid] + - static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN); - float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val(); - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]); - // > 1 + x * P; - float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f, - 0x1.04b434p+1f, 0x1.2bcf9ep+0f); - return {exp2_hi_mid, exp10_lo}; -} - -// Generated by Sollya with the following commands: -// > display = hexadecimal; // > round(log2(exp(1)), SG, RN); static constexpr float LOG2F_E = 0x1.715476p+0f; diff --git a/libc/src/sys/time/linux/setitimer.cpp b/libc/src/sys/time/linux/setitimer.cpp index 1de0d43..fb16358 100644 --- a/libc/src/sys/time/linux/setitimer.cpp +++ b/libc/src/sys/time/linux/setitimer.cpp @@ -22,9 +22,9 @@ LLVM_LIBC_FUNCTION(int, setitimer, // There is no SYS_setitimer_time64 call, so we can't use time_t directly, // and need to convert it to long first. long new_value32[4] = {static_cast<long>(new_value->it_interval.tv_sec), - new_value->it_interval.tv_usec, + static_cast<long>(new_value->it_interval.tv_usec), static_cast<long>(new_value->it_value.tv_sec), - new_value->it_value.tv_usec}; + static_cast<long>(new_value->it_value.tv_usec)}; long old_value32[4]; ret = LIBC_NAMESPACE::syscall_impl<long>(SYS_setitimer, which, new_value32, diff --git a/libc/src/sys/time/linux/utimes.cpp b/libc/src/sys/time/linux/utimes.cpp index ed37b42..9c00ce9 100644 --- a/libc/src/sys/time/linux/utimes.cpp +++ b/libc/src/sys/time/linux/utimes.cpp @@ -59,8 +59,10 @@ LLVM_LIBC_FUNCTION(int, utimes, ts[1].tv_sec = times[1].tv_sec; // convert u-seconds to nanoseconds - ts[0].tv_nsec = times[0].tv_usec * 1000; - ts[1].tv_nsec = times[1].tv_usec * 1000; + ts[0].tv_nsec = + static_cast<decltype(ts[0].tv_nsec)>(times[0].tv_usec * 1000); + ts[1].tv_nsec = + static_cast<decltype(ts[1].tv_nsec)>(times[1].tv_usec * 1000); ts_ptr = ts; } |