//===-- Single-precision general exp/log functions ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #include "common_constants.h" #include "src/__support/common.h" #include "src/__support/macros/properties/cpu_features.h" #include "src/__support/math/acoshf_utils.h" #include "src/__support/math/exp10f_utils.h" #include "src/__support/math/exp_utils.h" namespace LIBC_NAMESPACE_DECL { constexpr int LOG_P1_BITS = 6; constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) // and exp(-x) simultaneously. // To compute e^x, we perform the following range // reduction: find hi, mid, lo such that: // x = (hi + mid) * log(2) + lo, in which // hi is an integer, // 0 <= mid * 2^5 < 32 is an integer // -2^(-6) <= lo * log2(e) <= 2^-6. // In particular, // hi + mid = round(x * log2(e) * 2^5) * 2^(-5). // Then, // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. // 2^mid is stored in the lookup table of 32 elements. // e^lo is computed using a degree-5 minimax polynomial // generated by Sollya: // e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) // = P_even + lo * P_odd // We perform 2^hi * 2^mid by simply add hi to the exponent field // of 2^mid. // To compute e^(-x), notice that: // e^(-x) = 2^(-(hi + mid)) * e^(-lo) // ~ 2^(-(hi + mid)) * P(-lo) // = 2^(-(hi + mid)) * (P_even - lo * P_odd) // So: // sinh(x) = (e^x - e^(-x)) / 2 // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - // 2^(-(hi + mid)) * (P_even - lo * P_odd)) // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) // And similarly: // cosh(x) = (e^x + e^(-x)) / 2 // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) // The main point of these formulas is that the expensive part of calculating // the polynomials approximating lower parts of e^(x) and e^(-x) are shared // and only done once. template LIBC_INLINE double exp_pm_eval(float x) { double xd = static_cast(x); // kd = round(x * log2(e) * 2^5) // k_p = round(x * log2(e) * 2^5) // k_m = round(-x * log2(e) * 2^5) double kd; int k_p, k_m; #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); k_p = static_cast(kd); k_m = -k_p; #else constexpr double HALF_WAY[2] = {0.5, -0.5}; k_p = static_cast( fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); k_m = -k_p; kd = static_cast(k_p); #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT // hi = floor(kf * 2^(-5)) // exp_hi = shift hi to the exponent field of double precision. int64_t exp_hi_p = static_cast((k_p >> ExpBase::MID_BITS)) << fputil::FPBits::FRACTION_LEN; int64_t exp_hi_m = static_cast((k_m >> ExpBase::MID_BITS)) << fputil::FPBits::FRACTION_LEN; // mh_p = 2^(hi + mid) // mh_m = 2^(-(hi + mid)) // mh_bits_* = bit field of mh_* int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; double mh_p = fputil::FPBits(uint64_t(mh_bits_p)).get_val(); double mh_m = fputil::FPBits(uint64_t(mh_bits_m)).get_val(); // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) double mh_sum = mh_p + mh_m; // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) double mh_diff = mh_p - mh_m; // dx = lo = x - (hi + mid) * log(2) double dx = fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * lo^2 // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, ExpBase::COEFFS[2] * 0.5); // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, ExpBase::COEFFS[3] * 0.5); double r; if constexpr (is_sinh) r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); else r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); return r; } // x should be positive, normal finite value // TODO: Simplify range reduction and polynomial degree for float16. // See issue #137190. LIBC_INLINE static float log_eval_f(float x) { // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). using FPBits = fputil::FPBits; FPBits xbits(x); float ex = static_cast(xbits.get_exponent()); // p1 is the leading 7 bits of mx, i.e. // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). int p1 = static_cast(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); // Set bits to (1 + (mx - p1*2^(-7))) xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); xbits.set_biased_exponent(FPBits::EXP_BIAS); // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; // Minimax polynomial for log(1 + dx), generated using Sollya: // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); // > Q = (P - 1) / x; // > for i from 0 to degree(Q) do print(coeff(Q, i)); constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; float dx2 = dx * dx; float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); float p = fputil::polyeval(dx2, dx, c1, c2, c3); // Generated by Sollya with the following commands: // > display = hexadecimal; // > round(log(2), SG, RN); constexpr float LOGF_2 = 0x1.62e43p-1f; float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); return result; } } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H