diff options
author | Muhammad Bassiouni <60100307+bassiounix@users.noreply.github.com> | 2025-07-18 20:00:04 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-18 20:00:04 +0300 |
commit | 7e0ae019f854c99ae0d6a220aba7fcd5407f2494 (patch) | |
tree | f25a0475edbd6f626fd94b2f1ca16ff8e5fa1810 /libc/src | |
parent | 10518c76de091bf23e72a8761c1eff561ce6e074 (diff) | |
download | llvm-7e0ae019f854c99ae0d6a220aba7fcd5407f2494.zip llvm-7e0ae019f854c99ae0d6a220aba7fcd5407f2494.tar.gz llvm-7e0ae019f854c99ae0d6a220aba7fcd5407f2494.tar.bz2 |
[libc][math] Refactor exp10f16 implementation to header-only in src/__support/math folder. (#148408)
Part of #147386
in preparation for:
https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450
Diffstat (limited to 'libc/src')
-rw-r--r-- | libc/src/__support/math/CMakeLists.txt | 34 | ||||
-rw-r--r-- | libc/src/__support/math/exp10_float16_constants.h | 43 | ||||
-rw-r--r-- | libc/src/__support/math/exp10f16.h | 141 | ||||
-rw-r--r-- | libc/src/__support/math/exp10f16_utils.h | 64 | ||||
-rw-r--r-- | libc/src/math/generic/CMakeLists.txt | 21 | ||||
-rw-r--r-- | libc/src/math/generic/exp10f16.cpp | 122 | ||||
-rw-r--r-- | libc/src/math/generic/exp10m1f16.cpp | 2 | ||||
-rw-r--r-- | libc/src/math/generic/expxf16.h | 56 |
8 files changed, 291 insertions, 192 deletions
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index ad36679..77a47c6 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -198,3 +198,37 @@ add_header_library( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization ) + +add_header_library( + exp10_float16_constants + HDRS + exp10_float16_constants.h + DEPENDS + libc.src.__support.CPP.array +) + +add_header_library( + exp10f16_utils + HDRS + exp10f16_utils.h + DEPENDS + .expf16_utils + .exp10_float16_constants + libc.src.__support.FPUtil.fp_bits +) + +add_header_library( + exp10f16 + HDRS + exp10f16.h + DEPENDS + .exp10f16_utils + libc.src.__support.FPUtil.fp_bits + src.__support.FPUtil.FEnvImpl + src.__support.FPUtil.FPBits + src.__support.FPUtil.cast + src.__support.FPUtil.rounding_mode + src.__support.FPUtil.except_value_utils + src.__support.macros.optimization + src.__support.macros.properties.cpu_features +) diff --git a/libc/src/__support/math/exp10_float16_constants.h b/libc/src/__support/math/exp10_float16_constants.h new file mode 100644 index 0000000..f5928db --- /dev/null +++ b/libc/src/__support/math/exp10_float16_constants.h @@ -0,0 +1,43 @@ +//===-- Constants for exp10f16 function -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include <stdint.h> + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/CPP/array.h" + +namespace LIBC_NAMESPACE_DECL { + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN)); +static constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = { + 0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U, + 0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U, +}; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log2(10), SG, RN); +static constexpr float LOG2F_10 = 0x1.a934fp+1f; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log10(2), SG, RN); +static constexpr float LOG10F_2 = 0x1.344136p-2f; + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H diff --git a/libc/src/__support/math/exp10f16.h b/libc/src/__support/math/exp10f16.h new file mode 100644 index 0000000..0d8b125 --- /dev/null +++ b/libc/src/__support/math/exp10f16.h @@ -0,0 +1,141 @@ +//===-- Implementation header for exp10f16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "exp10f16_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/cpu_features.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT +static constexpr size_t N_EXP10F16_EXCEPTS = 5; +#else +static constexpr size_t N_EXP10F16_EXCEPTS = 8; +#endif + +static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS> + EXP10F16_EXCEPTS = {{ + // x = 0x1.8f4p-2, exp10f16(x) = 0x1.3ap+1 (RZ) + {0x363dU, 0x40e8U, 1U, 0U, 1U}, + // x = 0x1.95cp-2, exp10f16(x) = 0x1.3ecp+1 (RZ) + {0x3657U, 0x40fbU, 1U, 0U, 0U}, + // x = -0x1.018p-4, exp10f16(x) = 0x1.bbp-1 (RZ) + {0xac06U, 0x3aecU, 1U, 0U, 0U}, + // x = -0x1.c28p+0, exp10f16(x) = 0x1.1ccp-6 (RZ) + {0xbf0aU, 0x2473U, 1U, 0U, 0U}, + // x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ) + {0xc387U, 0x09a5U, 1U, 0U, 0U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ) + {0x4030U, 0x57c1U, 1U, 0U, 1U}, + // x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ) + {0x406eU, 0x591fU, 1U, 0U, 1U}, + // x = 0x1.1b8p+2, exp10f16(x) = 0x1.a4p+14 (RZ) + {0x446eU, 0x7690U, 1U, 0U, 1U}, +#endif + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + +static constexpr float16 exp10f16(float16 x) { + using FPBits = fputil::FPBits<float16>; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| >= 5, or x is NaN. + if (LIBC_UNLIKELY(x_abs >= 0x4500U)) { + // exp10(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 5. + if (x_bits.is_pos()) { + // exp10(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x <= -8. + if (x_u >= 0xc800U) { + // exp10(-inf) = +0 + if (x_bits.is_inf()) + return FPBits::zero().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); + + if (fputil::fenv_is_round_up()) + return FPBits::min_subnormal().get_val(); + return FPBits::zero().get_val(); + } + } + + // When x is 1, 2, 3, or 4. These are hard-to-round cases with exact results. + if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { + switch (x_u) { + case 0x3c00U: // x = 1.0f16 + return fputil::cast<float16>(10.0); + case 0x4000U: // x = 2.0f16 + return fputil::cast<float16>(100.0); + case 0x4200U: // x = 3.0f16 + return fputil::cast<float16>(1'000.0); + case 0x4400U: // x = 4.0f16 + return fputil::cast<float16>(10'000.0); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP10F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // 10^x = 2^((hi + mid) * log2(10)) * 10^lo + auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); + return fputil::cast<float16>(exp2_hi_mid * exp10_lo); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H diff --git a/libc/src/__support/math/exp10f16_utils.h b/libc/src/__support/math/exp10f16_utils.h new file mode 100644 index 0000000..bffb81b --- /dev/null +++ b/libc/src/__support/math/exp10f16_utils.h @@ -0,0 +1,64 @@ +//===-- Common utils for exp10f16 -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "exp10_float16_constants.h" +#include "expf16_utils.h" +#include "src/__support/FPUtil/FPBits.h" + +namespace LIBC_NAMESPACE_DECL { + +LIBC_INLINE static constexpr ExpRangeReduction +exp10_range_reduction(float16 x) { + // For -8 < x < 5, to compute 10^x, we perform the following range reduction: + // find hi, mid, lo, such that: + // x = (hi + mid) * log2(10) + lo, in which + // hi is an integer, + // mid * 2^3 is an integer, + // -2^(-4) <= lo < 2^(-4). + // In particular, + // hi + mid = round(x * 2^3) * 2^(-3). + // Then, + // 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo + // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid + // by adding hi to the exponent field of 2^mid. 10^lo is computed using a + // degree-4 minimax polynomial generated by Sollya. + + float xf = x; + float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f)); + int x_hi_mid = static_cast<int>(kf); + unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3; + unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7; + // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x + float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf); + + uint32_t exp2_hi_mid_bits = + EXP2_MID_BITS[x_mid] + + static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN); + float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val(); + // Degree-4 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]); + // > 1 + x * P; + float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f, + 0x1.04b434p+1f, 0x1.2bcf9ep+0f); + return {exp2_hi_mid, exp10_lo}; +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_UTILS_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 99db743..fb253a4 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1477,20 +1477,8 @@ add_entrypoint_object( HDRS ../exp10f16.h DEPENDS - .expxf16 - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.CPP.array - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.exp10f16 + libc.src.errno.errno ) add_entrypoint_object( @@ -1519,7 +1507,6 @@ add_entrypoint_object( HDRS ../exp10m1f16.h DEPENDS - .expxf16 libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.cast @@ -1531,6 +1518,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.exp10f16_utils ) add_entrypoint_object( @@ -5023,10 +5011,11 @@ add_header_library( HDRS expxf16.h DEPENDS - libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.nearest_integer libc.src.__support.macros.attributes libc.src.__support.math.expf16_utils + libc.src.__support.math.exp10_float16_constants ) diff --git a/libc/src/math/generic/exp10f16.cpp b/libc/src/math/generic/exp10f16.cpp index 31abf3b..cb3c859 100644 --- a/libc/src/math/generic/exp10f16.cpp +++ b/libc/src/math/generic/exp10f16.cpp @@ -7,128 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/exp10f16.h" -#include "expxf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/CPP/array.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/exp10f16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT -static constexpr size_t N_EXP10F16_EXCEPTS = 5; -#else -static constexpr size_t N_EXP10F16_EXCEPTS = 8; -#endif - -static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS> - EXP10F16_EXCEPTS = {{ - // x = 0x1.8f4p-2, exp10f16(x) = 0x1.3ap+1 (RZ) - {0x363dU, 0x40e8U, 1U, 0U, 1U}, - // x = 0x1.95cp-2, exp10f16(x) = 0x1.3ecp+1 (RZ) - {0x3657U, 0x40fbU, 1U, 0U, 0U}, - // x = -0x1.018p-4, exp10f16(x) = 0x1.bbp-1 (RZ) - {0xac06U, 0x3aecU, 1U, 0U, 0U}, - // x = -0x1.c28p+0, exp10f16(x) = 0x1.1ccp-6 (RZ) - {0xbf0aU, 0x2473U, 1U, 0U, 0U}, - // x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ) - {0xc387U, 0x09a5U, 1U, 0U, 0U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ) - {0x4030U, 0x57c1U, 1U, 0U, 1U}, - // x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ) - {0x406eU, 0x591fU, 1U, 0U, 1U}, - // x = 0x1.1b8p+2, exp10f16(x) = 0x1.a4p+14 (RZ) - {0x446eU, 0x7690U, 1U, 0U, 1U}, -#endif - }}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, exp10f16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| >= 5, or x is NaN. - if (LIBC_UNLIKELY(x_abs >= 0x4500U)) { - // exp10(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 5. - if (x_bits.is_pos()) { - // exp10(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x <= -8. - if (x_u >= 0xc800U) { - // exp10(-inf) = +0 - if (x_bits.is_inf()) - return FPBits::zero().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); - - if (fputil::fenv_is_round_up()) - return FPBits::min_subnormal().get_val(); - return FPBits::zero().get_val(); - } - } - - // When x is 1, 2, 3, or 4. These are hard-to-round cases with exact results. - if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { - switch (x_u) { - case 0x3c00U: // x = 1.0f16 - return fputil::cast<float16>(10.0); - case 0x4000U: // x = 2.0f16 - return fputil::cast<float16>(100.0); - case 0x4200U: // x = 3.0f16 - return fputil::cast<float16>(1'000.0); - case 0x4400U: // x = 4.0f16 - return fputil::cast<float16>(10'000.0); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP10F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // 10^x = 2^((hi + mid) * log2(10)) * 10^lo - auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); - return fputil::cast<float16>(exp2_hi_mid * exp10_lo); -} +LLVM_LIBC_FUNCTION(float16, exp10f16, (float16 x)) { return math::exp10f16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/exp10m1f16.cpp b/libc/src/math/generic/exp10m1f16.cpp index 545c479..6c2fdbe 100644 --- a/libc/src/math/generic/exp10m1f16.cpp +++ b/libc/src/math/generic/exp10m1f16.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/exp10m1f16.h" -#include "expxf16.h" #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FEnvImpl.h" @@ -21,6 +20,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/exp10f16_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index 05ac95d..b17b14f 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -17,18 +17,11 @@ #include "src/__support/macros/config.h" #include <stdint.h> +#include "src/__support/math/exp10_float16_constants.h" #include "src/__support/math/expf16_utils.h" namespace LIBC_NAMESPACE_DECL { -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN)); -constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = { - 0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U, - 0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U, -}; - LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { // For -25 < x < 16, to compute 2^x, we perform the following range reduction: // find hi, mid, lo, such that: @@ -68,53 +61,6 @@ LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { // Generated by Sollya with the following commands: // > display = hexadecimal; -// > round(log2(10), SG, RN); -static constexpr float LOG2F_10 = 0x1.a934fp+1f; - -// Generated by Sollya with the following commands: -// > display = hexadecimal; -// > round(log10(2), SG, RN); -static constexpr float LOG10F_2 = 0x1.344136p-2f; - -LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) { - // For -8 < x < 5, to compute 10^x, we perform the following range reduction: - // find hi, mid, lo, such that: - // x = (hi + mid) * log2(10) + lo, in which - // hi is an integer, - // mid * 2^3 is an integer, - // -2^(-4) <= lo < 2^(-4). - // In particular, - // hi + mid = round(x * 2^3) * 2^(-3). - // Then, - // 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo - // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid - // by adding hi to the exponent field of 2^mid. 10^lo is computed using a - // degree-4 minimax polynomial generated by Sollya. - - float xf = x; - float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f)); - int x_hi_mid = static_cast<int>(kf); - unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3; - unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7; - // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x - float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf); - - uint32_t exp2_hi_mid_bits = - EXP2_MID_BITS[x_mid] + - static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN); - float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val(); - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]); - // > 1 + x * P; - float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f, - 0x1.04b434p+1f, 0x1.2bcf9ep+0f); - return {exp2_hi_mid, exp10_lo}; -} - -// Generated by Sollya with the following commands: -// > display = hexadecimal; // > round(log2(exp(1)), SG, RN); static constexpr float LOG2F_E = 0x1.715476p+0f; |