diff options
Diffstat (limited to 'libc')
-rw-r--r-- | libc/config/gpu/amdgpu/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/config/gpu/nvptx/entrypoints.txt | 1 | ||||
-rw-r--r-- | libc/include/math.yaml | 12 | ||||
-rw-r--r-- | libc/shared/math.h | 1 | ||||
-rw-r--r-- | libc/shared/math/atanhf16.h | 28 | ||||
-rw-r--r-- | libc/src/__support/math/CMakeLists.txt | 15 | ||||
-rw-r--r-- | libc/src/__support/math/atanhf16.h | 234 | ||||
-rw-r--r-- | libc/src/math/generic/CMakeLists.txt | 12 | ||||
-rw-r--r-- | libc/src/math/generic/atanhf16.cpp | 86 | ||||
-rw-r--r-- | libc/src/math/generic/common_constants.cpp | 78 | ||||
-rw-r--r-- | libc/src/math/generic/common_constants.h | 8 | ||||
-rw-r--r-- | libc/src/math/generic/explogxf.h | 43 | ||||
-rw-r--r-- | libc/test/shared/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libc/test/shared/shared_math_test.cpp | 1 |
14 files changed, 297 insertions, 224 deletions
diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt index e39819d..291a2d0 100644 --- a/libc/config/gpu/amdgpu/entrypoints.txt +++ b/libc/config/gpu/amdgpu/entrypoints.txt @@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf + libc.src.math.tanpif libc.src.math.tgamma libc.src.math.tgammaf libc.src.math.totalorder diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt index 26e3b15..55b27e6 100644 --- a/libc/config/gpu/nvptx/entrypoints.txt +++ b/libc/config/gpu/nvptx/entrypoints.txt @@ -490,6 +490,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf + libc.src.math.tanpif libc.src.math.tgamma libc.src.math.tgammaf libc.src.math.totalorder diff --git a/libc/include/math.yaml b/libc/include/math.yaml index 007be23..e8ac7ee 100644 --- a/libc/include/math.yaml +++ b/libc/include/math.yaml @@ -283,6 +283,12 @@ functions: return_type: float arguments: - type: float + - name: cospif + standards: + - stdc + return_type: float + arguments: + - type: float - name: cospif16 standards: - stdc @@ -2453,6 +2459,12 @@ functions: arguments: - type: _Float16 guard: LIBC_TYPES_HAS_FLOAT16 + - name: sinpif + standards: + - stdc + return_type: float + arguments: + - type: float - name: sinpif16 standards: - stdc diff --git a/libc/shared/math.h b/libc/shared/math.h index ddf219e..7fb736b 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -29,6 +29,7 @@ #include "math/atanf.h" #include "math/atanf16.h" #include "math/atanhf.h" +#include "math/atanhf16.h" #include "math/erff.h" #include "math/exp.h" #include "math/exp10.h" diff --git a/libc/shared/math/atanhf16.h b/libc/shared/math/atanhf16.h new file mode 100644 index 0000000..b7b5d77 --- /dev/null +++ b/libc/shared/math/atanhf16.h @@ -0,0 +1,28 @@ +//===-- Shared atanhf16 function --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ATANHF16_H +#define LLVM_LIBC_SHARED_MATH_ATANHF16_H + +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/atanhf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::atanhf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_ATANHF16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 500dd9d..9631ab5 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -287,6 +287,21 @@ add_header_library( ) add_header_library( + atanhf16 + HDRS + atanhf16.h + DEPENDS + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( asinf HDRS asinf.h diff --git a/libc/src/__support/math/atanhf16.h b/libc/src/__support/math/atanhf16.h new file mode 100644 index 0000000..80929dd --- /dev/null +++ b/libc/src/__support/math/atanhf16.h @@ -0,0 +1,234 @@ +//===-- Implementation header for atanhf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +namespace atanhf16_internal { + +// Lookup table for (1/f) where f = 1 + n*2^(-7), n = 0..127, +// computed and stored as float precision constants. +// Generated by Sollya with the following commands: +// display = hexadecimal; +// for n from 0 to 127 do { print(single(1 / (1 + n / 128.0))); }; +static constexpr float ONE_OVER_F_FLOAT[128] = { + 0x1p0f, 0x1.fc07fp-1f, 0x1.f81f82p-1f, 0x1.f4465ap-1f, + 0x1.f07c2p-1f, 0x1.ecc07cp-1f, 0x1.e9131ap-1f, 0x1.e573acp-1f, + 0x1.e1e1e2p-1f, 0x1.de5d6ep-1f, 0x1.dae608p-1f, 0x1.d77b66p-1f, + 0x1.d41d42p-1f, 0x1.d0cb58p-1f, 0x1.cd8568p-1f, 0x1.ca4b3p-1f, + 0x1.c71c72p-1f, 0x1.c3f8fp-1f, 0x1.c0e07p-1f, 0x1.bdd2b8p-1f, + 0x1.bacf92p-1f, 0x1.b7d6c4p-1f, 0x1.b4e81cp-1f, 0x1.b20364p-1f, + 0x1.af286cp-1f, 0x1.ac5702p-1f, 0x1.a98ef6p-1f, 0x1.a6d01ap-1f, + 0x1.a41a42p-1f, 0x1.a16d4p-1f, 0x1.9ec8eap-1f, 0x1.9c2d14p-1f, + 0x1.99999ap-1f, 0x1.970e5p-1f, 0x1.948b1p-1f, 0x1.920fb4p-1f, + 0x1.8f9c18p-1f, 0x1.8d3018p-1f, 0x1.8acb9p-1f, 0x1.886e6p-1f, + 0x1.861862p-1f, 0x1.83c978p-1f, 0x1.818182p-1f, 0x1.7f406p-1f, + 0x1.7d05f4p-1f, 0x1.7ad22p-1f, 0x1.78a4c8p-1f, 0x1.767dcep-1f, + 0x1.745d18p-1f, 0x1.724288p-1f, 0x1.702e06p-1f, 0x1.6e1f76p-1f, + 0x1.6c16c2p-1f, 0x1.6a13cep-1f, 0x1.681682p-1f, 
0x1.661ec6p-1f, + 0x1.642c86p-1f, 0x1.623fa8p-1f, 0x1.605816p-1f, 0x1.5e75bcp-1f, + 0x1.5c9882p-1f, 0x1.5ac056p-1f, 0x1.58ed24p-1f, 0x1.571ed4p-1f, + 0x1.555556p-1f, 0x1.539094p-1f, 0x1.51d07ep-1f, 0x1.501502p-1f, + 0x1.4e5e0ap-1f, 0x1.4cab88p-1f, 0x1.4afd6ap-1f, 0x1.49539ep-1f, + 0x1.47ae14p-1f, 0x1.460cbcp-1f, 0x1.446f86p-1f, 0x1.42d662p-1f, + 0x1.414142p-1f, 0x1.3fb014p-1f, 0x1.3e22ccp-1f, 0x1.3c995ap-1f, + 0x1.3b13b2p-1f, 0x1.3991c2p-1f, 0x1.381382p-1f, 0x1.3698ep-1f, + 0x1.3521dp-1f, 0x1.33ae46p-1f, 0x1.323e34p-1f, 0x1.30d19p-1f, + 0x1.2f684cp-1f, 0x1.2e025cp-1f, 0x1.2c9fb4p-1f, 0x1.2b404ap-1f, + 0x1.29e412p-1f, 0x1.288b02p-1f, 0x1.27350cp-1f, 0x1.25e228p-1f, + 0x1.24924ap-1f, 0x1.234568p-1f, 0x1.21fb78p-1f, 0x1.20b47p-1f, + 0x1.1f7048p-1f, 0x1.1e2ef4p-1f, 0x1.1cf06ap-1f, 0x1.1bb4a4p-1f, + 0x1.1a7b96p-1f, 0x1.194538p-1f, 0x1.181182p-1f, 0x1.16e068p-1f, + 0x1.15b1e6p-1f, 0x1.1485fp-1f, 0x1.135c82p-1f, 0x1.12358ep-1f, + 0x1.111112p-1f, 0x1.0fef02p-1f, 0x1.0ecf56p-1f, 0x1.0db20ap-1f, + 0x1.0c9714p-1f, 0x1.0b7e6ep-1f, 0x1.0a681p-1f, 0x1.0953f4p-1f, + 0x1.08421p-1f, 0x1.07326p-1f, 0x1.0624dep-1f, 0x1.05198p-1f, + 0x1.041042p-1f, 0x1.03091cp-1f, 0x1.020408p-1f, 0x1.010102p-1f}; + +// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, +// computed and stored as float precision constants. 
+// Generated by Sollya with the following commands: +// display = hexadecimal; +// for n from 0 to 127 do { print(single(log(1 + n / 128.0))); }; +static constexpr float LOG_F_FLOAT[128] = { + 0.0f, 0x1.fe02a6p-8f, 0x1.fc0a8cp-7f, 0x1.7b91bp-6f, + 0x1.f829bp-6f, 0x1.39e87cp-5f, 0x1.77459p-5f, 0x1.b42dd8p-5f, + 0x1.f0a30cp-5f, 0x1.16536ep-4f, 0x1.341d7ap-4f, 0x1.51b074p-4f, + 0x1.6f0d28p-4f, 0x1.8c345ep-4f, 0x1.a926d4p-4f, 0x1.c5e548p-4f, + 0x1.e27076p-4f, 0x1.fec914p-4f, 0x1.0d77e8p-3f, 0x1.1b72aep-3f, + 0x1.29553p-3f, 0x1.371fc2p-3f, 0x1.44d2b6p-3f, 0x1.526e5ep-3f, + 0x1.5ff308p-3f, 0x1.6d60fep-3f, 0x1.7ab89p-3f, 0x1.87fa06p-3f, + 0x1.9525aap-3f, 0x1.a23bc2p-3f, 0x1.af3c94p-3f, 0x1.bc2868p-3f, + 0x1.c8ff7cp-3f, 0x1.d5c216p-3f, 0x1.e27076p-3f, 0x1.ef0adcp-3f, + 0x1.fb9186p-3f, 0x1.04025ap-2f, 0x1.0a324ep-2f, 0x1.1058cp-2f, + 0x1.1675cap-2f, 0x1.1c898cp-2f, 0x1.22942p-2f, 0x1.2895a2p-2f, + 0x1.2e8e2cp-2f, 0x1.347ddap-2f, 0x1.3a64c6p-2f, 0x1.404308p-2f, + 0x1.4618bcp-2f, 0x1.4be5fap-2f, 0x1.51aad8p-2f, 0x1.576772p-2f, + 0x1.5d1bdcp-2f, 0x1.62c83p-2f, 0x1.686c82p-2f, 0x1.6e08eap-2f, + 0x1.739d8p-2f, 0x1.792a56p-2f, 0x1.7eaf84p-2f, 0x1.842d1ep-2f, + 0x1.89a338p-2f, 0x1.8f11e8p-2f, 0x1.947942p-2f, 0x1.99d958p-2f, + 0x1.9f323ep-2f, 0x1.a4840ap-2f, 0x1.a9cecap-2f, 0x1.af1294p-2f, + 0x1.b44f78p-2f, 0x1.b9858ap-2f, 0x1.beb4dap-2f, 0x1.c3dd7ap-2f, + 0x1.c8ff7cp-2f, 0x1.ce1afp-2f, 0x1.d32fe8p-2f, 0x1.d83e72p-2f, + 0x1.dd46ap-2f, 0x1.e24882p-2f, 0x1.e74426p-2f, 0x1.ec399ep-2f, + 0x1.f128f6p-2f, 0x1.f6124p-2f, 0x1.faf588p-2f, 0x1.ffd2ep-2f, + 0x1.02552ap-1f, 0x1.04bdfap-1f, 0x1.0723e6p-1f, 0x1.0986f4p-1f, + 0x1.0be72ep-1f, 0x1.0e4498p-1f, 0x1.109f3ap-1f, 0x1.12f71ap-1f, + 0x1.154c3ep-1f, 0x1.179eacp-1f, 0x1.19ee6cp-1f, 0x1.1c3b82p-1f, + 0x1.1e85f6p-1f, 0x1.20cdcep-1f, 0x1.23130ep-1f, 0x1.2555bcp-1f, + 0x1.2795e2p-1f, 0x1.29d38p-1f, 0x1.2c0e9ep-1f, 0x1.2e4744p-1f, + 0x1.307d74p-1f, 0x1.32b134p-1f, 0x1.34e28ap-1f, 0x1.37117cp-1f, + 0x1.393e0ep-1f, 0x1.3b6844p-1f, 0x1.3d9026p-1f, 
0x1.3fb5b8p-1f, + 0x1.41d8fep-1f, 0x1.43f9fep-1f, 0x1.4618bcp-1f, 0x1.48353ep-1f, + 0x1.4a4f86p-1f, 0x1.4c679ap-1f, 0x1.4e7d82p-1f, 0x1.50913cp-1f, + 0x1.52a2d2p-1f, 0x1.54b246p-1f, 0x1.56bf9ep-1f, 0x1.58cadcp-1f, + 0x1.5ad404p-1f, 0x1.5cdb1ep-1f, 0x1.5ee02ap-1f, 0x1.60e33p-1f}; + +// x should be positive, normal finite value +// TODO: Simplify range reduction and polynomial degree for float16. +// See issue #137190. +LIBC_INLINE static float log_eval_f(float x) { + // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). + using FPBits = fputil::FPBits<float>; + FPBits xbits(x); + + float ex = static_cast<float>(xbits.get_exponent()); + // p1 is the leading 7 bits of mx, i.e. + // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). + int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); + + // Set bits to (1 + (mx - p1*2^(-7))) + xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); + xbits.set_biased_exponent(FPBits::EXP_BIAS); + // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). 
+ float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; + + // Minimax polynomial for log(1 + dx), generated using Sollya: + // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); + // > Q = (P - 1) / x; + // > for i from 0 to degree(Q) do print(coeff(Q, i)); + constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, + 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; + + float dx2 = dx * dx; + + float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); + float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); + float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); + + float p = fputil::polyeval(dx2, dx, c1, c2, c3); + + // Generated by Sollya with the following commands: + // > display = hexadecimal; + // > round(log(2), SG, RN); + constexpr float LOGF_2 = 0x1.62e43p-1f; + + float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); + return result; +} + +} // namespace atanhf16_internal + +LIBC_INLINE static constexpr float16 atanhf16(float16 x) { + constexpr size_t N_EXCEPTS = 1; + constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANHF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.a5cp-4, atanhf16(x) = 0x1.a74p-4 (RZ) + {0x2E97, 0x2E9D, 1, 0, 0}, + }}; + + using namespace atanhf16_internal; + using FPBits = fputil::FPBits<float16>; + + FPBits xbits(x); + Sign sign = xbits.sign(); + uint16_t x_abs = xbits.abs().uintval(); + + // |x| >= 1 + if (LIBC_UNLIKELY(x_abs >= 0x3c00U)) { + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + + // |x| == 1.0 + if (x_abs == 0x3c00U) { + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_DIVBYZERO); + return FPBits::inf(sign).get_val(); + } + // |x| > 1.0 + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + if (auto r = 
ATANHF16_EXCEPTS.lookup(xbits.uintval()); + LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // For |x| less than approximately 0.24 + if (LIBC_UNLIKELY(x_abs <= 0x33f3U)) { + // atanh(+/-0) = +/-0 + if (LIBC_UNLIKELY(x_abs == 0U)) + return x; + // The Taylor expansion of atanh(x) is: + // atanh(x) = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + x^11/11 + // = x * [1 + x^2/3 + x^4/5 + x^6/7 + x^8/9 + x^10/11] + // When |x| < 2^-13 (0x0800U), this can be approximated by: + // atanh(x) ≈ x + (1/3)*x^3 + if (LIBC_UNLIKELY(x_abs < 0x0800U)) { + float xf = x; + return fputil::cast<float16>(xf + 0x1.555556p-2f * xf * xf * xf); + } + + // For 2^-13 <= |x| <= 0x1.fccp-3 (~0.24): + // Let t = x^2. + // Define P(t) ≈ (1/3)*t + (1/5)*t^2 + (1/7)*t^3 + (1/9)*t^4 + (1/11)*t^5. + // Coefficients (from Sollya, RN, hexadecimal): + // 1/3 = 0x1.555556p-2, 1/5 = 0x1.99999ap-3, 1/7 = 0x1.24924ap-3, + // 1/9 = 0x1.c71c72p-4, 1/11 = 0x1.745d18p-4 + // Thus, atanh(x) ≈ x * (1 + P(x^2)). + float xf = x; + float x2 = xf * xf; + float pe = fputil::polyeval(x2, 0.0f, 0x1.555556p-2f, 0x1.99999ap-3f, + 0x1.24924ap-3f, 0x1.c71c72p-4f, 0x1.745d18p-4f); + return fputil::cast<float16>(fputil::multiply_add(xf, pe, xf)); + } + + float xf = x; + return fputil::cast<float16>(0.5 * log_eval_f((xf + 1.0f) / (xf - 1.0f))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index bac043f..9df9973 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3932,17 +3932,7 @@ add_entrypoint_object( HDRS ../atanhf16.h DEPENDS - .explogxf - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - 
libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.atanhf16 ) add_entrypoint_object( diff --git a/libc/src/math/generic/atanhf16.cpp b/libc/src/math/generic/atanhf16.cpp index 57885ac..0539bac 100644 --- a/libc/src/math/generic/atanhf16.cpp +++ b/libc/src/math/generic/atanhf16.cpp @@ -7,92 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanhf16.h" -#include "explogxf.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/atanhf16.h" namespace LIBC_NAMESPACE_DECL { -static constexpr size_t N_EXCEPTS = 1; -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANHF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.a5cp-4, atanhf16(x) = 0x1.a74p-4 (RZ) - {0x2E97, 0x2E9D, 1, 0, 0}, -}}; - -LLVM_LIBC_FUNCTION(float16, atanhf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - - FPBits xbits(x); - Sign sign = xbits.sign(); - uint16_t x_abs = xbits.abs().uintval(); - - // |x| >= 1 - if (LIBC_UNLIKELY(x_abs >= 0x3c00U)) { - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - - // |x| == 1.0 - if (x_abs == 0x3c00U) { - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_DIVBYZERO); - return FPBits::inf(sign).get_val(); - } - // |x| > 1.0 - fputil::set_errno_if_required(EDOM); - 
fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - if (auto r = ATANHF16_EXCEPTS.lookup(xbits.uintval()); - LIBC_UNLIKELY(r.has_value())) - return r.value(); - - // For |x| less than approximately 0.24 - if (LIBC_UNLIKELY(x_abs <= 0x33f3U)) { - // atanh(+/-0) = +/-0 - if (LIBC_UNLIKELY(x_abs == 0U)) - return x; - // The Taylor expansion of atanh(x) is: - // atanh(x) = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + x^11/11 - // = x * [1 + x^2/3 + x^4/5 + x^6/7 + x^8/9 + x^10/11] - // When |x| < 2^-5 (0x0800U), this can be approximated by: - // atanh(x) ≈ x + (1/3)*x^3 - if (LIBC_UNLIKELY(x_abs < 0x0800U)) { - float xf = x; - return fputil::cast<float16>(xf + 0x1.555556p-2f * xf * xf * xf); - } - - // For 2^-5 <= |x| <= 0x1.fccp-3 (~0.24): - // Let t = x^2. - // Define P(t) ≈ (1/3)*t + (1/5)*t^2 + (1/7)*t^3 + (1/9)*t^4 + (1/11)*t^5. - // Coefficients (from Sollya, RN, hexadecimal): - // 1/3 = 0x1.555556p-2, 1/5 = 0x1.99999ap-3, 1/7 = 0x1.24924ap-3, - // 1/9 = 0x1.c71c72p-4, 1/11 = 0x1.745d18p-4 - // Thus, atanh(x) ≈ x * (1 + P(x^2)). - float xf = x; - float x2 = xf * xf; - float pe = fputil::polyeval(x2, 0.0f, 0x1.555556p-2f, 0x1.99999ap-3f, - 0x1.24924ap-3f, 0x1.c71c72p-4f, 0x1.745d18p-4f); - return fputil::cast<float16>(fputil::multiply_add(xf, pe, xf)); - } - - float xf = x; - return fputil::cast<float16>(0.5 * log_eval_f((xf + 1.0f) / (xf - 1.0f))); -} +LLVM_LIBC_FUNCTION(float16, atanhf16, (float16 x)) { return math::atanhf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp index 42e3ff0..2a15df2 100644 --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -12,84 +12,6 @@ namespace LIBC_NAMESPACE_DECL { -// Lookup table for logf(f) = logf(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. 
-// Generated by Sollya with the following commands: -// display = hexadecimal; -// for n from 0 to 127 do { print(single(1 / (1 + n / 128.0))); }; -const float ONE_OVER_F_FLOAT[128] = { - 0x1p0f, 0x1.fc07fp-1f, 0x1.f81f82p-1f, 0x1.f4465ap-1f, - 0x1.f07c2p-1f, 0x1.ecc07cp-1f, 0x1.e9131ap-1f, 0x1.e573acp-1f, - 0x1.e1e1e2p-1f, 0x1.de5d6ep-1f, 0x1.dae608p-1f, 0x1.d77b66p-1f, - 0x1.d41d42p-1f, 0x1.d0cb58p-1f, 0x1.cd8568p-1f, 0x1.ca4b3p-1f, - 0x1.c71c72p-1f, 0x1.c3f8fp-1f, 0x1.c0e07p-1f, 0x1.bdd2b8p-1f, - 0x1.bacf92p-1f, 0x1.b7d6c4p-1f, 0x1.b4e81cp-1f, 0x1.b20364p-1f, - 0x1.af286cp-1f, 0x1.ac5702p-1f, 0x1.a98ef6p-1f, 0x1.a6d01ap-1f, - 0x1.a41a42p-1f, 0x1.a16d4p-1f, 0x1.9ec8eap-1f, 0x1.9c2d14p-1f, - 0x1.99999ap-1f, 0x1.970e5p-1f, 0x1.948b1p-1f, 0x1.920fb4p-1f, - 0x1.8f9c18p-1f, 0x1.8d3018p-1f, 0x1.8acb9p-1f, 0x1.886e6p-1f, - 0x1.861862p-1f, 0x1.83c978p-1f, 0x1.818182p-1f, 0x1.7f406p-1f, - 0x1.7d05f4p-1f, 0x1.7ad22p-1f, 0x1.78a4c8p-1f, 0x1.767dcep-1f, - 0x1.745d18p-1f, 0x1.724288p-1f, 0x1.702e06p-1f, 0x1.6e1f76p-1f, - 0x1.6c16c2p-1f, 0x1.6a13cep-1f, 0x1.681682p-1f, 0x1.661ec6p-1f, - 0x1.642c86p-1f, 0x1.623fa8p-1f, 0x1.605816p-1f, 0x1.5e75bcp-1f, - 0x1.5c9882p-1f, 0x1.5ac056p-1f, 0x1.58ed24p-1f, 0x1.571ed4p-1f, - 0x1.555556p-1f, 0x1.539094p-1f, 0x1.51d07ep-1f, 0x1.501502p-1f, - 0x1.4e5e0ap-1f, 0x1.4cab88p-1f, 0x1.4afd6ap-1f, 0x1.49539ep-1f, - 0x1.47ae14p-1f, 0x1.460cbcp-1f, 0x1.446f86p-1f, 0x1.42d662p-1f, - 0x1.414142p-1f, 0x1.3fb014p-1f, 0x1.3e22ccp-1f, 0x1.3c995ap-1f, - 0x1.3b13b2p-1f, 0x1.3991c2p-1f, 0x1.381382p-1f, 0x1.3698ep-1f, - 0x1.3521dp-1f, 0x1.33ae46p-1f, 0x1.323e34p-1f, 0x1.30d19p-1f, - 0x1.2f684cp-1f, 0x1.2e025cp-1f, 0x1.2c9fb4p-1f, 0x1.2b404ap-1f, - 0x1.29e412p-1f, 0x1.288b02p-1f, 0x1.27350cp-1f, 0x1.25e228p-1f, - 0x1.24924ap-1f, 0x1.234568p-1f, 0x1.21fb78p-1f, 0x1.20b47p-1f, - 0x1.1f7048p-1f, 0x1.1e2ef4p-1f, 0x1.1cf06ap-1f, 0x1.1bb4a4p-1f, - 0x1.1a7b96p-1f, 0x1.194538p-1f, 0x1.181182p-1f, 0x1.16e068p-1f, - 0x1.15b1e6p-1f, 0x1.1485fp-1f, 0x1.135c82p-1f, 
0x1.12358ep-1f, - 0x1.111112p-1f, 0x1.0fef02p-1f, 0x1.0ecf56p-1f, 0x1.0db20ap-1f, - 0x1.0c9714p-1f, 0x1.0b7e6ep-1f, 0x1.0a681p-1f, 0x1.0953f4p-1f, - 0x1.08421p-1f, 0x1.07326p-1f, 0x1.0624dep-1f, 0x1.05198p-1f, - 0x1.041042p-1f, 0x1.03091cp-1f, 0x1.020408p-1f, 0x1.010102p-1f}; - -// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. -// Generated by Sollya with the following commands: -// display = hexadecimal; -// for n from 0 to 127 do { print(single(log(1 + n / 128.0))); }; -const float LOG_F_FLOAT[128] = { - 0.0f, 0x1.fe02a6p-8f, 0x1.fc0a8cp-7f, 0x1.7b91bp-6f, - 0x1.f829bp-6f, 0x1.39e87cp-5f, 0x1.77459p-5f, 0x1.b42dd8p-5f, - 0x1.f0a30cp-5f, 0x1.16536ep-4f, 0x1.341d7ap-4f, 0x1.51b074p-4f, - 0x1.6f0d28p-4f, 0x1.8c345ep-4f, 0x1.a926d4p-4f, 0x1.c5e548p-4f, - 0x1.e27076p-4f, 0x1.fec914p-4f, 0x1.0d77e8p-3f, 0x1.1b72aep-3f, - 0x1.29553p-3f, 0x1.371fc2p-3f, 0x1.44d2b6p-3f, 0x1.526e5ep-3f, - 0x1.5ff308p-3f, 0x1.6d60fep-3f, 0x1.7ab89p-3f, 0x1.87fa06p-3f, - 0x1.9525aap-3f, 0x1.a23bc2p-3f, 0x1.af3c94p-3f, 0x1.bc2868p-3f, - 0x1.c8ff7cp-3f, 0x1.d5c216p-3f, 0x1.e27076p-3f, 0x1.ef0adcp-3f, - 0x1.fb9186p-3f, 0x1.04025ap-2f, 0x1.0a324ep-2f, 0x1.1058cp-2f, - 0x1.1675cap-2f, 0x1.1c898cp-2f, 0x1.22942p-2f, 0x1.2895a2p-2f, - 0x1.2e8e2cp-2f, 0x1.347ddap-2f, 0x1.3a64c6p-2f, 0x1.404308p-2f, - 0x1.4618bcp-2f, 0x1.4be5fap-2f, 0x1.51aad8p-2f, 0x1.576772p-2f, - 0x1.5d1bdcp-2f, 0x1.62c83p-2f, 0x1.686c82p-2f, 0x1.6e08eap-2f, - 0x1.739d8p-2f, 0x1.792a56p-2f, 0x1.7eaf84p-2f, 0x1.842d1ep-2f, - 0x1.89a338p-2f, 0x1.8f11e8p-2f, 0x1.947942p-2f, 0x1.99d958p-2f, - 0x1.9f323ep-2f, 0x1.a4840ap-2f, 0x1.a9cecap-2f, 0x1.af1294p-2f, - 0x1.b44f78p-2f, 0x1.b9858ap-2f, 0x1.beb4dap-2f, 0x1.c3dd7ap-2f, - 0x1.c8ff7cp-2f, 0x1.ce1afp-2f, 0x1.d32fe8p-2f, 0x1.d83e72p-2f, - 0x1.dd46ap-2f, 0x1.e24882p-2f, 0x1.e74426p-2f, 0x1.ec399ep-2f, - 0x1.f128f6p-2f, 0x1.f6124p-2f, 0x1.faf588p-2f, 0x1.ffd2ep-2f, - 0x1.02552ap-1f, 0x1.04bdfap-1f, 0x1.0723e6p-1f, 
0x1.0986f4p-1f, - 0x1.0be72ep-1f, 0x1.0e4498p-1f, 0x1.109f3ap-1f, 0x1.12f71ap-1f, - 0x1.154c3ep-1f, 0x1.179eacp-1f, 0x1.19ee6cp-1f, 0x1.1c3b82p-1f, - 0x1.1e85f6p-1f, 0x1.20cdcep-1f, 0x1.23130ep-1f, 0x1.2555bcp-1f, - 0x1.2795e2p-1f, 0x1.29d38p-1f, 0x1.2c0e9ep-1f, 0x1.2e4744p-1f, - 0x1.307d74p-1f, 0x1.32b134p-1f, 0x1.34e28ap-1f, 0x1.37117cp-1f, - 0x1.393e0ep-1f, 0x1.3b6844p-1f, 0x1.3d9026p-1f, 0x1.3fb5b8p-1f, - 0x1.41d8fep-1f, 0x1.43f9fep-1f, 0x1.4618bcp-1f, 0x1.48353ep-1f, - 0x1.4a4f86p-1f, 0x1.4c679ap-1f, 0x1.4e7d82p-1f, 0x1.50913cp-1f, - 0x1.52a2d2p-1f, 0x1.54b246p-1f, 0x1.56bf9ep-1f, 0x1.58cadcp-1f, - 0x1.5ad404p-1f, 0x1.5cdb1ep-1f, 0x1.5ee02ap-1f, 0x1.60e33p-1f}; - // Range reduction constants for logarithms. // r(0) = 1, r(127) = 0.5 // r(k) = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + k*2^-7)) diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h index 72b1d564..9ee31f0 100644 --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -17,14 +17,6 @@ namespace LIBC_NAMESPACE_DECL { -// Lookup table for (1/f) where f = 1 + n*2^(-7), n = 0..127, -// computed and stored as float precision constants. -extern const float ONE_OVER_F_FLOAT[128]; - -// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. -extern const float LOG_F_FLOAT[128]; - // Lookup table for range reduction constants r for logarithms. extern const float R[128]; diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index a2a6d60..72f8da8 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -121,49 +121,6 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { return r; } -// x should be positive, normal finite value -// TODO: Simplify range reduction and polynomial degree for float16. -// See issue #137190. 
-LIBC_INLINE static float log_eval_f(float x) { - // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). - using FPBits = fputil::FPBits<float>; - FPBits xbits(x); - - float ex = static_cast<float>(xbits.get_exponent()); - // p1 is the leading 7 bits of mx, i.e. - // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). - int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); - - // Set bits to (1 + (mx - p1*2^(-7))) - xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); - xbits.set_biased_exponent(FPBits::EXP_BIAS); - // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). - float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; - - // Minimax polynomial for log(1 + dx), generated using Sollya: - // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); - // > Q = (P - 1) / x; - // > for i from 0 to degree(Q) do print(coeff(Q, i)); - constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, - 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; - - float dx2 = dx * dx; - - float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); - float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); - float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); - - float p = fputil::polyeval(dx2, dx, c1, c2, c3); - - // Generated by Sollya with the following commands: - // > display = hexadecimal; - // > round(log(2), SG, RN); - constexpr float LOGF_2 = 0x1.62e43p-1f; - - float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); - return result; -} - } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index 34236ad..3d739c2 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -25,6 +25,7 @@ add_fp_unittest( libc.src.__support.math.atanf libc.src.__support.math.atanf16 libc.src.__support.math.atanhf + libc.src.__support.math.atanhf16 libc.src.__support.math.erff 
libc.src.__support.math.exp libc.src.__support.math.exp10 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index ffe1a24..6d3cf7c 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -20,6 +20,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) { EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16)); EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinhf16(0.0f16)); EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::atanf16(0.0f16)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::atanhf16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16)); |