diff options
Diffstat (limited to 'libc/src')
35 files changed, 936 insertions, 712 deletions
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index 6e447fc..37520ea 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -285,6 +285,9 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bit libc.src.__support.CPP.type_traits + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.FPUtil.generic.div + libc.src.__support.FPUtil.generic.mul libc.src.__support.macros.config libc.src.__support.macros.properties.types ) diff --git a/libc/src/__support/FPUtil/bfloat16.h b/libc/src/__support/FPUtil/bfloat16.h index fa45d73..3fab2b8 100644 --- a/libc/src/__support/FPUtil/bfloat16.h +++ b/libc/src/__support/FPUtil/bfloat16.h @@ -15,6 +15,9 @@ #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/comparison_operations.h" #include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/FPUtil/generic/div.h" +#include "src/__support/FPUtil/generic/mul.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/types.h" @@ -81,6 +84,28 @@ struct BFloat16 { LIBC_INLINE bool operator>=(BFloat16 other) const { return fputil::greater_than_or_equals(*this, other); } + + LIBC_INLINE constexpr BFloat16 operator-() const { + fputil::FPBits<bfloat16> result(*this); + result.set_sign(result.is_pos() ? 
Sign::NEG : Sign::POS); + return result.get_val(); + } + + LIBC_INLINE BFloat16 operator+(BFloat16 other) const { + return fputil::generic::add<BFloat16>(*this, other); + } + + LIBC_INLINE BFloat16 operator-(BFloat16 other) const { + return fputil::generic::sub<BFloat16>(*this, other); + } + + LIBC_INLINE BFloat16 operator*(BFloat16 other) const { + return fputil::generic::mul<bfloat16>(*this, other); + } + + LIBC_INLINE BFloat16 operator/(BFloat16 other) const { + return fputil::generic::div<bfloat16>(*this, other); + } }; // struct BFloat16 } // namespace fputil diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h index e999ece..54c80e8 100644 --- a/libc/src/__support/FPUtil/cast.h +++ b/libc/src/__support/FPUtil/cast.h @@ -27,47 +27,47 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_floating_point_v<OutType> && OutType> cast(InType x) { // Casting to the same type is a no-op. - if constexpr (cpp::is_same_v<InType, OutType>) + if constexpr (cpp::is_same_v<InType, OutType>) { return x; - - // bfloat16 is always defined (for now) - if constexpr (cpp::is_same_v<OutType, bfloat16> || - cpp::is_same_v<InType, bfloat16> + } else { + if constexpr (cpp::is_same_v<OutType, bfloat16> || + cpp::is_same_v<InType, bfloat16> #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION) - || cpp::is_same_v<OutType, float16> || - cpp::is_same_v<InType, float16> + || cpp::is_same_v<OutType, float16> || + cpp::is_same_v<InType, float16> #endif - ) { - using InFPBits = FPBits<InType>; - using InStorageType = typename InFPBits::StorageType; - using OutFPBits = FPBits<OutType>; - using OutStorageType = typename OutFPBits::StorageType; + ) { + using InFPBits = FPBits<InType>; + using InStorageType = typename InFPBits::StorageType; + using OutFPBits = FPBits<OutType>; + using OutStorageType = typename OutFPBits::StorageType; - InFPBits x_bits(x); + InFPBits x_bits(x); - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - 
raise_except_if_required(FE_INVALID); - return OutFPBits::quiet_nan().get_val(); - } + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + raise_except_if_required(FE_INVALID); + return OutFPBits::quiet_nan().get_val(); + } - InStorageType x_mant = x_bits.get_mantissa(); - if (InFPBits::FRACTION_LEN > OutFPBits::FRACTION_LEN) - x_mant >>= InFPBits::FRACTION_LEN - OutFPBits::FRACTION_LEN; - return OutFPBits::quiet_nan(x_bits.sign(), - static_cast<OutStorageType>(x_mant)) - .get_val(); - } + InStorageType x_mant = x_bits.get_mantissa(); + if (InFPBits::FRACTION_LEN > OutFPBits::FRACTION_LEN) + x_mant >>= InFPBits::FRACTION_LEN - OutFPBits::FRACTION_LEN; + return OutFPBits::quiet_nan(x_bits.sign(), + static_cast<OutStorageType>(x_mant)) + .get_val(); + } - if (x_bits.is_inf()) - return OutFPBits::inf(x_bits.sign()).get_val(); + if (x_bits.is_inf()) + return OutFPBits::inf(x_bits.sign()).get_val(); - constexpr size_t MAX_FRACTION_LEN = - cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN); - DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x); - return xd.template as<OutType, /*ShouldSignalExceptions=*/true>(); - } else { - return static_cast<OutType>(x); + constexpr size_t MAX_FRACTION_LEN = + cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN); + DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x); + return xd.template as<OutType, /*ShouldSignalExceptions=*/true>(); + } else { + return static_cast<OutType>(x); + } } } diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 3464e4a..cc0710f 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -576,7 +576,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a, // Check the leading bit directly, should be faster than using clz in // normalize(). 
if (result.mantissa.val[DyadicFloat<Bits>::MantissaType::WORD_COUNT - 1] >> - 63 == + (DyadicFloat<Bits>::MantissaType::WORD_SIZE - 1) == 0) result.shift_left(1); } else { diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt index 117213f..b75efc8 100644 --- a/libc/src/__support/FPUtil/generic/CMakeLists.txt +++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt @@ -68,6 +68,7 @@ add_header_library( libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.attributes libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types ) add_header_library( @@ -77,6 +78,7 @@ add_header_library( DEPENDS libc.hdr.errno_macros libc.hdr.fenv_macros + libc.src.__support.CPP.algorithm libc.src.__support.CPP.bit libc.src.__support.CPP.type_traits libc.src.__support.FPUtil.basic_operations diff --git a/libc/src/__support/FPUtil/generic/add_sub.h b/libc/src/__support/FPUtil/generic/add_sub.h index 7205d8d..b2e9d81 100644 --- a/libc/src/__support/FPUtil/generic/add_sub.h +++ b/libc/src/__support/FPUtil/generic/add_sub.h @@ -87,8 +87,12 @@ add_or_sub(InType x, InType y) { return OutFPBits::inf(x_bits.sign()).get_val(); } - if (y_bits.is_inf()) - return OutFPBits::inf(y_bits.sign()).get_val(); + if (y_bits.is_inf()) { + if constexpr (IsSub) + return OutFPBits::inf(y_bits.sign().negate()).get_val(); + else + return OutFPBits::inf(y_bits.sign()).get_val(); + } if (x_bits.is_zero()) { if (y_bits.is_zero()) { @@ -100,13 +104,22 @@ add_or_sub(InType x, InType y) { } } - // volatile prevents Clang from converting tmp to OutType and then - // immediately back to InType before negating it, resulting in double - // rounding. 
- volatile InType tmp = y; - if constexpr (IsSub) - tmp = -tmp; - return cast<OutType>(tmp); + if constexpr (cpp::is_same_v<InType, bfloat16> && + cpp::is_same_v<OutType, bfloat16>) { + OutFPBits y_bits(y); + if constexpr (IsSub) + y_bits.set_sign(y_bits.sign().negate()); + return y_bits.get_val(); + } else { + + // volatile prevents Clang from converting tmp to OutType and then + // immediately back to InType before negating it, resulting in double + // rounding. + volatile InType tmp = y; + if constexpr (IsSub) + tmp = -tmp; + return cast<OutType>(tmp); + } } if (y_bits.is_zero()) @@ -161,8 +174,8 @@ add_or_sub(InType x, InType y) { int alignment = (max_bits.get_biased_exponent() - max_bits.is_normal()) - (min_bits.get_biased_exponent() - min_bits.is_normal()); - InStorageType aligned_min_mant = - min_mant >> cpp::min(alignment, RESULT_MANTISSA_LEN); + InStorageType aligned_min_mant = static_cast<InStorageType>( + min_mant >> cpp::min(alignment, RESULT_MANTISSA_LEN)); bool aligned_min_mant_sticky; if (alignment <= GUARD_BITS_LEN) diff --git a/libc/src/__support/FPUtil/generic/div.h b/libc/src/__support/FPUtil/generic/div.h index f0e4057..bf7d0b7 100644 --- a/libc/src/__support/FPUtil/generic/div.h +++ b/libc/src/__support/FPUtil/generic/div.h @@ -11,6 +11,7 @@ #include "hdr/errno_macros.h" #include "hdr/fenv_macros.h" +#include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/BasicOperations.h" @@ -34,8 +35,9 @@ div(InType x, InType y) { using OutStorageType = typename OutFPBits::StorageType; using InFPBits = FPBits<InType>; using InStorageType = typename InFPBits::StorageType; - using DyadicFloat = - DyadicFloat<cpp::bit_ceil(static_cast<size_t>(InFPBits::SIG_LEN + 1))>; + using DyadicFloat = DyadicFloat<cpp::max( + static_cast<size_t>(16), + cpp::bit_ceil(static_cast<size_t>(InFPBits::SIG_LEN + 1)))>; InFPBits x_bits(x); InFPBits y_bits(y); @@ -78,7 +80,7 @@ div(InType 
x, InType y) { } if (y_bits.is_inf()) - return OutFPBits::inf(result_sign).get_val(); + return OutFPBits::zero(result_sign).get_val(); if (y_bits.is_zero()) { if (x_bits.is_zero()) { diff --git a/libc/src/__support/FPUtil/rounding_mode.h b/libc/src/__support/FPUtil/rounding_mode.h index 4ee0a0b..fdc8498 100644 --- a/libc/src/__support/FPUtil/rounding_mode.h +++ b/libc/src/__support/FPUtil/rounding_mode.h @@ -17,30 +17,24 @@ namespace LIBC_NAMESPACE_DECL { namespace fputil { +namespace generic { + // Quick free-standing test whether fegetround() == FE_UPWARD. // Using the following observation: // 1.0f + 2^-25 = 1.0f for FE_TONEAREST, FE_DOWNWARD, FE_TOWARDZERO // = 0x1.000002f for FE_UPWARD. -LIBC_INLINE static constexpr bool fenv_is_round_up() { - if (cpp::is_constant_evaluated()) { - return false; - } else { - volatile float x = 0x1.0p-25f; - return (1.0f + x != 1.0f); - } +LIBC_INLINE bool fenv_is_round_up() { + static volatile float x = 0x1.0p-25f; + return (1.0f + x != 1.0f); } // Quick free-standing test whether fegetround() == FE_DOWNWARD. // Using the following observation: // -1.0f - 2^-25 = -1.0f for FE_TONEAREST, FE_UPWARD, FE_TOWARDZERO // = -0x1.000002f for FE_DOWNWARD. -LIBC_INLINE static constexpr bool fenv_is_round_down() { - if (cpp::is_constant_evaluated()) { - return false; - } else { - volatile float x = 0x1.0p-25f; - return (-1.0f - x != -1.0f); - } +LIBC_INLINE bool fenv_is_round_down() { + static volatile float x = 0x1.0p-25f; + return (-1.0f - x != -1.0f); } // Quick free-standing test whether fegetround() == FE_TONEAREST. 
@@ -49,14 +43,10 @@ LIBC_INLINE static constexpr bool fenv_is_round_down() { // = 0x1.100002p0f for FE_UPWARD, // 1.5f - 2^-24 = 1.5f for FE_TONEAREST, FE_UPWARD // = 0x1.0ffffep-1f for FE_DOWNWARD, FE_TOWARDZERO -LIBC_INLINE static constexpr bool fenv_is_round_to_nearest() { - if (cpp::is_constant_evaluated()) { - return true; - } else { - volatile float x = 0x1.0p-24f; - float y = 1.5f + x; - return (y == 1.5f - x); - } +LIBC_INLINE bool fenv_is_round_to_nearest() { + static volatile float x = 0x1.0p-24f; + float y = 1.5f + x; + return (y == 1.5f - x); } // Quick free-standing test whether fegetround() == FE_TOWARDZERO. @@ -69,13 +59,56 @@ LIBC_INLINE static constexpr bool fenv_is_round_to_nearest() { // (0x1.000002p0f + 2^-24) + (-1.0f - 2^-24) = 2^-23 for FE_TOWARDZERO // = 2^-22 for FE_TONEAREST, FE_UPWARD // = 0 for FE_DOWNWARD +LIBC_INLINE bool fenv_is_round_to_zero() { + static volatile float x = 0x1.0p-24f; + float y = x; + return ((0x1.000002p0f + y) + (-1.0f - y) == 0x1.0p-23f); +} + +// Quick free standing get rounding mode based on the above observations. +LIBC_INLINE int quick_get_round() { + static volatile float x = 0x1.0p-24f; + float y = x; + float z = (0x1.000002p0f + y) + (-1.0f - y); + + if (z == 0.0f) + return FE_DOWNWARD; + if (z == 0x1.0p-23f) + return FE_TOWARDZERO; + return (2.0f + y == 2.0f) ? 
FE_TONEAREST : FE_UPWARD; +} + +} // namespace generic + +LIBC_INLINE static constexpr bool fenv_is_round_up() { + if (cpp::is_constant_evaluated()) { + return false; + } else { + return generic::fenv_is_round_up(); + } +} + +LIBC_INLINE static constexpr bool fenv_is_round_down() { + if (cpp::is_constant_evaluated()) { + return false; + } else { + return generic::fenv_is_round_down(); + } +} + +LIBC_INLINE static constexpr bool fenv_is_round_to_nearest() { + if (cpp::is_constant_evaluated()) { + return true; + } else { + return generic::fenv_is_round_to_nearest(); + } +} + LIBC_INLINE static constexpr bool fenv_is_round_to_zero() { if (cpp::is_constant_evaluated()) { return false; } else { - volatile float x = 0x1.0p-24f; - volatile float y = 0x1.000002p0f + x; - return (y + (-1.0f - x) == 0x1.0p-23f); + return generic::fenv_is_round_to_zero(); } } @@ -84,15 +117,7 @@ LIBC_INLINE static constexpr int quick_get_round() { if (cpp::is_constant_evaluated()) { return FE_TONEAREST; } else { - volatile float x = 0x1.0p-24f; - volatile float y = 0x1.000002p0f + x; - float z = y + (-1.0f - x); - - if (z == 0.0f) - return FE_DOWNWARD; - if (z == 0x1.0p-23f) - return FE_TOWARDZERO; - return (2.0f + x == 2.0f) ? 
FE_TONEAREST : FE_UPWARD; + return generic::quick_get_round(); } } diff --git a/libc/src/__support/OSUtil/linux/aarch64/vdso.h b/libc/src/__support/OSUtil/linux/aarch64/vdso.h index 3c4c620..ee5777a 100644 --- a/libc/src/__support/OSUtil/linux/aarch64/vdso.h +++ b/libc/src/__support/OSUtil/linux/aarch64/vdso.h @@ -23,6 +23,8 @@ LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) { return "__kernel_clock_gettime"; case VDSOSym::ClockGetRes: return "__kernel_clock_getres"; + case VDSOSym::GetRandom: + return "__kernel_getrandom"; default: return ""; } diff --git a/libc/src/__support/OSUtil/linux/vdso_sym.h b/libc/src/__support/OSUtil/linux/vdso_sym.h index 968e153..01f0b72 100644 --- a/libc/src/__support/OSUtil/linux/vdso_sym.h +++ b/libc/src/__support/OSUtil/linux/vdso_sym.h @@ -35,7 +35,8 @@ enum class VDSOSym { RTSigReturn, FlushICache, RiscvHwProbe, - VDSOSymCount + GetRandom, + VDSOSymCount, }; template <VDSOSym sym> LIBC_INLINE constexpr auto dispatcher() { @@ -60,6 +61,9 @@ template <VDSOSym sym> LIBC_INLINE constexpr auto dispatcher() { else if constexpr (sym == VDSOSym::RiscvHwProbe) return static_cast<int (*)(riscv_hwprobe *, size_t, size_t, cpu_set_t *, unsigned)>(nullptr); + else if constexpr (sym == VDSOSym::GetRandom) + return static_cast<int (*)(void *, size_t, unsigned int, void *, size_t)>( + nullptr); else return static_cast<void *>(nullptr); } diff --git a/libc/src/__support/OSUtil/linux/x86_64/vdso.h b/libc/src/__support/OSUtil/linux/x86_64/vdso.h index abe7c33..f46fcb0 100644 --- a/libc/src/__support/OSUtil/linux/x86_64/vdso.h +++ b/libc/src/__support/OSUtil/linux/x86_64/vdso.h @@ -29,6 +29,8 @@ LIBC_INLINE constexpr cpp::string_view symbol_name(VDSOSym sym) { return "__vdso_time"; case VDSOSym::ClockGetRes: return "__vdso_clock_getres"; + case VDSOSym::GetRandom: + return "__vdso_getrandom"; default: return ""; } diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h index c647467..4ff374b 100644 
--- a/libc/src/__support/macros/attributes.h +++ b/libc/src/__support/macros/attributes.h @@ -28,7 +28,32 @@ #define LIBC_INLINE_ASM __asm__ __volatile__ #define LIBC_UNUSED __attribute__((unused)) -#ifdef LIBC_TARGET_ARCH_IS_GPU +// Uses the platform specific specialization +#define LIBC_THREAD_MODE_PLATFORM 0 + +// Mutex guards nothing, used in single-threaded implementations +#define LIBC_THREAD_MODE_SINGLE 1 + +// Vendor provides implementation +#define LIBC_THREAD_MODE_EXTERNAL 2 + +// libcxx doesn't define LIBC_THREAD_MODE, unless that is passed in the command +// line in the CMake invocation. This defaults to the original implementation +// (before changes in https://github.com/llvm/llvm-project/pull/145358) +#ifndef LIBC_THREAD_MODE +#define LIBC_THREAD_MODE LIBC_THREAD_MODE_PLATFORM +#endif // LIBC_THREAD_MODE + +#if LIBC_THREAD_MODE != LIBC_THREAD_MODE_PLATFORM && \ + LIBC_THREAD_MODE != LIBC_THREAD_MODE_SINGLE && \ + LIBC_THREAD_MODE != LIBC_THREAD_MODE_EXTERNAL +#error LIBC_THREAD_MODE must be one of the following values: \ +LIBC_THREAD_MODE_PLATFORM, \ +LIBC_THREAD_MODE_SINGLE, \ +LIBC_THREAD_MODE_EXTERNAL. 
+#endif + +#if LIBC_THREAD_MODE == LIBC_THREAD_MODE_SINGLE #define LIBC_THREAD_LOCAL #else #define LIBC_THREAD_LOCAL thread_local diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 500dd9d..e1076ed 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -287,6 +287,21 @@ add_header_library( ) add_header_library( + atanhf16 + HDRS + atanhf16.h + DEPENDS + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( asinf HDRS asinf.h @@ -317,6 +332,21 @@ add_header_library( ) add_header_library( + cbrt + HDRS + cbrt.h + DEPENDS + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.optimization + libc.src.__support.integer_literals +) + +add_header_library( erff HDRS erff.h diff --git a/libc/src/__support/math/atanhf16.h b/libc/src/__support/math/atanhf16.h new file mode 100644 index 0000000..80929dd --- /dev/null +++ b/libc/src/__support/math/atanhf16.h @@ -0,0 +1,234 @@ +//===-- Implementation header for atanhf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +namespace atanhf16_internal { + +// Lookup table for 1/f = 1 / (1 + n*2^(-7)) where n = 0..127, +// computed and stored as float precision constants. +// Generated by Sollya with the following commands: +// display = hexadecimal; +// for n from 0 to 127 do { print(single(1 / (1 + n / 128.0))); }; +static constexpr float ONE_OVER_F_FLOAT[128] = { + 0x1p0f, 0x1.fc07fp-1f, 0x1.f81f82p-1f, 0x1.f4465ap-1f, + 0x1.f07c2p-1f, 0x1.ecc07cp-1f, 0x1.e9131ap-1f, 0x1.e573acp-1f, + 0x1.e1e1e2p-1f, 0x1.de5d6ep-1f, 0x1.dae608p-1f, 0x1.d77b66p-1f, + 0x1.d41d42p-1f, 0x1.d0cb58p-1f, 0x1.cd8568p-1f, 0x1.ca4b3p-1f, + 0x1.c71c72p-1f, 0x1.c3f8fp-1f, 0x1.c0e07p-1f, 0x1.bdd2b8p-1f, + 0x1.bacf92p-1f, 0x1.b7d6c4p-1f, 0x1.b4e81cp-1f, 0x1.b20364p-1f, + 0x1.af286cp-1f, 0x1.ac5702p-1f, 0x1.a98ef6p-1f, 0x1.a6d01ap-1f, + 0x1.a41a42p-1f, 0x1.a16d4p-1f, 0x1.9ec8eap-1f, 0x1.9c2d14p-1f, + 0x1.99999ap-1f, 0x1.970e5p-1f, 0x1.948b1p-1f, 0x1.920fb4p-1f, + 0x1.8f9c18p-1f, 0x1.8d3018p-1f, 0x1.8acb9p-1f, 0x1.886e6p-1f, + 0x1.861862p-1f, 0x1.83c978p-1f, 0x1.818182p-1f, 0x1.7f406p-1f, + 0x1.7d05f4p-1f, 0x1.7ad22p-1f, 0x1.78a4c8p-1f, 0x1.767dcep-1f, + 0x1.745d18p-1f, 0x1.724288p-1f, 0x1.702e06p-1f, 0x1.6e1f76p-1f, + 0x1.6c16c2p-1f, 0x1.6a13cep-1f, 0x1.681682p-1f, 
0x1.661ec6p-1f, + 0x1.642c86p-1f, 0x1.623fa8p-1f, 0x1.605816p-1f, 0x1.5e75bcp-1f, + 0x1.5c9882p-1f, 0x1.5ac056p-1f, 0x1.58ed24p-1f, 0x1.571ed4p-1f, + 0x1.555556p-1f, 0x1.539094p-1f, 0x1.51d07ep-1f, 0x1.501502p-1f, + 0x1.4e5e0ap-1f, 0x1.4cab88p-1f, 0x1.4afd6ap-1f, 0x1.49539ep-1f, + 0x1.47ae14p-1f, 0x1.460cbcp-1f, 0x1.446f86p-1f, 0x1.42d662p-1f, + 0x1.414142p-1f, 0x1.3fb014p-1f, 0x1.3e22ccp-1f, 0x1.3c995ap-1f, + 0x1.3b13b2p-1f, 0x1.3991c2p-1f, 0x1.381382p-1f, 0x1.3698ep-1f, + 0x1.3521dp-1f, 0x1.33ae46p-1f, 0x1.323e34p-1f, 0x1.30d19p-1f, + 0x1.2f684cp-1f, 0x1.2e025cp-1f, 0x1.2c9fb4p-1f, 0x1.2b404ap-1f, + 0x1.29e412p-1f, 0x1.288b02p-1f, 0x1.27350cp-1f, 0x1.25e228p-1f, + 0x1.24924ap-1f, 0x1.234568p-1f, 0x1.21fb78p-1f, 0x1.20b47p-1f, + 0x1.1f7048p-1f, 0x1.1e2ef4p-1f, 0x1.1cf06ap-1f, 0x1.1bb4a4p-1f, + 0x1.1a7b96p-1f, 0x1.194538p-1f, 0x1.181182p-1f, 0x1.16e068p-1f, + 0x1.15b1e6p-1f, 0x1.1485fp-1f, 0x1.135c82p-1f, 0x1.12358ep-1f, + 0x1.111112p-1f, 0x1.0fef02p-1f, 0x1.0ecf56p-1f, 0x1.0db20ap-1f, + 0x1.0c9714p-1f, 0x1.0b7e6ep-1f, 0x1.0a681p-1f, 0x1.0953f4p-1f, + 0x1.08421p-1f, 0x1.07326p-1f, 0x1.0624dep-1f, 0x1.05198p-1f, + 0x1.041042p-1f, 0x1.03091cp-1f, 0x1.020408p-1f, 0x1.010102p-1f}; + +// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, +// computed and stored as float precision constants. 
+// Generated by Sollya with the following commands: +// display = hexadecimal; +// for n from 0 to 127 do { print(single(log(1 + n / 128.0))); }; +static constexpr float LOG_F_FLOAT[128] = { + 0.0f, 0x1.fe02a6p-8f, 0x1.fc0a8cp-7f, 0x1.7b91bp-6f, + 0x1.f829bp-6f, 0x1.39e87cp-5f, 0x1.77459p-5f, 0x1.b42dd8p-5f, + 0x1.f0a30cp-5f, 0x1.16536ep-4f, 0x1.341d7ap-4f, 0x1.51b074p-4f, + 0x1.6f0d28p-4f, 0x1.8c345ep-4f, 0x1.a926d4p-4f, 0x1.c5e548p-4f, + 0x1.e27076p-4f, 0x1.fec914p-4f, 0x1.0d77e8p-3f, 0x1.1b72aep-3f, + 0x1.29553p-3f, 0x1.371fc2p-3f, 0x1.44d2b6p-3f, 0x1.526e5ep-3f, + 0x1.5ff308p-3f, 0x1.6d60fep-3f, 0x1.7ab89p-3f, 0x1.87fa06p-3f, + 0x1.9525aap-3f, 0x1.a23bc2p-3f, 0x1.af3c94p-3f, 0x1.bc2868p-3f, + 0x1.c8ff7cp-3f, 0x1.d5c216p-3f, 0x1.e27076p-3f, 0x1.ef0adcp-3f, + 0x1.fb9186p-3f, 0x1.04025ap-2f, 0x1.0a324ep-2f, 0x1.1058cp-2f, + 0x1.1675cap-2f, 0x1.1c898cp-2f, 0x1.22942p-2f, 0x1.2895a2p-2f, + 0x1.2e8e2cp-2f, 0x1.347ddap-2f, 0x1.3a64c6p-2f, 0x1.404308p-2f, + 0x1.4618bcp-2f, 0x1.4be5fap-2f, 0x1.51aad8p-2f, 0x1.576772p-2f, + 0x1.5d1bdcp-2f, 0x1.62c83p-2f, 0x1.686c82p-2f, 0x1.6e08eap-2f, + 0x1.739d8p-2f, 0x1.792a56p-2f, 0x1.7eaf84p-2f, 0x1.842d1ep-2f, + 0x1.89a338p-2f, 0x1.8f11e8p-2f, 0x1.947942p-2f, 0x1.99d958p-2f, + 0x1.9f323ep-2f, 0x1.a4840ap-2f, 0x1.a9cecap-2f, 0x1.af1294p-2f, + 0x1.b44f78p-2f, 0x1.b9858ap-2f, 0x1.beb4dap-2f, 0x1.c3dd7ap-2f, + 0x1.c8ff7cp-2f, 0x1.ce1afp-2f, 0x1.d32fe8p-2f, 0x1.d83e72p-2f, + 0x1.dd46ap-2f, 0x1.e24882p-2f, 0x1.e74426p-2f, 0x1.ec399ep-2f, + 0x1.f128f6p-2f, 0x1.f6124p-2f, 0x1.faf588p-2f, 0x1.ffd2ep-2f, + 0x1.02552ap-1f, 0x1.04bdfap-1f, 0x1.0723e6p-1f, 0x1.0986f4p-1f, + 0x1.0be72ep-1f, 0x1.0e4498p-1f, 0x1.109f3ap-1f, 0x1.12f71ap-1f, + 0x1.154c3ep-1f, 0x1.179eacp-1f, 0x1.19ee6cp-1f, 0x1.1c3b82p-1f, + 0x1.1e85f6p-1f, 0x1.20cdcep-1f, 0x1.23130ep-1f, 0x1.2555bcp-1f, + 0x1.2795e2p-1f, 0x1.29d38p-1f, 0x1.2c0e9ep-1f, 0x1.2e4744p-1f, + 0x1.307d74p-1f, 0x1.32b134p-1f, 0x1.34e28ap-1f, 0x1.37117cp-1f, + 0x1.393e0ep-1f, 0x1.3b6844p-1f, 0x1.3d9026p-1f, 
0x1.3fb5b8p-1f, + 0x1.41d8fep-1f, 0x1.43f9fep-1f, 0x1.4618bcp-1f, 0x1.48353ep-1f, + 0x1.4a4f86p-1f, 0x1.4c679ap-1f, 0x1.4e7d82p-1f, 0x1.50913cp-1f, + 0x1.52a2d2p-1f, 0x1.54b246p-1f, 0x1.56bf9ep-1f, 0x1.58cadcp-1f, + 0x1.5ad404p-1f, 0x1.5cdb1ep-1f, 0x1.5ee02ap-1f, 0x1.60e33p-1f}; + +// x should be positive, normal finite value +// TODO: Simplify range reduction and polynomial degree for float16. +// See issue #137190. +LIBC_INLINE static float log_eval_f(float x) { + // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). + using FPBits = fputil::FPBits<float>; + FPBits xbits(x); + + float ex = static_cast<float>(xbits.get_exponent()); + // p1 is the leading 7 bits of mx, i.e. + // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). + int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); + + // Set bits to (1 + (mx - p1*2^(-7))) + xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); + xbits.set_biased_exponent(FPBits::EXP_BIAS); + // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). 
+ float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; + + // Minimax polynomial for log(1 + dx), generated using Sollya: + // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); + // > Q = (P - 1) / x; + // > for i from 0 to degree(Q) do print(coeff(Q, i)); + constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, + 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; + + float dx2 = dx * dx; + + float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); + float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); + float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); + + float p = fputil::polyeval(dx2, dx, c1, c2, c3); + + // Generated by Sollya with the following commands: + // > display = hexadecimal; + // > round(log(2), SG, RN); + constexpr float LOGF_2 = 0x1.62e43p-1f; + + float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); + return result; +} + +} // namespace atanhf16_internal + +LIBC_INLINE static constexpr float16 atanhf16(float16 x) { + constexpr size_t N_EXCEPTS = 1; + constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANHF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.a5cp-4, atanhf16(x) = 0x1.a74p-4 (RZ) + {0x2E97, 0x2E9D, 1, 0, 0}, + }}; + + using namespace atanhf16_internal; + using FPBits = fputil::FPBits<float16>; + + FPBits xbits(x); + Sign sign = xbits.sign(); + uint16_t x_abs = xbits.abs().uintval(); + + // |x| >= 1 + if (LIBC_UNLIKELY(x_abs >= 0x3c00U)) { + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + + // |x| == 1.0 + if (x_abs == 0x3c00U) { + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_DIVBYZERO); + return FPBits::inf(sign).get_val(); + } + // |x| > 1.0 + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + if (auto r = 
ATANHF16_EXCEPTS.lookup(xbits.uintval()); + LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // For |x| less than approximately 0.24 + if (LIBC_UNLIKELY(x_abs <= 0x33f3U)) { + // atanh(+/-0) = +/-0 + if (LIBC_UNLIKELY(x_abs == 0U)) + return x; + // The Taylor expansion of atanh(x) is: + // atanh(x) = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + x^11/11 + // = x * [1 + x^2/3 + x^4/5 + x^6/7 + x^8/9 + x^10/11] + // When |x| < 2^-13 (0x0800U), this can be approximated by: + // atanh(x) ≈ x + (1/3)*x^3 + if (LIBC_UNLIKELY(x_abs < 0x0800U)) { + float xf = x; + return fputil::cast<float16>(xf + 0x1.555556p-2f * xf * xf * xf); + } + + // For 2^-13 <= |x| <= 0x1.fccp-3 (~0.24): + // Let t = x^2. + // Define P(t) ≈ (1/3)*t + (1/5)*t^2 + (1/7)*t^3 + (1/9)*t^4 + (1/11)*t^5. + // Coefficients (from Sollya, RN, hexadecimal): + // 1/3 = 0x1.555556p-2, 1/5 = 0x1.99999ap-3, 1/7 = 0x1.24924ap-3, + // 1/9 = 0x1.c71c72p-4, 1/11 = 0x1.745d18p-4 + // Thus, atanh(x) ≈ x * (1 + P(x^2)). + float xf = x; + float x2 = xf * xf; + float pe = fputil::polyeval(x2, 0.0f, 0x1.555556p-2f, 0x1.99999ap-3f, + 0x1.24924ap-3f, 0x1.c71c72p-4f, 0x1.745d18p-4f); + return fputil::cast<float16>(fputil::multiply_add(xf, pe, xf)); + } + + float xf = x; + return fputil::cast<float16>(0.5 * log_eval_f((xf + 1.0f) / (xf - 1.0f))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF16_H diff --git a/libc/src/__support/math/cbrt.h b/libc/src/__support/math/cbrt.h new file mode 100644 index 0000000..9d86bf3 --- /dev/null +++ b/libc/src/__support/math/cbrt.h @@ -0,0 +1,350 @@ +//===-- Implementation header for cbrt --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_SRC___SUPPORT_MATH_CBRT_H +#define LIBC_SRC___SUPPORT_MATH_CBRT_H + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/integer_literals.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) +#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS +#endif + +namespace cbrt_internal { +using namespace fputil; + +// Initial approximation of x^(-2/3) for 1 <= x < 2. +// Polynomial generated by Sollya with: +// > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); +// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); +// 0x1.28...p-21 +LIBC_INLINE static double intial_approximation(double x) { + constexpr double COEFFS[8] = { + 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, + -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, + 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, + }; + + double x_sq = x * x; + + double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]); + double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]); + double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]); + double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]); + + double x_4 = x_sq * x_sq; + double d0 = fputil::multiply_add(x_sq, c1, c0); + double d1 = fputil::multiply_add(x_sq, c3, c2); + + return fputil::multiply_add(x_4, d1, d0); +} + +// Get the error term for Newton iteration: +// h(x) = x^3 * a^2 - 1, +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE +LIBC_INLINE static double get_error(const DoubleDouble &x_3, + const 
DoubleDouble &a_sq) { + return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + + fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); +} +#else +LIBC_INLINE static constexpr double get_error(const DoubleDouble &x_3, + const DoubleDouble &a_sq) { + DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); + return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; +} +#endif + +} // namespace cbrt_internal + +// Correctly rounded cbrt algorithm: +// +// === Step 1 - Range reduction === +// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as: +// x_r = 1.m +// a = (-1)^s * 2^(e % 3) * (1.m) +// Then cbrt(x) = x^(1/3) can be computed as: +// x^(1/3) = 2^(e / 3) * a^(1/3). +// +// In order to avoid division, we compute a^(-2/3) using Newton method and then +// multiply the results by a: +// a^(1/3) = a * a^(-2/3). +// +// === Step 2 - First approximation to a^(-2/3) === +// First, we use a degree-7 minimax polynomial generated by Sollya to +// approximate x_r^(-2/3) for 1 <= x_r < 2. +// p = P(x_r) ~ x_r^(-2/3), +// with relative errors bounded by: +// | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21. +// +// Then we multiply with 2^(e % 3) from a small lookup table to get: +// x_0 = 2^(-2*(e % 3)/3) * p +// ~ 2^(-2*(e % 3)/3) * x_r^(-2/3) +// = a^(-2/3) +// With relative errors: +// | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21. +// This step is done in double precision. +// +// === Step 3 - First Newton iteration === +// We follow the method described in: +// Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation +// in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf +// to derive multiplicative Newton iterations as below: +// Let x_n be the nth approximation to a^(-2/3). Define the n^th error as: +// h_n = x_n^3 * a^2 - 1 +// Then: +// a^(-2/3) = x_n / (1 + h_n)^(1/3) +// = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...) +// using the Taylor series expansion of (1 + h_n)^(-1/3). 
+// +// Apply to x_0 above: +// h_0 = x_0^3 * a^2 - 1 +// = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)), +// it's bounded by: +// |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17. +// So in the first iteration step, we use: +// x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3) +// Its relative error is bounded by: +// | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70. +// Then we perform Ziv's rounding test and check if the answer is exact. +// This step is done in double-double precision. +// +// === Step 4 - Second Newton iteration === +// If the Ziv's rounding test from the previous step fails, we define the error +// term: +// h_1 = x_1^3 * a^2 - 1, +// And perform another iteration: +// x_2 = x_1 * (1 - h_1 / 3) +// with the relative errors exceed the precision of double-double. +// We then check the Ziv's accuracy test with relative errors < 2^-102 to +// compensate for rounding errors. +// +// === Step 5 - Final iteration === +// If the Ziv's accuracy test from the previous step fails, we perform another +// iteration in 128-bit precision and check for exact outputs. +// +// TODO: It is possible to replace this costly computation step with special +// exceptional handling, similar to what was done in the CORE-MATH project: +// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c + +LIBC_INLINE static constexpr double cbrt(double x) { + using DoubleDouble = fputil::DoubleDouble; + using namespace cbrt_internal; + using FPBits = fputil::FPBits<double>; + + uint64_t x_abs = FPBits(x).abs().uintval(); + + unsigned exp_bias_correction = 682; // 1023 * 2/3 + + if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || + x_abs >= FPBits::inf().uintval())) { + if (x == 0.0 || x_abs >= FPBits::inf().uintval()) + // x is 0, Inf, or NaN. + // Make sure it works for FTZ/DAZ modes. + return static_cast<double>(x + x); + + // x is non-zero denormal number. + // Normalize x. 
+ x *= 0x1.0p60; + exp_bias_correction -= 20; + } + + FPBits x_bits(x); + + // When using biased exponent of x in double precision, + // x_e = real_exponent_of_x + 1023 + // Then: + // x_e / 3 = real_exponent_of_x / 3 + 1023/3 + // = real_exponent_of_x / 3 + 341 + // So to make it the correct biased exponent of x^(1/3), we add + // 1023 - 341 = 682 + // to the quotient x_e / 3. + unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent()); + unsigned out_e = (x_e / 3 + exp_bias_correction); + unsigned shift_e = x_e % 3; + + // Set x_r = 1.mantissa + double x_r = + FPBits(x_bits.get_mantissa() | + (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) + .get_val(); + + // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa) + uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; + a_bits |= + (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS)) + << FPBits::FRACTION_LEN); + double a = FPBits(a_bits).get_val(); + + // Initial approximation of x_r^(-2/3). + double p = intial_approximation(x_r); + + // Look up for 2^(-2*n/3) used for first approximation step. + constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, + 0x1.965fea53d6e3dp-2}; + + // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8. + // Relative error: < 1.16 * 2^(-21). + double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p); + + // First iteration in double precision. + DoubleDouble a_sq = fputil::exact_mult(a, a); + + // h0 = x0^3 * a^2 - 1 + DoubleDouble x0_sq = fputil::exact_mult(x0, x0); + DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq); + + double h0 = get_error(x0_3, a_sq); + +#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + constexpr double REL_ERROR = 0; +#else + constexpr double REL_ERROR = 0x1.0p-51; +#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + + // Taylor polynomial of (1 + h)^(-1/3): + // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ... 
+ constexpr double ERR_COEFFS[3] = { + -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error + 0x1.c71c71c71c71cp-3, // 2/9 + -0x1.61f9add3c0ca4p-3, // -14/81 + }; + // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error. + double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]); + double x0_h0 = x0 * h0; + + // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81) + // x1 approximate a^(-2/3) with relative errors bounded by: + // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR + DoubleDouble x1_dd{x0_h0 * e0, x0}; + + // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3). + DoubleDouble r1 = fputil::quick_mult(a, x1_dd); + + // Lambda function to update the exponent of the result. + auto update_exponent = [=](double r) -> double { + uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; + // Adjust exponent and sign. + uint64_t r_bits = + r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN); + return FPBits(r_bits).get_val(); + }; + +#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS + // TODO: We probably don't need to use double-double if accurate tests and + // passes are skipped. + return update_exponent(r1.hi + r1.lo); +#else + // Accurate checks and passes. + double r1_lower = r1.hi + r1.lo; + double r1_upper = + r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo); + + // Ziv's accuracy test. + if (LIBC_LIKELY(r1_upper == r1_lower)) { + // Test for exact outputs. + // Check if lower (52 - 17 = 35) bits are 0's. + if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == + 0)) { + double r1_err = (r1_lower - r1.hi) - r1.lo; + if (FPBits(r1_err).abs().get_val() < 0x1.0p69) + fputil::clear_except_if_required(FE_INEXACT); + } + + return update_exponent(r1_lower); + } + + // Accuracy test failed, perform another Newton iteration. + double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; + + // Second iteration in double-double precision. + // h1 = x1^3 * a^2 - 1. 
+ DoubleDouble x1_sq = fputil::exact_mult(x1, x1); + DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq); + double h1 = get_error(x1_3, a_sq); + + // e1 = -x1*h1/3. + double e1 = h1 * (x1 * -0x1.5555555555555p-2); + // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101. + DoubleDouble x2 = fputil::exact_add(x1, e1); + // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100. + DoubleDouble r2 = fputil::quick_mult(a, x2); + + double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo); + double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo); + + // Ziv's accuracy test. + if (LIBC_LIKELY(r2_upper == r2_lower)) + return update_exponent(r2_upper); + + using Float128 = fputil::DyadicFloat<128>; + + // TODO: Investigate removing float128 and just list exceptional cases. + // Apply another Newton iteration with ~126-bit accuracy. + Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo)); + // x2^3 + Float128 x2_3 = + fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128); + // a^2 + Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a)); + // x2^3 * a^2 + Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128); + // h2 = x2^3 * a^2 - 1 + Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0)); + double h2 = static_cast<double>(h2_f128); + // t2 = 1 - h2 / 3 + Float128 t2 = + fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2))); + // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3) + Float128 x3 = fputil::quick_mul(x2_f128, t2); + // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3) + Float128 r3 = fputil::quick_mul(Float128(a), x3); + + // Check for exact cases: + Float128::MantissaType rounding_bits = + r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; + + double result = static_cast<double>(r3); + if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || + (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { + // Output is exact. 
+ r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; + + if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { + Float128 tmp{r3.sign, r3.exponent - 123, + 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; + Float128 r4 = fputil::quick_add(r3, tmp); + result = static_cast<double>(r4); + } else { + result = static_cast<double>(r3); + } + + fputil::clear_except_if_required(FE_INEXACT); + } + + return update_exponent(result); +#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_SRC___SUPPORT_MATH_CBRT_H diff --git a/libc/src/__support/threads/mutex.h b/libc/src/__support/threads/mutex.h index cbef0d0..f64f7e7 100644 --- a/libc/src/__support/threads/mutex.h +++ b/libc/src/__support/threads/mutex.h @@ -12,28 +12,6 @@ #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" -// Uses the platform specific specialization -#define LIBC_THREAD_MODE_PLATFORM 0 - -// Mutex guards nothing, used in single-threaded implementations -#define LIBC_THREAD_MODE_SINGLE 1 - -// Vendor provides implementation -#define LIBC_THREAD_MODE_EXTERNAL 2 - -#if !defined(LIBC_THREAD_MODE) -#error LIBC_THREAD_MODE is undefined -#endif // LIBC_THREAD_MODE - -#if LIBC_THREAD_MODE != LIBC_THREAD_MODE_PLATFORM && \ - LIBC_THREAD_MODE != LIBC_THREAD_MODE_SINGLE && \ - LIBC_THREAD_MODE != LIBC_THREAD_MODE_EXTERNAL -#error LIBC_THREAD_MODE must be one of the following values: \ -LIBC_THREAD_MODE_PLATFORM, \ -LIBC_THREAD_MODE_SINGLE, \ -LIBC_THREAD_MODE_EXTERNAL. 
-#endif - #if LIBC_THREAD_MODE == LIBC_THREAD_MODE_PLATFORM // Platform independent code will include this header file which pulls diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index 15d0f47..2667288 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -132,12 +132,6 @@ ErrorOr<char32_t> CharacterConverter::pop_utf32() { return utf32; } -size_t CharacterConverter::sizeAsUTF32() { - return 1; // a single utf-32 value can fit an entire character -} - -size_t CharacterConverter::sizeAsUTF8() { return state->total_bytes; } - ErrorOr<char8_t> CharacterConverter::pop_utf8() { if (isEmpty()) return Error(-1); @@ -170,5 +164,13 @@ ErrorOr<char8_t> CharacterConverter::pop_utf8() { return static_cast<char8_t>(output); } +template <> ErrorOr<char8_t> CharacterConverter::pop() { return pop_utf8(); } +template <> ErrorOr<char32_t> CharacterConverter::pop() { return pop_utf32(); } + +template <> size_t CharacterConverter::sizeAs<char8_t>() { + return state->total_bytes; +} +template <> size_t CharacterConverter::sizeAs<char32_t>() { return 1; } + } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index b6d918f..2cc28ab 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/mbstate.h" @@ -31,14 +32,14 @@ public: bool isEmpty(); bool isValidState(); - size_t sizeAsUTF32(); - size_t sizeAsUTF8(); + template <typename CharType> size_t sizeAs(); int push(char8_t utf8_byte); int push(char32_t utf32); ErrorOr<char8_t> pop_utf8(); 
ErrorOr<char32_t> pop_utf32(); + template <typename CharType> ErrorOr<CharType> pop(); }; } // namespace internal diff --git a/libc/src/__support/wchar/mbrtowc.cpp b/libc/src/__support/wchar/mbrtowc.cpp index 0f730d6..66cc68e 100644 --- a/libc/src/__support/wchar/mbrtowc.cpp +++ b/libc/src/__support/wchar/mbrtowc.cpp @@ -8,7 +8,6 @@ #include "src/__support/wchar/mbrtowc.h" #include "hdr/errno_macros.h" -#include "hdr/types/mbstate_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/common.h" diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h index 54e3152..6abb836 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.h +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -36,7 +36,7 @@ LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps, len, nmc); size_t dst_idx = 0; - ErrorOr<char32_t> converted = str_conv.popUTF32(); + ErrorOr<char32_t> converted = str_conv.pop<char32_t>(); while (converted.has_value()) { if (dst != nullptr) dst[dst_idx] = converted.value(); @@ -47,7 +47,7 @@ LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, return dst_idx; } dst_idx++; - converted = str_conv.popUTF32(); + converted = str_conv.pop<char32_t>(); } if (converted.error() == -1) { // if we hit conversion limit diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index 869ebdf..ba628bd 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/character_converter.h" @@ -53,9 +54,7 @@ public: size_t srclen = SIZE_MAX) : cr(ps), src(s), src_len(srclen), 
src_idx(0), num_to_write(dstlen) {} - // TODO: following functions are almost identical - // look into templating CharacterConverter pop functions - ErrorOr<char32_t> popUTF32() { + template <typename CharType> ErrorOr<CharType> pop() { if (num_to_write == 0) return Error(-1); @@ -64,7 +63,7 @@ public: if (!src_elements_read.has_value()) return Error(src_elements_read.error()); - if (cr.sizeAsUTF32() > num_to_write) { + if (cr.sizeAs<CharType>() > num_to_write) { cr.clear(); return Error(-1); } @@ -72,34 +71,9 @@ public: src_idx += src_elements_read.value(); } - auto out = cr.pop_utf32(); - if (out.has_value() && out.value() == L'\0') - src_len = src_idx; - - num_to_write--; - - return out; - } - - ErrorOr<char8_t> popUTF8() { - if (num_to_write == 0) - return Error(-1); - - if (cr.isEmpty() || src_idx == 0) { - auto src_elements_read = pushFullCharacter(); - if (!src_elements_read.has_value()) - return Error(src_elements_read.error()); - - if (cr.sizeAsUTF8() > num_to_write) { - cr.clear(); - return Error(-1); - } - - src_idx += src_elements_read.value(); - } - - auto out = cr.pop_utf8(); - if (out.has_value() && out.value() == '\0') + ErrorOr<CharType> out = cr.pop<CharType>(); + // if out isn't null terminator or an error + if (out.has_value() && out.value() == 0) src_len = src_idx; num_to_write--; diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 433097c..f593a0e 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -39,7 +39,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, reinterpret_cast<const char32_t *>(*ptr_to_src), ps, dest_len, num_src_widechars); size_t dst_idx = 0; - ErrorOr<char8_t> converted = str_conv.popUTF8(); + ErrorOr<char8_t> converted = str_conv.pop<char8_t>(); while (converted.has_value()) { if (dest != nullptr) dest[dst_idx] = converted.value(); @@ -51,7 +51,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict 
ptr_to_src, } dst_idx++; - converted = str_conv.popUTF8(); + converted = str_conv.pop<char8_t>(); } if (dest != nullptr) diff --git a/libc/src/dlfcn/CMakeLists.txt b/libc/src/dlfcn/CMakeLists.txt index e3a51ba..8ef0540 100644 --- a/libc/src/dlfcn/CMakeLists.txt +++ b/libc/src/dlfcn/CMakeLists.txt @@ -38,3 +38,25 @@ add_entrypoint_object( libc.include.dlfcn libc.src.errno.errno ) + +add_entrypoint_object( + dlinfo + SRCS + dlinfo.cpp + HDRS + dlinfo.h + DEPENDS + libc.include.dlfcn + libc.src.errno.errno +) + +add_entrypoint_object( + dladdr + SRCS + dladdr.cpp + HDRS + dladdr.h + DEPENDS + libc.include.dlfcn + libc.src.errno.errno +) diff --git a/libc/src/dlfcn/dladdr.cpp b/libc/src/dlfcn/dladdr.cpp new file mode 100644 index 0000000..61490fd --- /dev/null +++ b/libc/src/dlfcn/dladdr.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of dladdr ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dladdr.h" + +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +// TODO: https:// github.com/llvm/llvm-project/issues/97929 +LLVM_LIBC_FUNCTION(int, dladdr, (const void *addr, Dl_info *info)) { + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/dlfcn/dladdr.h b/libc/src/dlfcn/dladdr.h new file mode 100644 index 0000000..346fc8d --- /dev/null +++ b/libc/src/dlfcn/dladdr.h @@ -0,0 +1,20 @@ +//===-- Implementation header of dladdr -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLADDR_H +#define LLVM_LIBC_SRC_DLFCN_DLADDR_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int dladdr(const void *, Dl_info *); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_DLFCN_DLADDR_H diff --git a/libc/src/dlfcn/dlinfo.cpp b/libc/src/dlfcn/dlinfo.cpp new file mode 100644 index 0000000..d78cade --- /dev/null +++ b/libc/src/dlfcn/dlinfo.cpp @@ -0,0 +1,23 @@ + +//===-- Implementation of dlinfo ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "dlinfo.h" + +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +// TODO: https://github.com/llvm/llvm-project/issues/149911 +LLVM_LIBC_FUNCTION(int, dlinfo, + (void *restrict handle, int request, void *restrict info)) { + return -1; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/dlfcn/dlinfo.h b/libc/src/dlfcn/dlinfo.h new file mode 100644 index 0000000..c2c34f0 --- /dev/null +++ b/libc/src/dlfcn/dlinfo.h @@ -0,0 +1,20 @@ +//===-- Implementation header of dlinfo -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_DLFCN_DLINFO_H +#define LLVM_LIBC_SRC_DLFCN_DLINFO_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int dlinfo(void *restrict, int, void *restrict); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_DLFCN_DLINFO_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index bac043f..a866195 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3932,17 +3932,7 @@ add_entrypoint_object( HDRS ../atanhf16.h DEPENDS - .explogxf - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.atanhf16 ) add_entrypoint_object( @@ -4763,15 +4753,7 @@ add_entrypoint_object( HDRS ../cbrt.h DEPENDS - libc.hdr.fenv_macros - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization - libc.src.__support.integer_literals + libc.src.__support.math.cbrt ) add_entrypoint_object( diff --git a/libc/src/math/generic/atanhf16.cpp b/libc/src/math/generic/atanhf16.cpp index 57885ac..0539bac 100644 --- a/libc/src/math/generic/atanhf16.cpp +++ b/libc/src/math/generic/atanhf16.cpp @@ -7,92 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanhf16.h" -#include "explogxf.h" -#include "hdr/errno_macros.h" 
-#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/atanhf16.h" namespace LIBC_NAMESPACE_DECL { -static constexpr size_t N_EXCEPTS = 1; -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANHF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.a5cp-4, atanhf16(x) = 0x1.a74p-4 (RZ) - {0x2E97, 0x2E9D, 1, 0, 0}, -}}; - -LLVM_LIBC_FUNCTION(float16, atanhf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - - FPBits xbits(x); - Sign sign = xbits.sign(); - uint16_t x_abs = xbits.abs().uintval(); - - // |x| >= 1 - if (LIBC_UNLIKELY(x_abs >= 0x3c00U)) { - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - - // |x| == 1.0 - if (x_abs == 0x3c00U) { - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_DIVBYZERO); - return FPBits::inf(sign).get_val(); - } - // |x| > 1.0 - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - if (auto r = ATANHF16_EXCEPTS.lookup(xbits.uintval()); - LIBC_UNLIKELY(r.has_value())) - return r.value(); - - // For |x| less than approximately 0.24 - if (LIBC_UNLIKELY(x_abs <= 0x33f3U)) { - // atanh(+/-0) = +/-0 - if (LIBC_UNLIKELY(x_abs == 0U)) - return x; - // The Taylor expansion of atanh(x) is: - // atanh(x) = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + x^11/11 - // = x * [1 + x^2/3 + x^4/5 + x^6/7 + x^8/9 + x^10/11] - // When |x| < 2^-5 (0x0800U), this can be approximated by: - // atanh(x) ≈ x 
+ (1/3)*x^3 - if (LIBC_UNLIKELY(x_abs < 0x0800U)) { - float xf = x; - return fputil::cast<float16>(xf + 0x1.555556p-2f * xf * xf * xf); - } - - // For 2^-5 <= |x| <= 0x1.fccp-3 (~0.24): - // Let t = x^2. - // Define P(t) ≈ (1/3)*t + (1/5)*t^2 + (1/7)*t^3 + (1/9)*t^4 + (1/11)*t^5. - // Coefficients (from Sollya, RN, hexadecimal): - // 1/3 = 0x1.555556p-2, 1/5 = 0x1.99999ap-3, 1/7 = 0x1.24924ap-3, - // 1/9 = 0x1.c71c72p-4, 1/11 = 0x1.745d18p-4 - // Thus, atanh(x) ≈ x * (1 + P(x^2)). - float xf = x; - float x2 = xf * xf; - float pe = fputil::polyeval(x2, 0.0f, 0x1.555556p-2f, 0x1.99999ap-3f, - 0x1.24924ap-3f, 0x1.c71c72p-4f, 0x1.745d18p-4f); - return fputil::cast<float16>(fputil::multiply_add(xf, pe, xf)); - } - - float xf = x; - return fputil::cast<float16>(0.5 * log_eval_f((xf + 1.0f) / (xf - 1.0f))); -} +LLVM_LIBC_FUNCTION(float16, atanhf16, (float16 x)) { return math::atanhf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp index ce227e6..e9b69bb 100644 --- a/libc/src/math/generic/cbrt.cpp +++ b/libc/src/math/generic/cbrt.cpp @@ -7,334 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cbrt.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY - -#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) -#define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS -#endif +#include "src/__support/math/cbrt.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - 
-namespace { - -// Initial approximation of x^(-2/3) for 1 <= x < 2. -// Polynomial generated by Sollya with: -// > P = fpminimax(x^(-2/3), 7, [|D...|], [1, 2]); -// > dirtyinfnorm(P/x^(-2/3) - 1, [1, 2]); -// 0x1.28...p-21 -double intial_approximation(double x) { - constexpr double COEFFS[8] = { - 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, - -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, - 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, - }; - - double x_sq = x * x; - - double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]); - double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]); - double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]); - double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]); - - double x_4 = x_sq * x_sq; - double d0 = fputil::multiply_add(x_sq, c1, c0); - double d1 = fputil::multiply_add(x_sq, c3, c2); - - return fputil::multiply_add(x_4, d1, d0); -} - -// Get the error term for Newton iteration: -// h(x) = x^3 * a^2 - 1, -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE -double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { - return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + - fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); -} -#else -double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { - DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); - return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; -} -#endif - -} // anonymous namespace - -// Correctly rounded cbrt algorithm: -// -// === Step 1 - Range reduction === -// For x = (-1)^s * 2^e * (1.m), we get 2 reduced arguments x_r and a as: -// x_r = 1.m -// a = (-1)^s * 2^(e % 3) * (1.m) -// Then cbrt(x) = x^(1/3) can be computed as: -// x^(1/3) = 2^(e / 3) * a^(1/3). -// -// In order to avoid division, we compute a^(-2/3) using Newton method and then -// multiply the results by a: -// a^(1/3) = a * a^(-2/3). 
-// -// === Step 2 - First approximation to a^(-2/3) === -// First, we use a degree-7 minimax polynomial generated by Sollya to -// approximate x_r^(-2/3) for 1 <= x_r < 2. -// p = P(x_r) ~ x_r^(-2/3), -// with relative errors bounded by: -// | p / x_r^(-2/3) - 1 | < 1.16 * 2^-21. -// -// Then we multiply with 2^(e % 3) from a small lookup table to get: -// x_0 = 2^(-2*(e % 3)/3) * p -// ~ 2^(-2*(e % 3)/3) * x_r^(-2/3) -// = a^(-2/3) -// With relative errors: -// | x_0 / a^(-2/3) - 1 | < 1.16 * 2^-21. -// This step is done in double precision. -// -// === Step 3 - First Newton iteration === -// We follow the method described in: -// Sibidanov, A. and Zimmermann, P., "Correctly rounded cubic root evaluation -// in double precision", https://core-math.gitlabpages.inria.fr/cbrt64.pdf -// to derive multiplicative Newton iterations as below: -// Let x_n be the nth approximation to a^(-2/3). Define the n^th error as: -// h_n = x_n^3 * a^2 - 1 -// Then: -// a^(-2/3) = x_n / (1 + h_n)^(1/3) -// = x_n * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3 + ...) -// using the Taylor series expansion of (1 + h_n)^(-1/3). -// -// Apply to x_0 above: -// h_0 = x_0^3 * a^2 - 1 -// = a^2 * (x_0 - a^(-2/3)) * (x_0^2 + x_0 * a^(-2/3) + a^(-4/3)), -// it's bounded by: -// |h_0| < 4 * 3 * 1.16 * 2^-21 * 4 < 2^-17. -// So in the first iteration step, we use: -// x_1 = x_0 * (1 - (1/3) * h_n + (2/9) * h_n^2 - (14/81) * h_n^3) -// Its relative error is bounded by: -// | x_1 / a^(-2/3) - 1 | < 35/242 * |h_0|^4 < 2^-70. -// Then we perform Ziv's rounding test and check if the answer is exact. -// This step is done in double-double precision. -// -// === Step 4 - Second Newton iteration === -// If the Ziv's rounding test from the previous step fails, we define the error -// term: -// h_1 = x_1^3 * a^2 - 1, -// And perform another iteration: -// x_2 = x_1 * (1 - h_1 / 3) -// with the relative errors exceed the precision of double-double. 
-// We then check the Ziv's accuracy test with relative errors < 2^-102 to -// compensate for rounding errors. -// -// === Step 5 - Final iteration === -// If the Ziv's accuracy test from the previous step fails, we perform another -// iteration in 128-bit precision and check for exact outputs. -// -// TODO: It is possible to replace this costly computation step with special -// exceptional handling, similar to what was done in the CORE-MATH project: -// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/cbrt/cbrt.c - -LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { - using FPBits = fputil::FPBits<double>; - - uint64_t x_abs = FPBits(x).abs().uintval(); - - unsigned exp_bias_correction = 682; // 1023 * 2/3 - - if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || - x_abs >= FPBits::inf().uintval())) { - if (x == 0.0 || x_abs >= FPBits::inf().uintval()) - // x is 0, Inf, or NaN. - // Make sure it works for FTZ/DAZ modes. - return static_cast<double>(x + x); - - // x is non-zero denormal number. - // Normalize x. - x *= 0x1.0p60; - exp_bias_correction -= 20; - } - - FPBits x_bits(x); - - // When using biased exponent of x in double precision, - // x_e = real_exponent_of_x + 1023 - // Then: - // x_e / 3 = real_exponent_of_x / 3 + 1023/3 - // = real_exponent_of_x / 3 + 341 - // So to make it the correct biased exponent of x^(1/3), we add - // 1023 - 341 = 682 - // to the quotient x_e / 3. 
- unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent()); - unsigned out_e = (x_e / 3 + exp_bias_correction); - unsigned shift_e = x_e % 3; - - // Set x_r = 1.mantissa - double x_r = - FPBits(x_bits.get_mantissa() | - (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) - .get_val(); - - // Set a = (-1)^x_sign * 2^(x_e % 3) * (1.mantissa) - uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; - a_bits |= - (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS)) - << FPBits::FRACTION_LEN); - double a = FPBits(a_bits).get_val(); - - // Initial approximation of x_r^(-2/3). - double p = intial_approximation(x_r); - - // Look up for 2^(-2*n/3) used for first approximation step. - constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, - 0x1.965fea53d6e3dp-2}; - - // x0 is an initial approximation of a^(-2/3) for 1 <= |a| < 8. - // Relative error: < 1.16 * 2^(-21). - double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p); - - // First iteration in double precision. - DoubleDouble a_sq = fputil::exact_mult(a, a); - - // h0 = x0^3 * a^2 - 1 - DoubleDouble x0_sq = fputil::exact_mult(x0, x0); - DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq); - - double h0 = get_error(x0_3, a_sq); - -#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS - constexpr double REL_ERROR = 0; -#else - constexpr double REL_ERROR = 0x1.0p-51; -#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS - - // Taylor polynomial of (1 + h)^(-1/3): - // (1 + h)^(-1/3) = 1 - h/3 + 2 h^2 / 9 - 14 h^3 / 81 + ... - constexpr double ERR_COEFFS[3] = { - -0x1.5555555555555p-2 - REL_ERROR, // -1/3 - relative_error - 0x1.c71c71c71c71cp-3, // 2/9 - -0x1.61f9add3c0ca4p-3, // -14/81 - }; - // e0 = -14 * h^2 / 81 + 2 * h / 9 - 1/3 - relative_error. 
- double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]); - double x0_h0 = x0 * h0; - - // x1 = x0 (1 - h0/3 + 2 h0^2 / 9 - 14 h0^3 / 81) - // x1 approximate a^(-2/3) with relative errors bounded by: - // | x1 / a^(-2/3) - 1 | < (34/243) h0^4 < h0 * REL_ERROR - DoubleDouble x1_dd{x0_h0 * e0, x0}; - - // r1 = x1 * a ~ a^(-2/3) * a = a^(1/3). - DoubleDouble r1 = fputil::quick_mult(a, x1_dd); - - // Lambda function to update the exponent of the result. - auto update_exponent = [=](double r) -> double { - uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; - // Adjust exponent and sign. - uint64_t r_bits = - r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN); - return FPBits(r_bits).get_val(); - }; - -#ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS - // TODO: We probably don't need to use double-double if accurate tests and - // passes are skipped. - return update_exponent(r1.hi + r1.lo); -#else - // Accurate checks and passes. - double r1_lower = r1.hi + r1.lo; - double r1_upper = - r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo); - - // Ziv's accuracy test. - if (LIBC_LIKELY(r1_upper == r1_lower)) { - // Test for exact outputs. - // Check if lower (52 - 17 = 35) bits are 0's. - if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == - 0)) { - double r1_err = (r1_lower - r1.hi) - r1.lo; - if (FPBits(r1_err).abs().get_val() < 0x1.0p69) - fputil::clear_except_if_required(FE_INEXACT); - } - - return update_exponent(r1_lower); - } - - // Accuracy test failed, perform another Newton iteration. - double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; - - // Second iteration in double-double precision. - // h1 = x1^3 * a^2 - 1. - DoubleDouble x1_sq = fputil::exact_mult(x1, x1); - DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq); - double h1 = get_error(x1_3, a_sq); - - // e1 = -x1*h1/3. - double e1 = h1 * (x1 * -0x1.5555555555555p-2); - // x2 = x1*(1 - h1/3) = x1 + e1 ~ a^(-2/3) with relative errors < 2^-101. 
- DoubleDouble x2 = fputil::exact_add(x1, e1); - // r2 = a * x2 ~ a * a^(-2/3) = a^(1/3) with relative errors < 2^-100. - DoubleDouble r2 = fputil::quick_mult(a, x2); - - double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo); - double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo); - - // Ziv's accuracy test. - if (LIBC_LIKELY(r2_upper == r2_lower)) - return update_exponent(r2_upper); - - // TODO: Investigate removing float128 and just list exceptional cases. - // Apply another Newton iteration with ~126-bit accuracy. - Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo)); - // x2^3 - Float128 x2_3 = - fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128); - // a^2 - Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a)); - // x2^3 * a^2 - Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128); - // h2 = x2^3 * a^2 - 1 - Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0)); - double h2 = static_cast<double>(h2_f128); - // t2 = 1 - h2 / 3 - Float128 t2 = - fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2))); - // x3 = x2 * (1 - h2 / 3) ~ a^(-2/3) - Float128 x3 = fputil::quick_mul(x2_f128, t2); - // r3 = a * x3 ~ a * a^(-2/3) = a^(1/3) - Float128 r3 = fputil::quick_mul(Float128(a), x3); - - // Check for exact cases: - Float128::MantissaType rounding_bits = - r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; - - double result = static_cast<double>(r3); - if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || - (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { - // Output is exact. 
- r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; - - if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { - Float128 tmp{r3.sign, r3.exponent - 123, - 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; - Float128 r4 = fputil::quick_add(r3, tmp); - result = static_cast<double>(r4); - } else { - result = static_cast<double>(r3); - } - - fputil::clear_except_if_required(FE_INEXACT); - } - - return update_exponent(result); -#endif // LIBC_MATH_CBRT_SKIP_ACCURATE_PASS -} +LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { return math::cbrt(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp index 42e3ff0..2a15df2 100644 --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -12,84 +12,6 @@ namespace LIBC_NAMESPACE_DECL { -// Lookup table for logf(f) = logf(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. 
-// Generated by Sollya with the following commands: -// display = hexadecimal; -// for n from 0 to 127 do { print(single(1 / (1 + n / 128.0))); }; -const float ONE_OVER_F_FLOAT[128] = { - 0x1p0f, 0x1.fc07fp-1f, 0x1.f81f82p-1f, 0x1.f4465ap-1f, - 0x1.f07c2p-1f, 0x1.ecc07cp-1f, 0x1.e9131ap-1f, 0x1.e573acp-1f, - 0x1.e1e1e2p-1f, 0x1.de5d6ep-1f, 0x1.dae608p-1f, 0x1.d77b66p-1f, - 0x1.d41d42p-1f, 0x1.d0cb58p-1f, 0x1.cd8568p-1f, 0x1.ca4b3p-1f, - 0x1.c71c72p-1f, 0x1.c3f8fp-1f, 0x1.c0e07p-1f, 0x1.bdd2b8p-1f, - 0x1.bacf92p-1f, 0x1.b7d6c4p-1f, 0x1.b4e81cp-1f, 0x1.b20364p-1f, - 0x1.af286cp-1f, 0x1.ac5702p-1f, 0x1.a98ef6p-1f, 0x1.a6d01ap-1f, - 0x1.a41a42p-1f, 0x1.a16d4p-1f, 0x1.9ec8eap-1f, 0x1.9c2d14p-1f, - 0x1.99999ap-1f, 0x1.970e5p-1f, 0x1.948b1p-1f, 0x1.920fb4p-1f, - 0x1.8f9c18p-1f, 0x1.8d3018p-1f, 0x1.8acb9p-1f, 0x1.886e6p-1f, - 0x1.861862p-1f, 0x1.83c978p-1f, 0x1.818182p-1f, 0x1.7f406p-1f, - 0x1.7d05f4p-1f, 0x1.7ad22p-1f, 0x1.78a4c8p-1f, 0x1.767dcep-1f, - 0x1.745d18p-1f, 0x1.724288p-1f, 0x1.702e06p-1f, 0x1.6e1f76p-1f, - 0x1.6c16c2p-1f, 0x1.6a13cep-1f, 0x1.681682p-1f, 0x1.661ec6p-1f, - 0x1.642c86p-1f, 0x1.623fa8p-1f, 0x1.605816p-1f, 0x1.5e75bcp-1f, - 0x1.5c9882p-1f, 0x1.5ac056p-1f, 0x1.58ed24p-1f, 0x1.571ed4p-1f, - 0x1.555556p-1f, 0x1.539094p-1f, 0x1.51d07ep-1f, 0x1.501502p-1f, - 0x1.4e5e0ap-1f, 0x1.4cab88p-1f, 0x1.4afd6ap-1f, 0x1.49539ep-1f, - 0x1.47ae14p-1f, 0x1.460cbcp-1f, 0x1.446f86p-1f, 0x1.42d662p-1f, - 0x1.414142p-1f, 0x1.3fb014p-1f, 0x1.3e22ccp-1f, 0x1.3c995ap-1f, - 0x1.3b13b2p-1f, 0x1.3991c2p-1f, 0x1.381382p-1f, 0x1.3698ep-1f, - 0x1.3521dp-1f, 0x1.33ae46p-1f, 0x1.323e34p-1f, 0x1.30d19p-1f, - 0x1.2f684cp-1f, 0x1.2e025cp-1f, 0x1.2c9fb4p-1f, 0x1.2b404ap-1f, - 0x1.29e412p-1f, 0x1.288b02p-1f, 0x1.27350cp-1f, 0x1.25e228p-1f, - 0x1.24924ap-1f, 0x1.234568p-1f, 0x1.21fb78p-1f, 0x1.20b47p-1f, - 0x1.1f7048p-1f, 0x1.1e2ef4p-1f, 0x1.1cf06ap-1f, 0x1.1bb4a4p-1f, - 0x1.1a7b96p-1f, 0x1.194538p-1f, 0x1.181182p-1f, 0x1.16e068p-1f, - 0x1.15b1e6p-1f, 0x1.1485fp-1f, 0x1.135c82p-1f, 
0x1.12358ep-1f, - 0x1.111112p-1f, 0x1.0fef02p-1f, 0x1.0ecf56p-1f, 0x1.0db20ap-1f, - 0x1.0c9714p-1f, 0x1.0b7e6ep-1f, 0x1.0a681p-1f, 0x1.0953f4p-1f, - 0x1.08421p-1f, 0x1.07326p-1f, 0x1.0624dep-1f, 0x1.05198p-1f, - 0x1.041042p-1f, 0x1.03091cp-1f, 0x1.020408p-1f, 0x1.010102p-1f}; - -// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. -// Generated by Sollya with the following commands: -// display = hexadecimal; -// for n from 0 to 127 do { print(single(log(1 + n / 128.0))); }; -const float LOG_F_FLOAT[128] = { - 0.0f, 0x1.fe02a6p-8f, 0x1.fc0a8cp-7f, 0x1.7b91bp-6f, - 0x1.f829bp-6f, 0x1.39e87cp-5f, 0x1.77459p-5f, 0x1.b42dd8p-5f, - 0x1.f0a30cp-5f, 0x1.16536ep-4f, 0x1.341d7ap-4f, 0x1.51b074p-4f, - 0x1.6f0d28p-4f, 0x1.8c345ep-4f, 0x1.a926d4p-4f, 0x1.c5e548p-4f, - 0x1.e27076p-4f, 0x1.fec914p-4f, 0x1.0d77e8p-3f, 0x1.1b72aep-3f, - 0x1.29553p-3f, 0x1.371fc2p-3f, 0x1.44d2b6p-3f, 0x1.526e5ep-3f, - 0x1.5ff308p-3f, 0x1.6d60fep-3f, 0x1.7ab89p-3f, 0x1.87fa06p-3f, - 0x1.9525aap-3f, 0x1.a23bc2p-3f, 0x1.af3c94p-3f, 0x1.bc2868p-3f, - 0x1.c8ff7cp-3f, 0x1.d5c216p-3f, 0x1.e27076p-3f, 0x1.ef0adcp-3f, - 0x1.fb9186p-3f, 0x1.04025ap-2f, 0x1.0a324ep-2f, 0x1.1058cp-2f, - 0x1.1675cap-2f, 0x1.1c898cp-2f, 0x1.22942p-2f, 0x1.2895a2p-2f, - 0x1.2e8e2cp-2f, 0x1.347ddap-2f, 0x1.3a64c6p-2f, 0x1.404308p-2f, - 0x1.4618bcp-2f, 0x1.4be5fap-2f, 0x1.51aad8p-2f, 0x1.576772p-2f, - 0x1.5d1bdcp-2f, 0x1.62c83p-2f, 0x1.686c82p-2f, 0x1.6e08eap-2f, - 0x1.739d8p-2f, 0x1.792a56p-2f, 0x1.7eaf84p-2f, 0x1.842d1ep-2f, - 0x1.89a338p-2f, 0x1.8f11e8p-2f, 0x1.947942p-2f, 0x1.99d958p-2f, - 0x1.9f323ep-2f, 0x1.a4840ap-2f, 0x1.a9cecap-2f, 0x1.af1294p-2f, - 0x1.b44f78p-2f, 0x1.b9858ap-2f, 0x1.beb4dap-2f, 0x1.c3dd7ap-2f, - 0x1.c8ff7cp-2f, 0x1.ce1afp-2f, 0x1.d32fe8p-2f, 0x1.d83e72p-2f, - 0x1.dd46ap-2f, 0x1.e24882p-2f, 0x1.e74426p-2f, 0x1.ec399ep-2f, - 0x1.f128f6p-2f, 0x1.f6124p-2f, 0x1.faf588p-2f, 0x1.ffd2ep-2f, - 0x1.02552ap-1f, 0x1.04bdfap-1f, 0x1.0723e6p-1f, 
0x1.0986f4p-1f, - 0x1.0be72ep-1f, 0x1.0e4498p-1f, 0x1.109f3ap-1f, 0x1.12f71ap-1f, - 0x1.154c3ep-1f, 0x1.179eacp-1f, 0x1.19ee6cp-1f, 0x1.1c3b82p-1f, - 0x1.1e85f6p-1f, 0x1.20cdcep-1f, 0x1.23130ep-1f, 0x1.2555bcp-1f, - 0x1.2795e2p-1f, 0x1.29d38p-1f, 0x1.2c0e9ep-1f, 0x1.2e4744p-1f, - 0x1.307d74p-1f, 0x1.32b134p-1f, 0x1.34e28ap-1f, 0x1.37117cp-1f, - 0x1.393e0ep-1f, 0x1.3b6844p-1f, 0x1.3d9026p-1f, 0x1.3fb5b8p-1f, - 0x1.41d8fep-1f, 0x1.43f9fep-1f, 0x1.4618bcp-1f, 0x1.48353ep-1f, - 0x1.4a4f86p-1f, 0x1.4c679ap-1f, 0x1.4e7d82p-1f, 0x1.50913cp-1f, - 0x1.52a2d2p-1f, 0x1.54b246p-1f, 0x1.56bf9ep-1f, 0x1.58cadcp-1f, - 0x1.5ad404p-1f, 0x1.5cdb1ep-1f, 0x1.5ee02ap-1f, 0x1.60e33p-1f}; - // Range reduction constants for logarithms. // r(0) = 1, r(127) = 0.5 // r(k) = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + k*2^-7)) diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h index 72b1d564..9ee31f0 100644 --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -17,14 +17,6 @@ namespace LIBC_NAMESPACE_DECL { -// Lookup table for (1/f) where f = 1 + n*2^(-7), n = 0..127, -// computed and stored as float precision constants. -extern const float ONE_OVER_F_FLOAT[128]; - -// Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127, -// computed and stored as float precision constants. -extern const float LOG_F_FLOAT[128]; - // Lookup table for range reduction constants r for logarithms. extern const float R[128]; diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index a2a6d60..72f8da8 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -121,49 +121,6 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { return r; } -// x should be positive, normal finite value -// TODO: Simplify range reduction and polynomial degree for float16. -// See issue #137190. 
-LIBC_INLINE static float log_eval_f(float x) { - // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). - using FPBits = fputil::FPBits<float>; - FPBits xbits(x); - - float ex = static_cast<float>(xbits.get_exponent()); - // p1 is the leading 7 bits of mx, i.e. - // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). - int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); - - // Set bits to (1 + (mx - p1*2^(-7))) - xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); - xbits.set_biased_exponent(FPBits::EXP_BIAS); - // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). - float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; - - // Minimax polynomial for log(1 + dx), generated using Sollya: - // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); - // > Q = (P - 1) / x; - // > for i from 0 to degree(Q) do print(coeff(Q, i)); - constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, - 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; - - float dx2 = dx * dx; - - float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); - float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); - float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); - - float p = fputil::polyeval(dx2, dx, c1, c2, c3); - - // Generated by Sollya with the following commands: - // > display = hexadecimal; - // > round(log(2), SG, RN); - constexpr float LOGF_2 = 0x1.62e43p-1f; - - float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); - return result; -} - } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp index e18f293..09f55d3 100644 --- a/libc/src/wctype/iswalpha.cpp +++ b/libc/src/wctype/iswalpha.cpp @@ -14,6 +14,6 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(bool, iswalpha, (wint_t c)) { return internal::iswalpha(c); } +LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) { return internal::iswalpha(c); } } // namespace 
LIBC_NAMESPACE_DECL diff --git a/libc/src/wctype/iswalpha.h b/libc/src/wctype/iswalpha.h index 681fc6b..0353388 100644 --- a/libc/src/wctype/iswalpha.h +++ b/libc/src/wctype/iswalpha.h @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE_DECL { -bool iswalpha(wint_t c); +int iswalpha(wint_t c); } // namespace LIBC_NAMESPACE_DECL |