diff options
Diffstat (limited to 'libc')
65 files changed, 1682 insertions, 725 deletions
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index e8f59c9..5e8278e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1264,6 +1264,7 @@ if(LLVM_LIBC_FULL_BUILD) # wchar.h entrypoints libc.src.wchar.mblen libc.src.wchar.mbrlen + libc.src.wchar.mbsinit libc.src.wchar.mbrtowc libc.src.wchar.mbtowc libc.src.wchar.wcrtomb diff --git a/libc/fuzzing/math/CMakeLists.txt b/libc/fuzzing/math/CMakeLists.txt index be63fe4..19416fc 100644 --- a/libc/fuzzing/math/CMakeLists.txt +++ b/libc/fuzzing/math/CMakeLists.txt @@ -205,3 +205,30 @@ add_libc_fuzzer( DEPENDS libc.src.math.cbrt ) + +add_libc_fuzzer( + fsqrt_fuzz + NEED_MPFR + SRCS + fsqrt_fuzz.cpp + DEPENDS + libc.src.math.fsqrt +) + +add_libc_fuzzer( + f16sqrt_fuzz + NEED_MPFR + SRCS + f16sqrt_fuzz.cpp + DEPENDS + libc.src.math.f16sqrt +) + +add_libc_fuzzer( + hypot_fuzz + NEED_MPFR + SRCS + hypot_fuzz.cpp + DEPENDS + libc.src.math.hypot +) diff --git a/libc/fuzzing/math/f16sqrt_fuzz.cpp b/libc/fuzzing/math/f16sqrt_fuzz.cpp new file mode 100644 index 0000000..9a097a5 --- /dev/null +++ b/libc/fuzzing/math/f16sqrt_fuzz.cpp @@ -0,0 +1,56 @@ +//===-- f16sqrt_fuzz.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc f16sqrt implementation. +/// +//===----------------------------------------------------------------------===// + +#include "src/math/f16sqrt.h" +#include "utils/MPFRWrapper/mpfr_inc.h" +#include <cstdint> +#include <cstring> +#include <iostream> +#include <math.h> + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + mpfr_t input; + mpfr_t out; + mpfr_init2(input, 53); + mpfr_init2(out, 128); + for (size_t i = 0; i < size / sizeof(double); ++i) { + double x; + std::memcpy(&x, data, sizeof(double)); + data += sizeof(double); + + // remove NaN, inf, and values outside the accepted range + if (isnan(x) || isinf(x) || x < 0) + continue; + // signed zeros already tested in unit tests + if (signbit(x) && x == 0.0) + continue; + + mpfr_set_d(input, x, MPFR_RNDN); + mpfr_sqrt(out, input, MPFR_RNDN); + float16 to_compare = mpfr_get_d(out, MPFR_RNDN); + + float16 result = LIBC_NAMESPACE::f16sqrt(x); + + if (result != to_compare) { + std::cout << std::hexfloat << "Failing input: " << x << std::endl; + std::cout << std::hexfloat + << "Failing output: " << static_cast<float>(result) + << std::endl; + std::cout << std::hexfloat + << "Expected: " << static_cast<float>(to_compare) << std::endl; + __builtin_trap(); + } + } + mpfr_clear(input); + mpfr_clear(out); + return 0; +} diff --git a/libc/fuzzing/math/fsqrt_fuzz.cpp b/libc/fuzzing/math/fsqrt_fuzz.cpp new file mode 100644 index 0000000..06bb231 --- /dev/null +++ b/libc/fuzzing/math/fsqrt_fuzz.cpp @@ -0,0 +1,53 @@ +//===-- fsqrt_fuzz.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc fsqrt implementation. +/// +//===----------------------------------------------------------------------===// + +#include "src/math/fsqrt.h" +#include "utils/MPFRWrapper/mpfr_inc.h" +#include <cstdint> +#include <cstring> +#include <iostream> +#include <math.h> + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + mpfr_t input; + mpfr_t out; + mpfr_init2(input, 53); + mpfr_init2(out, 128); + for (size_t i = 0; i < size / sizeof(double); ++i) { + double x; + std::memcpy(&x, data, sizeof(double)); + data += sizeof(double); + + // remove NaN, inf, and values outside the accepted range + if (isnan(x) || isinf(x) || x < 0) + continue; + // signed zeros already tested in unit tests + if (signbit(x) && x == 0.0) + continue; + + mpfr_set_d(input, x, MPFR_RNDN); + mpfr_sqrt(out, input, MPFR_RNDN); + float to_compare = mpfr_get_flt(out, MPFR_RNDN); + + float result = LIBC_NAMESPACE::fsqrt(x); + + if (result != to_compare) { + std::cout << std::hexfloat << "Failing input: " << x << std::endl; + std::cout << std::hexfloat << "Failing output: " << result << std::endl; + std::cout << std::hexfloat << "Expected: " << to_compare << std::endl; + __builtin_trap(); + } + } + mpfr_clear(input); + mpfr_clear(out); + return 0; +} diff --git a/libc/fuzzing/math/hypot_fuzz.cpp b/libc/fuzzing/math/hypot_fuzz.cpp new file mode 100644 index 0000000..6129e41 --- /dev/null +++ b/libc/fuzzing/math/hypot_fuzz.cpp @@ -0,0 +1,64 @@ +//===-- hypot_fuzz.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Fuzzing test for llvm-libc hypot implementation. +/// +//===----------------------------------------------------------------------===// + +#include "src/math/hypot.h" +#include "utils/MPFRWrapper/mpfr_inc.h" +#include <cstdint> +#include <cstring> +#include <iostream> +#include <math.h> + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + mpfr_t in_x; + mpfr_t in_y; + mpfr_t out; + mpfr_init2(in_x, 53); + mpfr_init2(in_y, 53); + mpfr_init2(out, 128); + + for (size_t i = 0; i < size / (2 * sizeof(double)); ++i) { + double x; + double y; + + std::memcpy(&x, data, sizeof(double)); + data += sizeof(double); + std::memcpy(&y, data, sizeof(double)); + data += sizeof(double); + + // remove NaN, inf, and signed zeros + if (isnan(x) || isinf(x) || (signbit(x) && x == 0.0)) + return 0; + if (isnan(y) || isinf(y) || (signbit(y) && y == 0.0)) + return 0; + + mpfr_set_d(in_x, x, MPFR_RNDN); + mpfr_set_d(in_y, y, MPFR_RNDN); + + int output = mpfr_hypot(out, in_x, in_y, MPFR_RNDN); + mpfr_subnormalize(out, output, MPFR_RNDN); + double to_compare = mpfr_get_d(out, MPFR_RNDN); + + double result = LIBC_NAMESPACE::hypot(x, y); + + if (result != to_compare) { + std::cout << std::hexfloat << "Failing x: " << x << std::endl; + std::cout << std::hexfloat << "Failing y: " << y << std::endl; + std::cout << std::hexfloat << "Failing output: " << result << std::endl; + std::cout << std::hexfloat << "Expected: " << to_compare << std::endl; + __builtin_trap(); + } + } + mpfr_clear(in_x); + mpfr_clear(in_y); + mpfr_clear(out); + return 0; +} diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 0285f19..6e1f595 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -53,6 +53,12 @@ functions: - type: wchar_t *__restrict - type: const char *__restrict - type: size_t + - name: mbsinit + standards: + - stdc + return_type: int + arguments: + - type: mbstate_t * - name: mblen standards: - stdc diff --git a/libc/shared/math.h b/libc/shared/math.h index e3c674c..e0f00f5 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -16,6 +16,11 @@ #include "math/acosf16.h" #include "math/acoshf.h" #include "math/acoshf16.h" +#include "math/acospif16.h" +#include "math/asin.h" +#include "math/asinf.h" +#include "math/asinf16.h" +#include "math/asinhf.h" #include "math/erff.h" #include "math/exp.h" #include "math/exp10.h" diff --git a/libc/shared/math/acospif16.h b/libc/shared/math/acospif16.h new file mode 100644 index 0000000..38225f2 --- /dev/null +++ b/libc/shared/math/acospif16.h @@ -0,0 +1,29 @@ +//===-- Shared acospif16 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ACOSPIF16_H +#define LLVM_LIBC_SHARED_MATH_ACOSPIF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "shared/libc_common.h" +#include "src/__support/math/acospif16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::acospif16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_ACOSPIF16_H diff --git a/libc/shared/math/asin.h b/libc/shared/math/asin.h new file mode 100644 index 0000000..0b2c8ea6 --- /dev/null +++ b/libc/shared/math/asin.h @@ -0,0 +1,23 @@ +//===-- Shared asin function ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ASIN_H +#define LLVM_LIBC_SHARED_MATH_ASIN_H + +#include "shared/libc_common.h" +#include "src/__support/math/asin.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::asin; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ASIN_H diff --git a/libc/shared/math/asinf.h b/libc/shared/math/asinf.h new file mode 100644 index 0000000..ac051bd --- /dev/null +++ b/libc/shared/math/asinf.h @@ -0,0 +1,23 @@ +//===-- Shared asinf function -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ASINF_H +#define LLVM_LIBC_SHARED_MATH_ASINF_H + +#include "shared/libc_common.h" +#include "src/__support/math/asinf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::asinf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ASINF_H diff --git a/libc/shared/math/asinf16.h b/libc/shared/math/asinf16.h new file mode 100644 index 0000000..d545e26 --- /dev/null +++ b/libc/shared/math/asinf16.h @@ -0,0 +1,28 @@ +//===-- Shared asinf16 function ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ASINF16_H +#define LLVM_LIBC_SHARED_MATH_ASINF16_H + +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/asinf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::asinf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_ASINF_H diff --git a/libc/shared/math/asinhf.h b/libc/shared/math/asinhf.h new file mode 100644 index 0000000..c4a5509 --- /dev/null +++ b/libc/shared/math/asinhf.h @@ -0,0 +1,23 @@ +//===-- Shared asinhf function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ASINHF_H +#define LLVM_LIBC_SHARED_MATH_ASINHF_H + +#include "shared/libc_common.h" +#include "src/__support/math/asinhf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::asinhf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ASINHF_H diff --git a/libc/shared/math/exp10f16.h b/libc/shared/math/exp10f16.h index af00787..d6ba067 100644 --- a/libc/shared/math/exp10f16.h +++ b/libc/shared/math/exp10f16.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SHARED_MATH_EXP10F_H -#define LLVM_LIBC_SHARED_MATH_EXP10F_H +#ifndef LLVM_LIBC_SHARED_MATH_EXP10F16_H +#define LLVM_LIBC_SHARED_MATH_EXP10F16_H #include "include/llvm-libc-macros/float16-macros.h" #include "shared/libc_common.h" @@ -26,4 +26,4 @@ using math::exp10f16; #endif // LIBC_TYPES_HAS_FLOAT16 -#endif // LLVM_LIBC_SHARED_MATH_EXP10F_H +#endif // LLVM_LIBC_SHARED_MATH_EXP10F16_H diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index 94f8b95..6e447fc 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -7,6 +7,7 @@ add_header_library( libc.hdr.fenv_macros libc.hdr.math_macros libc.hdr.stdint_proxy + libc.src.__support.CPP.type_traits libc.src.__support.macros.attributes libc.src.errno.errno ) diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index 7691088..7bd5643 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -12,6 +12,7 @@ #include "hdr/fenv_macros.h" #include "hdr/math_macros.h" #include "hdr/types/fenv_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" @@ -72,40 +73,58 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; } namespace LIBC_NAMESPACE_DECL { namespace fputil { -LIBC_INLINE static int clear_except_if_required([[maybe_unused]] int excepts) { +LIBC_INLINE static constexpr int +clear_except_if_required([[maybe_unused]] int excepts) { + if (cpp::is_constant_evaluated()) { + return 0; + } else { #ifndef LIBC_MATH_HAS_NO_EXCEPT - if (math_errhandling & MATH_ERREXCEPT) - return clear_except(excepts); + if (math_errhandling & MATH_ERREXCEPT) + return clear_except(excepts); #endif // LIBC_MATH_HAS_NO_EXCEPT - return 0; + return 0; + } } -LIBC_INLINE static int set_except_if_required([[maybe_unused]] int excepts) { +LIBC_INLINE static constexpr int +set_except_if_required([[maybe_unused]] int excepts) { + if (cpp::is_constant_evaluated()) { + return 0; + } else { #ifndef LIBC_MATH_HAS_NO_EXCEPT - if (math_errhandling & MATH_ERREXCEPT) - return set_except(excepts); + if (math_errhandling & MATH_ERREXCEPT) + return set_except(excepts); #endif // LIBC_MATH_HAS_NO_EXCEPT - return 0; + return 0; + } } -LIBC_INLINE static int raise_except_if_required([[maybe_unused]] int excepts) { +LIBC_INLINE static constexpr int +raise_except_if_required([[maybe_unused]] int excepts) { + if (cpp::is_constant_evaluated()) { + return 0; + } else { #ifndef LIBC_MATH_HAS_NO_EXCEPT - if (math_errhandling & MATH_ERREXCEPT) + if (math_errhandling & MATH_ERREXCEPT) #ifdef LIBC_TARGET_ARCH_IS_X86_64 - return raise_except</*SKIP_X87_FPU*/ true>(excepts); + return raise_except</*SKIP_X87_FPU*/ true>(excepts); #else // !LIBC_TARGET_ARCH_IS_X86 - return raise_except(excepts); + return raise_except(excepts); #endif // LIBC_TARGET_ARCH_IS_X86 #endif // LIBC_MATH_HAS_NO_EXCEPT - return 0; + return 0; + } } -LIBC_INLINE static void set_errno_if_required([[maybe_unused]] int err) { +LIBC_INLINE static constexpr void +set_errno_if_required([[maybe_unused]] int err) { + if (!cpp::is_constant_evaluated()) { #ifndef LIBC_MATH_HAS_NO_ERRNO - if (math_errhandling & MATH_ERRNO) - libc_errno = err; + if (math_errhandling & MATH_ERRNO) + libc_errno = err; #endif // LIBC_MATH_HAS_NO_ERRNO + } } } // namespace fputil diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 866aea7..2b78c4d 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -16,6 +16,7 @@ #include "allocator.h" +#include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/new.h" @@ -31,14 +32,12 @@ constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024; constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE; constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1; constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8; +constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8; constexpr static uint32_t MIN_SIZE = 16; constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1; // The number of times to attempt claiming an in-progress slab allocation. -constexpr static uint32_t MAX_TRIES = 128; - -// A sentinel used to indicate an invalid but non-null pointer value. -constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max(); +constexpr static uint32_t MAX_TRIES = 1024; static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two"); @@ -70,8 +69,8 @@ static void rpc_free(void *ptr) { // Convert a potentially disjoint bitmask into an increasing integer per-lane // for use with indexing between gpu lanes. -static inline uint32_t lane_count(uint64_t lane_mask) { - return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1)); +static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) { + return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1)); } // Obtain an initial value to seed a random number generator. We use the rounded @@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) { void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) { uint64_t mask = gpu::get_lane_mask(); uint32_t workers = cpp::popcount(uniform); - for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers) + for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n; + i += workers) s[i] = c; } @@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) { return x && (x & (x - 1)) == 0; } -// Where this chunk size should start looking in the global array. -static inline constexpr uint32_t start_index(uint32_t chunk_index) { - return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) / - impl::get_chunk_id(SLAB_SIZE / 2); +// Where this chunk size should start looking in the global array. Small +// allocations are much more likely than large ones, so we give them the most +// space. We use a cubic easing function normalized on the possible chunks. +static inline constexpr uint32_t get_start_index(uint32_t chunk_size) { + constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2); + uint64_t norm = + (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk; + uint64_t bias = (norm * norm * norm) >> 32; + uint64_t inv = (1 << 16) - bias; + return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16); +} + +// Returns the id of the lane below this one that acts as its leader. +static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) { + uint64_t mask = id < BITS_IN_DWORD ? ~0ull << (id + 1) : 0; + return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1; +} + +// We use a sentinal value to indicate a failed or in-progress allocation. +template <typename T> bool is_sentinel(const T &x) { + return x == cpp::numeric_limits<T>::max(); } } // namespace impl @@ -264,28 +281,33 @@ struct Slab { continue; // We try using any known empty bits from the previous attempt first. - uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask), - ~after ? (old_index & ~(BITS_IN_WORD - 1)) + - cpp::countr_zero(~after) - : impl::xorshift32(state)); + uint32_t start = gpu::shuffle( + mask, cpp::countr_zero(uniform & mask), + ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after) + : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD)); - uint32_t id = impl::lane_count(uniform & mask); + // Each lane tries to claim one bit in a single contiguous mask. + uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id()); uint32_t index = (start + id) % usable_bits(chunk_size); uint32_t slot = index / BITS_IN_WORD; uint32_t bit = index % BITS_IN_WORD; // Get the mask of bits destined for the same slot and coalesce it. - uint64_t match = uniform & gpu::match_any(mask, slot); - uint32_t length = cpp::popcount(match); - uint32_t bitmask = gpu::shuffle( - mask, cpp::countr_zero(match), - static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit); + uint32_t leader = impl::get_leader_id( + uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0), + gpu::get_lane_id()); + uint32_t length = cpp::popcount(uniform & mask) - + impl::lane_count(uniform & mask, leader); + uint32_t bitmask = + static_cast<uint32_t>( + (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1) + << bit; uint32_t before = 0; - if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match))) + if (gpu::get_lane_id() == leader) before = cpp::AtomicRef(get_bitfield()[slot]) .fetch_or(bitmask, cpp::MemoryOrder::RELAXED); - before = gpu::shuffle(mask, cpp::countr_zero(match), before); + before = gpu::shuffle(mask, leader, before); if (~before & (1 << bit)) result = ptr_from_index(index, chunk_size); else @@ -323,20 +345,20 @@ struct GuardPtr { private: struct RefCounter { // Indicates that the object is in its deallocation phase and thus invalid. - static constexpr uint64_t INVALID = uint64_t(1) << 63; + static constexpr uint32_t INVALID = uint32_t(1) << 31; // If a read preempts an unlock call we indicate this so the following // unlock call can swap out the helped bit and maintain exclusive ownership. - static constexpr uint64_t HELPED = uint64_t(1) << 62; + static constexpr uint32_t HELPED = uint32_t(1) << 30; // Resets the reference counter, cannot be reset to zero safely. - void reset(uint32_t n, uint64_t &count) { + void reset(uint32_t n, uint32_t &count) { counter.store(n, cpp::MemoryOrder::RELAXED); count = n; } // Acquire a slot in the reference counter if it is not invalid. - bool acquire(uint32_t n, uint64_t &count) { + bool acquire(uint32_t n, uint32_t &count) { count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n; return (count & INVALID) == 0; } @@ -349,7 +371,7 @@ private: // another thread resurrected the counter and we quit, or a parallel read // helped us invalidating it. For the latter, claim that flag and return. if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) { - uint64_t expected = 0; + uint32_t expected = 0; if (counter.compare_exchange_strong(expected, INVALID, cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) @@ -372,28 +394,29 @@ private: return (val & INVALID) ? 0 : val; } - cpp::Atomic<uint64_t> counter{0}; + cpp::Atomic<uint32_t> counter{0}; }; - cpp::Atomic<Slab *> ptr{nullptr}; - RefCounter ref{}; + cpp::Atomic<Slab *> ptr; + RefCounter ref; // Should be called be a single lane for each different pointer. template <typename... Args> - Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) { + Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) { Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED); if (!expected && ptr.compare_exchange_strong( - expected, reinterpret_cast<Slab *>(SENTINEL), + expected, + reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()), cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) { - count = cpp::numeric_limits<uint64_t>::max(); + count = cpp::numeric_limits<uint32_t>::max(); void *raw = impl::rpc_allocate(sizeof(Slab)); if (!raw) return nullptr; return new (raw) Slab(cpp::forward<Args>(args)...); } - if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL)) + if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected))) return nullptr; if (!ref.acquire(n, count)) @@ -405,7 +428,7 @@ private: // Finalize the associated memory and signal that it is ready to use by // resetting the counter. - void finalize(Slab *mem, uint32_t n, uint64_t &count) { + void finalize(Slab *mem, uint32_t n, uint32_t &count) { cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE); ptr.store(mem, cpp::MemoryOrder::RELAXED); cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); @@ -418,7 +441,7 @@ public: // The uniform mask represents which lanes share the same pointer. For each // uniform value we elect a leader to handle it on behalf of the other lanes. template <typename... Args> - Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count, + Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count, Args &&...args) { count = 0; Slab *result = nullptr; @@ -433,14 +456,15 @@ public: // We defer storing the newly allocated slab until now so that we can use // multiple lanes to initialize it and release it for use. - if (count == cpp::numeric_limits<uint64_t>::max()) { + if (impl::is_sentinel(count)) { result->initialize(uniform); if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform))) finalize(result, cpp::popcount(uniform), count); } - if (count != cpp::numeric_limits<uint64_t>::max()) - count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1; + if (!impl::is_sentinel(count)) + count = count - cpp::popcount(uniform) + + impl::lane_count(uniform, gpu::get_lane_id()) + 1; return result; } @@ -469,7 +493,7 @@ static GuardPtr slots[ARRAY_SIZE] = {}; // Keep a cache of the last successful slot for each chunk size. Initialize it // to an even spread of the total size. Must be updated if the chunking scheme // changes. -#define S(X) (impl::start_index(X)) +#define S(X) (impl::get_start_index(X)) static cpp::Atomic<uint32_t> indices[] = { S(16), S(32), S(48), S(64), S(96), S(112), S(128), S(192), S(224), S(256), S(384), S(448), S(512), S(768), @@ -481,26 +505,28 @@ static cpp::Atomic<uint32_t> indices[] = { #undef S // Tries to find a slab in the table that can support the given chunk size. -static Slab *find_slab(uint32_t chunk_size) { +static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) { // We start at the index of the last successful allocation for this kind. uint32_t chunk_id = impl::get_chunk_id(chunk_size); uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED); - uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size); - for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) { + for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) { uint32_t index = - !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE; + !offset ? start + : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE; - if (slots[index].use_count() < Slab::available_chunks(chunk_size)) { + if (!offset || + slots[index].use_count() < Slab::available_chunks(chunk_size)) { uint64_t lane_mask = gpu::get_lane_mask(); - uint64_t reserved = 0; + uint32_t reserved = 0; Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved, chunk_size, index); // If there is a slab allocation in progress we retry a few times. for (uint32_t retries = 0; - retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) { + !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES; + retries++) { uint64_t lane_mask = gpu::get_lane_mask(); slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved, chunk_size, index); @@ -514,13 +540,17 @@ static Slab *find_slab(uint32_t chunk_size) { slab->get_chunk_size() == chunk_size) { if (index != start) indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED); + uniform = uniform & gpu::get_lane_mask(); return slab; } else if (slab && (reserved > Slab::available_chunks(chunk_size) || slab->get_chunk_size() != chunk_size)) { slots[index].unlock(gpu::get_lane_mask(), gpu::get_lane_mask() & uniform); - } else if (!slab && reserved == SENTINEL) { + } else if (!slab && impl::is_sentinel(reserved)) { + uniform = uniform & gpu::get_lane_mask(); return nullptr; + } else { + sleep_briefly(); } } } @@ -547,12 +577,12 @@ void *allocate(uint64_t size) { // Try to find a slab for the rounded up chunk size and allocate from it. uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size)); - Slab *slab = find_slab(chunk_size); - if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL)) + uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size); + Slab *slab = find_slab(chunk_size, uniform); + if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab))) return nullptr; uint64_t lane_mask = gpu::get_lane_mask(); - uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index()); void *ptr = slab->allocate(lane_mask, uniform); return ptr; } diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 926bbd5..13f46a1 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -4,7 +4,6 @@ add_header_library( acos.h DEPENDS .asin_utils - libc.src.__support.math.asin_utils libc.src.__support.FPUtil.double_double libc.src.__support.FPUtil.dyadic_float libc.src.__support.FPUtil.fenv_impl @@ -96,6 +95,21 @@ add_header_library( ) add_header_library( + acospif16 + HDRS + acospif16.h + DEPENDS + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types +) + +add_header_library( asin_utils HDRS asin_utils.h @@ -110,6 +124,67 @@ add_header_library( ) add_header_library( + asin + HDRS + asin.h + DEPENDS + .asin_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features +) + +add_header_library( + asinhf + HDRS + asinhf.h + DEPENDS + .acoshf_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( + asinf + HDRS + asinf.h + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.config + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features +) + +add_header_library( + asinf16 + HDRS + asinf16.h + DEPENDS + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization +) + +add_header_library( erff HDRS erff.h @@ -358,11 +433,10 @@ add_header_library( DEPENDS .exp10f16_utils libc.src.__support.FPUtil.fp_bits - src.__support.FPUtil.FEnvImpl - src.__support.FPUtil.FPBits - src.__support.FPUtil.cast - src.__support.FPUtil.rounding_mode - src.__support.FPUtil.except_value_utils - src.__support.macros.optimization - src.__support.macros.properties.cpu_features + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features ) diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h index a52ead7..0e1e413 100644 --- a/libc/src/__support/math/acos.h +++ b/libc/src/__support/math/acos.h @@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr double acos(double x) { +LIBC_INLINE static constexpr double acos(double x) { using DoubleDouble = fputil::DoubleDouble; using namespace asin_internal; using FPBits = fputil::FPBits<double>; diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h index 153087e..7a0c0e5 100644 --- a/libc/src/__support/math/acosf.h +++ b/libc/src/__support/math/acosf.h @@ -45,7 +45,7 @@ static constexpr fputil::ExceptValues<float, N_EXCEPTS> ACOSF_EXCEPTS = {{ } // namespace acosf_internal -static constexpr float acosf(float x) { +LIBC_INLINE static constexpr float acosf(float x) { using namespace acosf_internal; using namespace inv_trigf_utils_internal; using FPBits = typename fputil::FPBits<float>; diff --git a/libc/src/__support/math/acosf16.h b/libc/src/__support/math/acosf16.h index 58d3761..3f0e002 100644 --- a/libc/src/__support/math/acosf16.h +++ b/libc/src/__support/math/acosf16.h @@ -26,7 +26,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float16 acosf16(float16 x) { +LIBC_INLINE static constexpr float16 acosf16(float16 x) { // Generated by Sollya using the following command: // > round(pi/2, SG, RN); diff --git a/libc/src/__support/math/acoshf.h b/libc/src/__support/math/acoshf.h index f18f169..4e00311 100644 --- a/libc/src/__support/math/acoshf.h +++ b/libc/src/__support/math/acoshf.h @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float acoshf(float x) { +LIBC_INLINE static constexpr float acoshf(float x) { using namespace acoshf_internal; using FPBits_t = typename fputil::FPBits<float>; FPBits_t xbits(x); diff --git a/libc/src/__support/math/acoshf16.h b/libc/src/__support/math/acoshf16.h index 15e7f6a..e5be2a8 100644 --- a/libc/src/__support/math/acoshf16.h +++ b/libc/src/__support/math/acoshf16.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H -#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H #include "include/llvm-libc-macros/float16-macros.h" @@ -28,7 +28,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float16 acoshf16(float16 x) { +LIBC_INLINE static constexpr float16 acoshf16(float16 x) { using namespace acoshf_internal; constexpr size_t N_EXCEPTS = 2; @@ -120,4 +120,4 @@ static constexpr float16 acoshf16(float16 x) { #endif // LIBC_TYPES_HAS_FLOAT16 -#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H diff --git a/libc/src/__support/math/acospif16.h b/libc/src/__support/math/acospif16.h new file mode 100644 index 0000000..cf29c76 --- /dev/null +++ b/libc/src/__support/math/acospif16.h @@ -0,0 +1,147 @@ +//===-- Implementation header for acospif16 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 acospif16(float16 x) { + using FPBits = fputil::FPBits<float16>; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + uint16_t x_sign = x_u >> 15; + + // |x| > 0x1p0, |x| > 1, or x is NaN. + if (LIBC_UNLIKELY(x_abs > 0x3c00)) { + // acospif16(NaN) = NaN + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // 1 < |x| <= +inf + fputil::raise_except_if_required(FE_INVALID); + fputil::set_errno_if_required(EDOM); + + return FPBits::quiet_nan().get_val(); + } + + // |x| == 0x1p0, x is 1 or -1 + // if x is (-)1, return 1 + // if x is (+)1, return 0 + if (LIBC_UNLIKELY(x_abs == 0x3c00)) + return fputil::cast<float16>(x_sign ? 1.0f : 0.0f); + + float xf = x; + float xsq = xf * xf; + + // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya + // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0, + // 0.5]); + constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f, + 0x1.897e36p-6f, 0x1.9efafcp-7f, + 0x1.06d884p-6f}; + // |x| <= 0x1p-1, |x| <= 0.5 + if (x_abs <= 0x3800) { + // if x is 0, return 0.5 + if (LIBC_UNLIKELY(x_abs == 0)) + return fputil::cast<float16>(0.5f); + + // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then + // acospi(x) = 0.5 - asin(x)/pi + float interm = + fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2], + POLY_COEFFS[3], POLY_COEFFS[4]); + + return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f)); + } + + // When |x| > 0.5, assume that 0.5 < |x| <= 1 + // + // Step-by-step range-reduction proof: + // 1: Let y = asin(x), such that, x = sin(y) + // 2: From complimentary angle identity: + // x = sin(y) = cos(pi/2 - y) + // 3: Let z = pi/2 - y, such that x = cos(z) + // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A): + // z = 2A, z/2 = A + // cos(z) = 1 - 2 * sin^2(z/2) + // 5: Make sin(z/2) subject of the formula: + // sin(z/2) = sqrt((1 - cos(z))/2) + // 6: Recall [3]; x = cos(z). Therefore: + // sin(z/2) = sqrt((1 - x)/2) + // 7: Let u = (1 - x)/2 + // 8: Therefore: + // asin(sqrt(u)) = z/2 + // 2 * asin(sqrt(u)) = z + // 9: Recall [3]; z = pi/2 - y. Therefore: + // y = pi/2 - z + // y = pi/2 - 2 * asin(sqrt(u)) + // 10: Recall [1], y = asin(x). Therefore: + // asin(x) = pi/2 - 2 * asin(sqrt(u)) + // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) + // Therefore: + // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u))) + // acos(x) = 2 * asin(sqrt(u)) + // acospi(x) = 2 * (asin(sqrt(u)) / pi) + // + // THE RANGE REDUCTION, HOW? + // 12: Recall [7], u = (1 - x)/2 + // 13: Since 0.5 < x <= 1, therefore: + // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 + // + // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for + // Step [11] as `sqrt(u)` is in range. + // When -1 < x <= -0.5, the identity: + // acos(x) = pi - acos(-x) + // acospi(x) = 1 - acos(-x)/pi + // allows us to compute for the negative x value (lhs) + // with a positive x value instead (rhs). + + float xf_abs = (xf < 0 ? -xf : xf); + float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); + float sqrt_u = fputil::sqrt<float>(u); + + float asin_sqrt_u = + sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1], + POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]); + + // Same as acos(x), but devided the expression with pi + return fputil::cast<float16>( + x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f) + : 2.0f * asin_sqrt_u); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H diff --git a/libc/src/__support/math/asin.h b/libc/src/__support/math/asin.h new file mode 100644 index 0000000..5e06d04 --- /dev/null +++ b/libc/src/__support/math/asin.h @@ -0,0 +1,297 @@ +//===-- Implementation header for asin --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H + +#include "asin_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/math/asin_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr double asin(double x) { + using namespace asin_internal; + using FPBits = fputil::FPBits<double>; + + FPBits xbits(x); + int x_exp = xbits.get_biased_exponent(); + + // |x| < 0.5. + if (x_exp < FPBits::EXP_BIAS - 1) { + // |x| < 2^-26. + if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) { + // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x + // is: + // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|) + // = x^2 / 6 + // < 2^-54 + // < epsilon(1)/2. + // So the correctly rounded values of asin(x) are: + // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, + // or (rounding mode = FE_UPWARD and x is + // negative), + // = x otherwise. + // To simplify the rounding decision and make it more efficient, we use + // fma(x, 2^-54, x) instead. + // Note: to use the formula x + 2^-54*x to decide the correct rounding, we + // do need fma(x, 2^-54, x) to prevent underflow caused by 2^-54*x when + // |x| < 2^-1022. For targets without FMA instructions, when x is close to + // denormal range, we normalize x, +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) + return x; +#elif defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) + return fputil::multiply_add(x, 0x1.0p-54, x); +#else + if (xbits.abs().uintval() == 0) + return x; + // Get sign(x) * min_normal. + FPBits eps_bits = FPBits::min_normal(); + eps_bits.set_sign(xbits.sign()); + double eps = eps_bits.get_val(); + double normalize_const = (x_exp == 0) ? eps : 0.0; + double scaled_normal = + fputil::multiply_add(x + normalize_const, 0x1.0p54, eps); + return fputil::multiply_add(scaled_normal, 0x1.0p-54, -normalize_const); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return x * asin_eval(x * x); +#else + using Float128 = fputil::DyadicFloat<128>; + using DoubleDouble = fputil::DoubleDouble; + + unsigned idx = 0; + DoubleDouble x_sq = fputil::exact_mult(x, x); + double err = xbits.abs().get_val() * 0x1.0p-51; + // Polynomial approximation: + // p ~ asin(x)/x + + DoubleDouble p = asin_eval(x_sq, idx, err); + // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p) + DoubleDouble r0 = fputil::exact_mult(x, p.hi); + double r_lo = fputil::multiply_add(x, p.lo, r0.lo); + + // Ziv's accuracy test. + + double r_upper = r0.hi + (r_lo + err); + double r_lower = r0.hi + (r_lo - err); + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, perform 128-bit calculation. + + // Recalculate mod 1/64. + idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6)); + + // Get x^2 - idx/64 exactly. When FMA is available, double-double + // multiplication will be correct for all rounding modes. Otherwise we use + // Float128 directly. + Float128 x_f128(x); + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // u = x^2 - idx/64 + Float128 u_hi( + fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi)); + Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo)); +#else + Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128); + Float128 u = fputil::quick_add( + x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6))); +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + Float128 p_f128 = asin_eval(u, idx); + Float128 r = fputil::quick_mul(x_f128, p_f128); + + return static_cast<double>(r); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + // |x| >= 0.5 + + double x_abs = xbits.abs().get_val(); + + // Maintaining the sign: + constexpr double SIGN[2] = {1.0, -1.0}; + double x_sign = SIGN[xbits.is_neg()]; + + // |x| >= 1 + if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) { + // x = +-1, asin(x) = +- pi/2 + if (x_abs == 1.0) { + // return +- pi/2 + return fputil::multiply_add(x_sign, PI_OVER_TWO.hi, + x_sign * PI_OVER_TWO.lo); + } + // |x| > 1, return NaN. + if (xbits.is_quiet_nan()) + return x; + + // Set domain error for non-NaN input. + if (!xbits.is_nan()) + fputil::set_errno_if_required(EDOM); + + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // When |x| >= 0.5, we perform range reduction as follow: + // + // Assume further that 0.5 <= x < 1, and let: + // y = asin(x) + // We will use the double angle formula: + // cos(2y) = 1 - 2 sin^2(y) + // and the complement angle identity: + // x = sin(y) = cos(pi/2 - y) + // = 1 - 2 sin^2 (pi/4 - y/2) + // So: + // sin(pi/4 - y/2) = sqrt( (1 - x)/2 ) + // And hence: + // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) ) + // Equivalently: + // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) ) + // Let u = (1 - x)/2, then: + // asin(x) = pi/2 - 2 * asin( sqrt(u) ) + // Moreover, since 0.5 <= x < 1: + // 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5, + // And hence we can reuse the same polynomial approximation of asin(x) when + // |x| <= 0.5: + // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u), + + // u = (1 - |x|)/2 + double u = fputil::multiply_add(x_abs, -0.5, 0.5); + // v_hi + v_lo ~ sqrt(u). + // Let: + // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi) + // Then: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // ~ v_hi + h / (2 * v_hi) + // So we can use: + // v_lo = h / (2 * v_hi). + // Then, + // asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u) + double v_hi = fputil::sqrt<double>(u); + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + double p = asin_eval(u); + double r = x_sign * fputil::multiply_add(-2.0 * v_hi, p, PI_OVER_TWO.hi); + return r; +#else + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double h = fputil::multiply_add(v_hi, -v_hi, u); +#else + DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi); + double h = (u - v_hi_sq.hi) - v_hi_sq.lo; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + // Scale v_lo and v_hi by 2 from the formula: + // vh = v_hi * 2 + // vl = 2*v_lo = h / v_hi. + double vh = v_hi * 2.0; + double vl = h / v_hi; + + // Polynomial approximation: + // p ~ asin(sqrt(u))/sqrt(u) + unsigned idx = 0; + double err = vh * 0x1.0p-51; + + DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err); + + // Perform computations in double-double arithmetic: + // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p) + DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p); + DoubleDouble r = fputil::exact_add(PI_OVER_TWO.hi, -r0.hi); + + double r_lo = PI_OVER_TWO.lo - r0.lo + r.lo; + + // Ziv's accuracy test. + +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double r_upper = fputil::multiply_add( + r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, err)); + double r_lower = fputil::multiply_add( + r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, -err)); +#else + r_lo *= x_sign; + r.hi *= x_sign; + double r_upper = r.hi + (r_lo + err); + double r_lower = r.hi + (r_lo - err); +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + if (LIBC_LIKELY(r_upper == r_lower)) + return r_upper; + + // Ziv's accuracy test failed, we redo the computations in Float128. + // Recalculate mod 1/64. + idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6)); + + // After the first step of Newton-Raphson approximating v = sqrt(u), we have + // that: + // sqrt(u) = v_hi + h / (sqrt(u) + v_hi) + // v_lo = h / (2 * v_hi) + // With error: + // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) ) + // = -h^2 / (2*v * (sqrt(u) + v)^2). + // Since: + // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u, + // we can add another correction term to (v_hi + v_lo) that is: + // v_ll = -h^2 / (2*v_hi * 4u) + // = -v_lo * (h / 4u) + // = -vl * (h / 8u), + // making the errors: + // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3) + // well beyond 128-bit precision needed. + + // Get the rounding error of vl = 2 * v_lo ~ h / vh + // Get full product of vh * vl +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi; +#else + DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl); + double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + // vll = 2*v_ll = -vl * (h / (4u)). + double t = h * (-0.25) / u; + double vll = fputil::multiply_add(vl, t, vl_lo); + // m_v = -(v_hi + v_lo + v_ll). + Float128 m_v = fputil::quick_add( + Float128(vh), fputil::quick_add(Float128(vl), Float128(vll))); + m_v.sign = Sign::NEG; + + // Perform computations in Float128: + // asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u). + Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u)); + + Float128 p_f128 = asin_eval(y_f128, idx); + Float128 r0_f128 = fputil::quick_mul(m_v, p_f128); + Float128 r_f128 = fputil::quick_add(PI_OVER_TWO_F128, r0_f128); + + if (xbits.is_neg()) + r_f128.sign = Sign::NEG; + + return static_cast<double>(r_f128); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H diff --git a/libc/src/__support/math/asinf.h b/libc/src/__support/math/asinf.h new file mode 100644 index 0000000..bfa0dc3 --- /dev/null +++ b/libc/src/__support/math/asinf.h @@ -0,0 +1,175 @@ +//===-- Implementation header for asinf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H + +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float asinf(float x) { + using namespace inv_trigf_utils_internal; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr size_t N_EXCEPTS = 2; + + // Exceptional values when |x| <= 0.5 + constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{ + // (inputs, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ) + {0x3d09bf86, 0x3d09c62c, 1, 0, 1}, + // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ) + {0x3de5fa1e, 0x3de6768e, 1, 0, 0}, + }}; + + // Exceptional values when 0.5 < |x| <= 1 + constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{ + // (inputs, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ) + {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0}, + // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ) + {0x3f7741b6, 0x3fa7832a, 1, 0, 0}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace inv_trigf_utils_internal; + using FPBits = typename fputil::FPBits<float>; + + FPBits xbits(x); + uint32_t x_uint = xbits.uintval(); + uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU; + constexpr double SIGN[2] = {1.0, -1.0}; + uint32_t x_sign = x_uint >> 31; + + // |x| <= 0.5-ish + if (x_abs < 0x3f04'471dU) { + // |x| < 0x1.d12edp-12 + if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) { + // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x + // is: + // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|) + // = x^2 / 6 + // < 2^-25 + // < epsilon(1)/2. + // So the correctly rounded values of asin(x) are: + // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, + // or (rounding mode = FE_UPWARD and x is + // negative), + // = x otherwise. + // To simplify the rounding decision and make it more efficient, we use + // fma(x, 2^-25, x) instead. + // An exhaustive test shows that this formula work correctly for all + // rounding modes up to |x| < 0x1.d12edp-12. + // Note: to use the formula x + 2^-25*x to decide the correct rounding, we + // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when + // |x| < 2^-125. For targets without FMA instructions, we simply use + // double for intermediate results as it is more efficient than using an + // emulated version of FMA. +#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + return fputil::multiply_add(x, 0x1.0p-25f, x); +#else + double xd = static_cast<double>(x); + return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd)); +#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Check for exceptional values + if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign); + LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // For |x| <= 0.5, we approximate asinf(x) by: + // asin(x) = x * P(x^2) + // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating + // asin(x)/x on [0, 0.5] generated by Sollya with: + // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], + // [|1, D...|], [0, 0.5]); + // An exhaustive test shows that this approximation works well up to a + // little more than 0.5. + double xd = static_cast<double>(x); + double xsq = xd * xd; + double x3 = xd * xsq; + double r = asin_eval(xsq); + return static_cast<float>(fputil::multiply_add(x3, r, xd)); + } + + // |x| > 1, return NaNs. + if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + if (x_abs <= 0x7f80'0000U) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + + return FPBits::quiet_nan().get_val(); + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Check for exceptional values + if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign); + LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // When |x| > 0.5, we perform range reduction as follow: + // + // Assume further that 0.5 < x <= 1, and let: + // y = asin(x) + // We will use the double angle formula: + // cos(2y) = 1 - 2 sin^2(y) + // and the complement angle identity: + // x = sin(y) = cos(pi/2 - y) + // = 1 - 2 sin^2 (pi/4 - y/2) + // So: + // sin(pi/4 - y/2) = sqrt( (1 - x)/2 ) + // And hence: + // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) ) + // Equivalently: + // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) ) + // Let u = (1 - x)/2, then: + // asin(x) = pi/2 - 2 * asin( sqrt(u) ) + // Moreover, since 0.5 < x <= 1: + // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5, + // And hence we can reuse the same polynomial approximation of asin(x) when + // |x| <= 0.5: + // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u), + + xbits.set_sign(Sign::POS); + double sign = SIGN[x_sign]; + double xd = static_cast<double>(xbits.get_val()); + double u = fputil::multiply_add(-0.5, xd, 0.5); + double c1 = sign * (-2 * fputil::sqrt<double>(u)); + double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1); + double c3 = c1 * u; + + double r = asin_eval(u); + return static_cast<float>(fputil::multiply_add(c3, r, c2)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H diff --git a/libc/src/__support/math/asinf16.h b/libc/src/__support/math/asinf16.h new file mode 100644 index 0000000..3d032a4 --- /dev/null +++ b/libc/src/__support/math/asinf16.h @@ -0,0 +1,146 @@ +//===-- Implementation header for asinf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 asinf16(float16 x) { + + // Generated by Sollya using the following command: + // > round(pi/2, D, RN); + constexpr float PI_2 = 0x1.921fb54442d18p0f; + + using FPBits = fputil::FPBits<float16>; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + float xf = x; + + // |x| > 0x1p0, |x| > 1, or x is NaN. + if (LIBC_UNLIKELY(x_abs > 0x3c00)) { + // asinf16(NaN) = NaN + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // 1 < |x| <= +/-inf + fputil::raise_except_if_required(FE_INVALID); + fputil::set_errno_if_required(EDOM); + + return FPBits::quiet_nan().get_val(); + } + + float xsq = xf * xf; + + // |x| <= 0x1p-1, |x| <= 0.5 + if (x_abs <= 0x3800) { + // asinf16(+/-0) = +/-0 + if (LIBC_UNLIKELY(x_abs == 0)) + return x; + + // Exhaustive tests show that, + // for |x| <= 0x1.878p-9, when: + // x > 0, and rounding upward, or + // x < 0, and rounding downward, then, + // asin(x) = x * 2^-11 + x + // else, in other rounding modes, + // asin(x) = x + if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) { + int rounding = fputil::quick_get_round(); + + if ((xbits.is_pos() && rounding == FE_UPWARD) || + (xbits.is_neg() && rounding == FE_DOWNWARD)) + return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf)); + return x; + } + + // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with: + // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); + float result = + fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f, + 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); + return fputil::cast<float16>(xf * result); + } + + // When |x| > 0.5, assume that 0.5 < |x| <= 1, + // + // Step-by-step range-reduction proof: + // 1: Let y = asin(x), such that, x = sin(y) + // 2: From complimentary angle identity: + // x = sin(y) = cos(pi/2 - y) + // 3: Let z = pi/2 - y, such that x = cos(z) + // 4: From double angle formula; cos(2A) = 1 - sin^2(A): + // z = 2A, z/2 = A + // cos(z) = 1 - 2 * sin^2(z/2) + // 5: Make sin(z/2) subject of the formula: + // sin(z/2) = sqrt((1 - cos(z))/2) + // 6: Recall [3]; x = cos(z). Therefore: + // sin(z/2) = sqrt((1 - x)/2) + // 7: Let u = (1 - x)/2 + // 8: Therefore: + // asin(sqrt(u)) = z/2 + // 2 * asin(sqrt(u)) = z + // 9: Recall [3], z = pi/2 - y. Therefore: + // y = pi/2 - z + // y = pi/2 - 2 * asin(sqrt(u)) + // 10: Recall [1], y = asin(x). Therefore: + // asin(x) = pi/2 - 2 * asin(sqrt(u)) + // + // WHY? + // 11: Recall [7], u = (1 - x)/2 + // 12: Since 0.5 < x <= 1, therefore: + // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 + // + // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for + // Step [10] as `sqrt(u)` is in range. + + // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0 + float xf_abs = (xf < 0 ? -xf : xf); + float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0); + float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); + float u_sqrt = fputil::sqrt<float>(u); + + // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with: + // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); + float asin_sqrt_u = + u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f, + 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); + + return fputil::cast<float16>(sign * + fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h new file mode 100644 index 0000000..1c08a6e --- /dev/null +++ b/libc/src/__support/math/asinhf.h @@ -0,0 +1,125 @@ +//===-- Implementation header for asinf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H + +#include "acoshf_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float asinhf(float x) { + using namespace acoshf_internal; + using FPBits_t = typename fputil::FPBits<float>; + FPBits_t xbits(x); + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = xbits.abs().uintval(); + + // |x| <= 2^-3 + if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { + // |x| <= 2^-26 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x - 0x1.5555555555555p-3 * x * x * x)); + } + + double x_d = x; + double x_sq = x_d * x_d; + // Generated by Sollya with: + // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|], + // [0, 2^-2]); + double p = fputil::polyeval( + x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, + -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, + 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); + return static_cast<float>(fputil::multiply_add(x_d, p, x_d)); + } + + const double SIGN[2] = {1.0, -1.0}; + double x_sign = SIGN[x_u >> 31]; + double x_d = x; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Helper functions to set results for exceptional cases. + auto round_result_slightly_down = [x_sign](float r) -> float { + return fputil::multiply_add(static_cast<float>(x_sign), r, + static_cast<float>(x_sign) * (-0x1.0p-24f)); + }; + auto round_result_slightly_up = [x_sign](float r) -> float { + return fputil::multiply_add(static_cast<float>(x_sign), r, + static_cast<float>(x_sign) * 0x1.0p-24f); + }; + + if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) { + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits_t::quiet_nan().get_val(); + } + + return x; + } + + // Exceptional cases when x > 2^24. + switch (x_abs) { + case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f + return round_result_slightly_down(0x1.1e0696p4f); + case 0x4c803f2c: // |x| = 0x1.007e58p26f + return round_result_slightly_down(0x1.2b786cp4f); + case 0x4f8ffb03: // |x| = 0x1.1ff606p32f + return round_result_slightly_up(0x1.6fdd34p4f); + case 0x5c569e88: // |x| = 0x1.ad3d1p57f + return round_result_slightly_up(0x1.45c146p5f); + case 0x5e68984e: // |x| = 0x1.d1309cp61f + return round_result_slightly_up(0x1.5c9442p5f); + case 0x655890d3: // |x| = 0x1.b121a6p75f + return round_result_slightly_down(0x1.a9a3f2p5f); + case 0x65de7ca6: // |x| = 0x1.bcf94cp76f + return round_result_slightly_up(0x1.af66cp5f); + case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f + return round_result_slightly_down(0x1.08b512p6f); + case 0x7997f30a: // |x| = 0x1.2fe614p116f + return round_result_slightly_up(0x1.451436p6f); + } + } else { + // Exceptional cases when x < 2^24. + if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) { + // |x| = 0x1.575e4cp12f + return round_result_slightly_down(0x1.29becap3f); + } + if (LIBC_UNLIKELY(x_abs == 0x49d29048)) { + // |x| = 0x1.a5209p20f + return round_result_slightly_down(0x1.e1b92p3f); + } + } +#else + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) + return x; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // asinh(x) = log(x + sqrt(x^2 + 1)) + return static_cast<float>( + x_sign * log_eval(fputil::multiply_add( + x_d, x_sign, + fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0))))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H diff --git a/libc/src/__support/math/erff.h b/libc/src/__support/math/erff.h index e54ec77..b81be30 100644 --- a/libc/src/__support/math/erff.h +++ b/libc/src/__support/math/erff.h @@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float erff(float x) { +LIBC_INLINE static constexpr float erff(float x) { // Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by // Sollya with: > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h index ff59ff7..83638e8 100644 --- a/libc/src/__support/math/exp.h +++ b/libc/src/__support/math/exp.h @@ -67,7 +67,7 @@ namespace { // Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24. // For |dx| < 2^-13 + 2^-30: // | output - expm1(dx) / dx | < 2^-51. -static double poly_approx_d(double dx) { +LIBC_INLINE static double poly_approx_d(double dx) { // dx^2 double dx2 = dx * dx; // c0 = 1 + dx / 2 @@ -85,7 +85,7 @@ static double poly_approx_d(double dx) { // Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720 // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-101 -static DoubleDouble poly_approx_dd(const DoubleDouble &dx) { +LIBC_INLINE static DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // Taylor polynomial. constexpr DoubleDouble COEFFS[] = { {0, 0x1p0}, // 1 @@ -106,7 +106,7 @@ static DoubleDouble poly_approx_dd(const DoubleDouble &dx) { // Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^7 / 5040 // For |dx| < 2^-13 + 2^-30: // | output - exp(dx) | < 2^-126. -static Float128 poly_approx_f128(const Float128 &dx) { +LIBC_INLINE static Float128 poly_approx_f128(const Float128 &dx) { constexpr Float128 COEFFS_128[]{ {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 @@ -127,7 +127,7 @@ static Float128 poly_approx_f128(const Float128 &dx) { // Compute exp(x) using 128-bit precision. // TODO(lntue): investigate triple-double precision implementation for this // step. -static Float128 exp_f128(double x, double kd, int idx1, int idx2) { +LIBC_INLINE static Float128 exp_f128(double x, double kd, int idx1, int idx2) { // Recalculate dx: double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact @@ -160,8 +160,8 @@ static Float128 exp_f128(double x, double kd, int idx1, int idx2) { } // Compute exp(x) with double-double precision. -static DoubleDouble exp_double_double(double x, double kd, - const DoubleDouble &exp_mid) { +LIBC_INLINE static DoubleDouble exp_double_double(double x, double kd, + const DoubleDouble &exp_mid) { // Recalculate dx: // dx = x - k * 2^-12 * log(2) double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact @@ -184,7 +184,7 @@ static DoubleDouble exp_double_double(double x, double kd, // Check for exceptional cases when // |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9 -static double set_exceptional(double x) { +LIBC_INLINE static double set_exceptional(double x) { using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -234,7 +234,7 @@ static double set_exceptional(double x) { namespace math { -static double exp(double x) { +LIBC_INLINE static double exp(double x) { using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h index fa60e40c..12a09d7 100644 --- a/libc/src/__support/math/exp10.h +++ b/libc/src/__support/math/exp10.h @@ -83,7 +83,8 @@ LIBC_INLINE static double exp10_poly_approx_d(double dx) { // > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]); // Error bounds: // | output - 10^(dx) | < 2^-101 -static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) { +LIBC_INLINE static constexpr DoubleDouble +exp10_poly_approx_dd(const DoubleDouble &dx) { // Taylor polynomial. constexpr DoubleDouble COEFFS[] = { {0, 0x1p0}, @@ -105,7 +106,8 @@ static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) { // Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7 // For |dx| < 2^-14: // | output - 10^dx | < 1.5 * 2^-124. -static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) { +LIBC_INLINE static constexpr Float128 +exp10_poly_approx_f128(const Float128 &dx) { constexpr Float128 COEFFS_128[]{ {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0 {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128}, @@ -126,7 +128,8 @@ static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) { // Compute 10^(x) using 128-bit precision. // TODO(lntue): investigate triple-double precision implementation for this // step. -static Float128 exp10_f128(double x, double kd, int idx1, int idx2) { +LIBC_INLINE static Float128 exp10_f128(double x, double kd, int idx1, + int idx2) { double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144 @@ -157,8 +160,8 @@ static Float128 exp10_f128(double x, double kd, int idx1, int idx2) { } // Compute 10^x with double-double precision. -static DoubleDouble exp10_double_double(double x, double kd, - const DoubleDouble &exp_mid) { +LIBC_INLINE static DoubleDouble +exp10_double_double(double x, double kd, const DoubleDouble &exp_mid) { // Recalculate dx: // dx = x - k * 2^-12 * log10(2) double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact @@ -180,7 +183,7 @@ static DoubleDouble exp10_double_double(double x, double kd, #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS // When output is denormal. -static double exp10_denorm(double x) { +LIBC_INLINE static double exp10_denorm(double x) { // Range reduction. double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21); int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19); @@ -234,7 +237,7 @@ static double exp10_denorm(double x) { // * x >= log10(2^1024) // * x <= log10(2^-1022) // * x is inf or nan -static constexpr double exp10_set_exceptional(double x) { +LIBC_INLINE static constexpr double exp10_set_exceptional(double x) { using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); @@ -285,7 +288,7 @@ static constexpr double exp10_set_exceptional(double x) { namespace math { -static constexpr double exp10(double x) { +LIBC_INLINE static constexpr double exp10(double x) { using FPBits = typename fputil::FPBits<double>; FPBits xbits(x); diff --git a/libc/src/__support/math/exp10f.h b/libc/src/__support/math/exp10f.h index 807b4f0..76ae197 100644 --- a/libc/src/__support/math/exp10f.h +++ b/libc/src/__support/math/exp10f.h @@ -20,7 +20,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float exp10f(float x) { +LIBC_INLINE static constexpr float exp10f(float x) { using FPBits = typename fputil::FPBits<float>; FPBits xbits(x); diff --git a/libc/src/__support/math/exp10f16.h b/libc/src/__support/math/exp10f16.h index 0d8b125..3eca867 100644 --- a/libc/src/__support/math/exp10f16.h +++ b/libc/src/__support/math/exp10f16.h @@ -57,7 +57,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS> }}; #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr float16 exp10f16(float16 x) { +LIBC_INLINE static constexpr float16 exp10f16(float16 x) { using FPBits = fputil::FPBits<float16>; FPBits x_bits(x); diff --git a/libc/src/__support/math/exp10f16_utils.h b/libc/src/__support/math/exp10f16_utils.h index bffb81b..5952a41 100644 --- a/libc/src/__support/math/exp10f16_utils.h +++ b/libc/src/__support/math/exp10f16_utils.h @@ -19,8 +19,7 @@ namespace LIBC_NAMESPACE_DECL { -LIBC_INLINE static constexpr ExpRangeReduction -exp10_range_reduction(float16 x) { +LIBC_INLINE static ExpRangeReduction exp10_range_reduction(float16 x) { // For -8 < x < 5, to compute 10^x, we perform the following range reduction: // find hi, mid, lo, such that: // x = (hi + mid) * log2(10) + lo, in which diff --git a/libc/src/__support/math/exp10f_utils.h b/libc/src/__support/math/exp10f_utils.h index c30def9..010a2f1 100644 --- a/libc/src/__support/math/exp10f_utils.h +++ b/libc/src/__support/math/exp10f_utils.h @@ -89,7 +89,7 @@ struct Exp10Base : public ExpBase { 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0, 0x1.1429e74a98f43p-1}; - static double powb_lo(double dx) { + LIBC_INLINE static double powb_lo(double dx) { using fputil::multiply_add; double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * dx diff --git a/libc/src/__support/math/exp_utils.h b/libc/src/__support/math/exp_utils.h index fc9ab10..ef408ed 100644 --- a/libc/src/__support/math/exp_utils.h +++ b/libc/src/__support/math/exp_utils.h @@ -22,8 +22,8 @@ namespace LIBC_NAMESPACE_DECL { // So if we scale x up by 2^1022, we can use // double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range. template <bool SKIP_ZIV_TEST = false> -static constexpr cpp::optional<double> ziv_test_denorm(int hi, double mid, - double lo, double err) { +LIBC_INLINE static constexpr cpp::optional<double> +ziv_test_denorm(int hi, double mid, double lo, double err) { using FPBits = typename fputil::FPBits<double>; // Scaling factor = 1/(min normal number) = 2^1022 diff --git a/libc/src/__support/math/expf.h b/libc/src/__support/math/expf.h index 88c1514..f7e11be 100644 --- a/libc/src/__support/math/expf.h +++ b/libc/src/__support/math/expf.h @@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float expf(float x) { +LIBC_INLINE static constexpr float expf(float x) { using FPBits = typename fputil::FPBits<float>; FPBits xbits(x); diff --git a/libc/src/__support/math/expf16.h b/libc/src/__support/math/expf16.h index ded28c7..14302a7 100644 --- a/libc/src/__support/math/expf16.h +++ b/libc/src/__support/math/expf16.h @@ -31,7 +31,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float16 expf16(float16 x) { +LIBC_INLINE static constexpr float16 expf16(float16 x) { #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS constexpr fputil::ExceptValues<float16, 2> EXPF16_EXCEPTS_LO = {{ // (input, RZ output, RU offset, RD offset, RN offset) diff --git a/libc/src/__support/math/expf16_utils.h b/libc/src/__support/math/expf16_utils.h index 8a2fc94..4204dab7 100644 --- a/libc/src/__support/math/expf16_utils.h +++ b/libc/src/__support/math/expf16_utils.h @@ -47,7 +47,8 @@ struct ExpRangeReduction { float exp_lo; }; -[[maybe_unused]] static ExpRangeReduction exp_range_reduction(float16 x) { +[[maybe_unused]] LIBC_INLINE static ExpRangeReduction +exp_range_reduction(float16 x) { // For -18 < x < 12, to compute exp(x), we perform the following range // reduction: find hi, mid, lo, such that: // x = hi + mid + lo, in which diff --git a/libc/src/__support/math/frexpf.h b/libc/src/__support/math/frexpf.h index 4d2f494..7834a12 100644 --- a/libc/src/__support/math/frexpf.h +++ b/libc/src/__support/math/frexpf.h @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float frexpf(float x, int *exp) { +LIBC_INLINE static constexpr float frexpf(float x, int *exp) { return fputil::frexp(x, *exp); } diff --git a/libc/src/__support/math/frexpf128.h b/libc/src/__support/math/frexpf128.h index 2fd5bc4..5218b26 100644 --- a/libc/src/__support/math/frexpf128.h +++ b/libc/src/__support/math/frexpf128.h @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float128 frexpf128(float128 x, int *exp) { +LIBC_INLINE static constexpr float128 frexpf128(float128 x, int *exp) { return fputil::frexp(x, *exp); } diff --git a/libc/src/__support/math/frexpf16.h b/libc/src/__support/math/frexpf16.h index 8deeba0..530b61a 100644 --- a/libc/src/__support/math/frexpf16.h +++ b/libc/src/__support/math/frexpf16.h @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float16 frexpf16(float16 x, int *exp) { +LIBC_INLINE static constexpr float16 frexpf16(float16 x, int *exp) { return fputil::frexp(x, *exp); } diff --git a/libc/src/__support/math/ldexpf.h b/libc/src/__support/math/ldexpf.h index 3a5ec1d..9ef5d96 100644 --- a/libc/src/__support/math/ldexpf.h +++ b/libc/src/__support/math/ldexpf.h @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float ldexpf(float x, int exp) { +LIBC_INLINE static constexpr float ldexpf(float x, int exp) { return fputil::ldexp(x, exp); } diff --git a/libc/src/__support/math/ldexpf128.h b/libc/src/__support/math/ldexpf128.h index 3625830..4fba20c 100644 --- a/libc/src/__support/math/ldexpf128.h +++ b/libc/src/__support/math/ldexpf128.h @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float128 ldexpf128(float128 x, int exp) { +LIBC_INLINE static constexpr float128 ldexpf128(float128 x, int exp) { return fputil::ldexp(x, exp); } diff --git a/libc/src/__support/math/ldexpf16.h b/libc/src/__support/math/ldexpf16.h index fbead87..d978d22 100644 --- a/libc/src/__support/math/ldexpf16.h +++ b/libc/src/__support/math/ldexpf16.h @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace math { -static constexpr float16 ldexpf16(float16 x, int exp) { +LIBC_INLINE static constexpr float16 ldexpf16(float16 x, int exp) { return fputil::ldexp(x, exp); } diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index a001d99..f91feacb 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3889,12 +3889,7 @@ add_entrypoint_object( HDRS ../asinhf.h DEPENDS - .explogxf - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization + libc.src.__support.math.asinhf ) add_entrypoint_object( @@ -3958,13 +3953,7 @@ add_entrypoint_object( HDRS ../asinf.h DEPENDS - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.math.inv_trigf_utils + libc.src.__support.math.asinf ) add_entrypoint_object( @@ -3974,16 +3963,7 @@ add_entrypoint_object( HDRS ../asinf16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.asinf16 ) add_entrypoint_object( @@ -3993,16 +3973,7 @@ add_entrypoint_object( HDRS ../asin.h DEPENDS - libc.src.__support.math.asin_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.cpu_features + libc.src.__support.math.asin ) add_entrypoint_object( @@ -4043,16 +4014,8 @@ add_entrypoint_object( HDRS ../acospif16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.acospif16 + libc.src.errno.errno ) add_header_library( diff --git a/libc/src/math/generic/acospif16.cpp b/libc/src/math/generic/acospif16.cpp index bfdf169..09cbd99 100644 --- a/libc/src/math/generic/acospif16.cpp +++ b/libc/src/math/generic/acospif16.cpp @@ -7,128 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/acospif16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/acospif16.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float16, acospif16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - uint16_t x_sign = x_u >> 15; - - // |x| > 0x1p0, |x| > 1, or x is NaN. - if (LIBC_UNLIKELY(x_abs > 0x3c00)) { - // acospif16(NaN) = NaN - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // 1 < |x| <= +inf - fputil::raise_except_if_required(FE_INVALID); - fputil::set_errno_if_required(EDOM); - - return FPBits::quiet_nan().get_val(); - } - - // |x| == 0x1p0, x is 1 or -1 - // if x is (-)1, return 1 - // if x is (+)1, return 0 - if (LIBC_UNLIKELY(x_abs == 0x3c00)) - return fputil::cast<float16>(x_sign ? 1.0f : 0.0f); - - float xf = x; - float xsq = xf * xf; - - // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya - // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0, - // 0.5]); - constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f, - 0x1.897e36p-6f, 0x1.9efafcp-7f, - 0x1.06d884p-6f}; - // |x| <= 0x1p-1, |x| <= 0.5 - if (x_abs <= 0x3800) { - // if x is 0, return 0.5 - if (LIBC_UNLIKELY(x_abs == 0)) - return fputil::cast<float16>(0.5f); - - // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then - // acospi(x) = 0.5 - asin(x)/pi - float interm = - fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2], - POLY_COEFFS[3], POLY_COEFFS[4]); - - return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f)); - } - - // When |x| > 0.5, assume that 0.5 < |x| <= 1 - // - // Step-by-step range-reduction proof: - // 1: Let y = asin(x), such that, x = sin(y) - // 2: From complimentary angle identity: - // x = sin(y) = cos(pi/2 - y) - // 3: Let z = pi/2 - y, such that x = cos(z) - // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A): - // z = 2A, z/2 = A - // cos(z) = 1 - 2 * sin^2(z/2) - // 5: Make sin(z/2) subject of the formula: - // sin(z/2) = sqrt((1 - cos(z))/2) - // 6: Recall [3]; x = cos(z). Therefore: - // sin(z/2) = sqrt((1 - x)/2) - // 7: Let u = (1 - x)/2 - // 8: Therefore: - // asin(sqrt(u)) = z/2 - // 2 * asin(sqrt(u)) = z - // 9: Recall [3]; z = pi/2 - y. Therefore: - // y = pi/2 - z - // y = pi/2 - 2 * asin(sqrt(u)) - // 10: Recall [1], y = asin(x). Therefore: - // asin(x) = pi/2 - 2 * asin(sqrt(u)) - // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x) - // Therefore: - // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u))) - // acos(x) = 2 * asin(sqrt(u)) - // acospi(x) = 2 * (asin(sqrt(u)) / pi) - // - // THE RANGE REDUCTION, HOW? - // 12: Recall [7], u = (1 - x)/2 - // 13: Since 0.5 < x <= 1, therefore: - // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 - // - // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for - // Step [11] as `sqrt(u)` is in range. - // When -1 < x <= -0.5, the identity: - // acos(x) = pi - acos(-x) - // acospi(x) = 1 - acos(-x)/pi - // allows us to compute for the negative x value (lhs) - // with a positive x value instead (rhs). - - float xf_abs = (xf < 0 ? -xf : xf); - float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); - float sqrt_u = fputil::sqrt<float>(u); - - float asin_sqrt_u = - sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1], - POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]); - - // Same as acos(x), but devided the expression with pi - return fputil::cast<float16>( - x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f) - : 2.0f * asin_sqrt_u); + return math::acospif16(x); } + } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp index d286fce..b5ba9ea 100644 --- a/libc/src/math/generic/asin.cpp +++ b/libc/src/math/generic/asin.cpp @@ -7,23 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asin.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/__support/math/asin_utils.h" +#include "src/__support/math/asin.h" namespace LIBC_NAMESPACE_DECL { -using DoubleDouble = fputil::DoubleDouble; -using Float128 = fputil::DyadicFloat<128>; - LLVM_LIBC_FUNCTION(double, asin, (double x)) { using namespace asin_internal; using FPBits = fputil::FPBits<double>; diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp index 77d6de9..9c6766f 100644 --- a/libc/src/math/generic/asinf.cpp +++ b/libc/src/math/generic/asinf.cpp @@ -7,161 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinf.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA - -#include "src/__support/math/inv_trigf_utils.h" +#include "src/__support/math/asinf.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 2; - -// Exceptional values when |x| <= 0.5 -static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{ - // (inputs, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ) - {0x3d09bf86, 0x3d09c62c, 1, 0, 1}, - // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ) - {0x3de5fa1e, 0x3de6768e, 1, 0, 0}, -}}; - -// Exceptional values when 0.5 < |x| <= 1 -static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{ - // (inputs, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ) - {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0}, - // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ) - {0x3f7741b6, 0x3fa7832a, 1, 0, 0}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float, asinf, (float x)) { - using namespace inv_trigf_utils_internal; - using FPBits = typename fputil::FPBits<float>; - - FPBits xbits(x); - uint32_t x_uint = xbits.uintval(); - uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU; - constexpr double SIGN[2] = {1.0, -1.0}; - uint32_t x_sign = x_uint >> 31; - - // |x| <= 0.5-ish - if (x_abs < 0x3f04'471dU) { - // |x| < 0x1.d12edp-12 - if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) { - // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x - // is: - // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|) - // = x^2 / 6 - // < 2^-25 - // < epsilon(1)/2. - // So the correctly rounded values of asin(x) are: - // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, - // or (rounding mode = FE_UPWARD and x is - // negative), - // = x otherwise. - // To simplify the rounding decision and make it more efficient, we use - // fma(x, 2^-25, x) instead. - // An exhaustive test shows that this formula work correctly for all - // rounding modes up to |x| < 0x1.d12edp-12. - // Note: to use the formula x + 2^-25*x to decide the correct rounding, we - // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when - // |x| < 2^-125. For targets without FMA instructions, we simply use - // double for intermediate results as it is more efficient than using an - // emulated version of FMA. -#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) - return fputil::multiply_add(x, 0x1.0p-25f, x); -#else - double xd = static_cast<double>(x); - return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd)); -#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Check for exceptional values - if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign); - LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // For |x| <= 0.5, we approximate asinf(x) by: - // asin(x) = x * P(x^2) - // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating - // asin(x)/x on [0, 0.5] generated by Sollya with: - // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], - // [|1, D...|], [0, 0.5]); - // An exhaustive test shows that this approximation works well up to a - // little more than 0.5. - double xd = static_cast<double>(x); - double xsq = xd * xd; - double x3 = xd * xsq; - double r = asin_eval(xsq); - return static_cast<float>(fputil::multiply_add(x3, r, xd)); - } - - // |x| > 1, return NaNs. - if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - if (x_abs <= 0x7f80'0000U) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - - return FPBits::quiet_nan().get_val(); - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Check for exceptional values - if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign); - LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // When |x| > 0.5, we perform range reduction as follow: - // - // Assume further that 0.5 < x <= 1, and let: - // y = asin(x) - // We will use the double angle formula: - // cos(2y) = 1 - 2 sin^2(y) - // and the complement angle identity: - // x = sin(y) = cos(pi/2 - y) - // = 1 - 2 sin^2 (pi/4 - y/2) - // So: - // sin(pi/4 - y/2) = sqrt( (1 - x)/2 ) - // And hence: - // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) ) - // Equivalently: - // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) ) - // Let u = (1 - x)/2, then: - // asin(x) = pi/2 - 2 * asin( sqrt(u) ) - // Moreover, since 0.5 < x <= 1: - // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5, - // And hence we can reuse the same polynomial approximation of asin(x) when - // |x| <= 0.5: - // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u), - - xbits.set_sign(Sign::POS); - double sign = SIGN[x_sign]; - double xd = static_cast<double>(xbits.get_val()); - double u = fputil::multiply_add(-0.5, xd, 0.5); - double c1 = sign * (-2 * fputil::sqrt<double>(u)); - double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1); - double c3 = c1 * u; - - double r = asin_eval(u); - return static_cast<float>(fputil::multiply_add(c3, r, c2)); -} +LLVM_LIBC_FUNCTION(float, asinf, (float x)) { return math::asinf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asinf16.cpp b/libc/src/math/generic/asinf16.cpp index 518c384..af8dbfe 100644 --- a/libc/src/math/generic/asinf16.cpp +++ b/libc/src/math/generic/asinf16.cpp @@ -7,127 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/asinf16.h" namespace LIBC_NAMESPACE_DECL { -// Generated by Sollya using the following command: -// > round(pi/2, D, RN); -static constexpr float PI_2 = 0x1.921fb54442d18p0f; - -LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - float xf = x; - - // |x| > 0x1p0, |x| > 1, or x is NaN. - if (LIBC_UNLIKELY(x_abs > 0x3c00)) { - // asinf16(NaN) = NaN - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // 1 < |x| <= +/-inf - fputil::raise_except_if_required(FE_INVALID); - fputil::set_errno_if_required(EDOM); - - return FPBits::quiet_nan().get_val(); - } - - float xsq = xf * xf; - - // |x| <= 0x1p-1, |x| <= 0.5 - if (x_abs <= 0x3800) { - // asinf16(+/-0) = +/-0 - if (LIBC_UNLIKELY(x_abs == 0)) - return x; - - // Exhaustive tests show that, - // for |x| <= 0x1.878p-9, when: - // x > 0, and rounding upward, or - // x < 0, and rounding downward, then, - // asin(x) = x * 2^-11 + x - // else, in other rounding modes, - // asin(x) = x - if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) { - int rounding = fputil::quick_get_round(); - - if ((xbits.is_pos() && rounding == FE_UPWARD) || - (xbits.is_neg() && rounding == FE_DOWNWARD)) - return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf)); - return x; - } - - // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with: - // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); - float result = - fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f, - 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); - return fputil::cast<float16>(xf * result); - } - - // When |x| > 0.5, assume that 0.5 < |x| <= 1, - // - // Step-by-step range-reduction proof: - // 1: Let y = asin(x), such that, x = sin(y) - // 2: From complimentary angle identity: - // x = sin(y) = cos(pi/2 - y) - // 3: Let z = pi/2 - y, such that x = cos(z) - // 4: From double angle formula; cos(2A) = 1 - sin^2(A): - // z = 2A, z/2 = A - // cos(z) = 1 - 2 * sin^2(z/2) - // 5: Make sin(z/2) subject of the formula: - // sin(z/2) = sqrt((1 - cos(z))/2) - // 6: Recall [3]; x = cos(z). Therefore: - // sin(z/2) = sqrt((1 - x)/2) - // 7: Let u = (1 - x)/2 - // 8: Therefore: - // asin(sqrt(u)) = z/2 - // 2 * asin(sqrt(u)) = z - // 9: Recall [3], z = pi/2 - y. Therefore: - // y = pi/2 - z - // y = pi/2 - 2 * asin(sqrt(u)) - // 10: Recall [1], y = asin(x). Therefore: - // asin(x) = pi/2 - 2 * asin(sqrt(u)) - // - // WHY? - // 11: Recall [7], u = (1 - x)/2 - // 12: Since 0.5 < x <= 1, therefore: - // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5 - // - // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for - // Step [10] as `sqrt(u)` is in range. - - // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0 - float xf_abs = (xf < 0 ? -xf : xf); - float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0); - float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f); - float u_sqrt = fputil::sqrt<float>(u); - - // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with: - // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]); - float asin_sqrt_u = - u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f, - 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f); - - return fputil::cast<float16>(sign * - fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2)); -} +LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) { return math::asinf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp index 3aed3bc..45023c8 100644 --- a/libc/src/math/generic/asinhf.cpp +++ b/libc/src/math/generic/asinhf.cpp @@ -7,112 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinhf.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/common_constants.h" -#include "src/math/generic/explogxf.h" +#include "src/__support/math/asinhf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { - using namespace acoshf_internal; - using FPBits_t = typename fputil::FPBits<float>; - FPBits_t xbits(x); - uint32_t x_u = xbits.uintval(); - uint32_t x_abs = xbits.abs().uintval(); - - // |x| <= 2^-3 - if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) - ? x - : (x - 0x1.5555555555555p-3 * x * x * x)); - } - - double x_d = x; - double x_sq = x_d * x_d; - // Generated by Sollya with: - // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|], - // [0, 2^-2]); - double p = fputil::polyeval( - x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, - -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, - 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); - return static_cast<float>(fputil::multiply_add(x_d, p, x_d)); - } - - const double SIGN[2] = {1.0, -1.0}; - double x_sign = SIGN[x_u >> 31]; - double x_d = x; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Helper functions to set results for exceptional cases. - auto round_result_slightly_down = [x_sign](float r) -> float { - return fputil::multiply_add(static_cast<float>(x_sign), r, - static_cast<float>(x_sign) * (-0x1.0p-24f)); - }; - auto round_result_slightly_up = [x_sign](float r) -> float { - return fputil::multiply_add(static_cast<float>(x_sign), r, - static_cast<float>(x_sign) * 0x1.0p-24f); - }; - - if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) { - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits_t::quiet_nan().get_val(); - } - - return x; - } - - // Exceptional cases when x > 2^24. - switch (x_abs) { - case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f - return round_result_slightly_down(0x1.1e0696p4f); - case 0x4c803f2c: // |x| = 0x1.007e58p26f - return round_result_slightly_down(0x1.2b786cp4f); - case 0x4f8ffb03: // |x| = 0x1.1ff606p32f - return round_result_slightly_up(0x1.6fdd34p4f); - case 0x5c569e88: // |x| = 0x1.ad3d1p57f - return round_result_slightly_up(0x1.45c146p5f); - case 0x5e68984e: // |x| = 0x1.d1309cp61f - return round_result_slightly_up(0x1.5c9442p5f); - case 0x655890d3: // |x| = 0x1.b121a6p75f - return round_result_slightly_down(0x1.a9a3f2p5f); - case 0x65de7ca6: // |x| = 0x1.bcf94cp76f - return round_result_slightly_up(0x1.af66cp5f); - case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f - return round_result_slightly_down(0x1.08b512p6f); - case 0x7997f30a: // |x| = 0x1.2fe614p116f - return round_result_slightly_up(0x1.451436p6f); - } - } else { - // Exceptional cases when x < 2^24. - if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) { - // |x| = 0x1.575e4cp12f - return round_result_slightly_down(0x1.29becap3f); - } - if (LIBC_UNLIKELY(x_abs == 0x49d29048)) { - // |x| = 0x1.a5209p20f - return round_result_slightly_down(0x1.e1b92p3f); - } - } -#else - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) - return x; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // asinh(x) = log(x + sqrt(x^2 + 1)) - return static_cast<float>( - x_sign * log_eval(fputil::multiply_add( - x_d, x_sign, - fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0))))); -} +LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h index f3dcdb9..a93ee25 100644 --- a/libc/src/math/generic/range_reduction_double_common.h +++ b/libc/src/math/generic/range_reduction_double_common.h @@ -278,7 +278,7 @@ private: }; #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static Float128 range_reduction_small_f128(double x) { +LIBC_INLINE static Float128 range_reduction_small_f128(double x) { constexpr Float128 PI_OVER_128_F128 = { Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128}; constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5; diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt index e879230..548938f 100644 --- a/libc/src/stdio/baremetal/CMakeLists.txt +++ b/libc/src/stdio/baremetal/CMakeLists.txt @@ -72,6 +72,7 @@ add_entrypoint_object( ../scanf.h DEPENDS .scanf_internal + libc.include.inttypes libc.src.stdio.scanf_core.scanf_main libc.src.__support.arg_list libc.src.__support.OSUtil.osutil diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index dee125c..561180c 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -35,6 +35,7 @@ add_header_library( core_structs.h DEPENDS .scanf_config + libc.include.inttypes libc.src.__support.CPP.string_view libc.src.__support.CPP.bitset libc.src.__support.FPUtil.fp_bits @@ -97,6 +98,7 @@ add_header_library( DEPENDS .reader .core_structs + libc.include.inttypes libc.src.__support.common libc.src.__support.ctype_utils libc.src.__support.CPP.bitset diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 43f44a9..49f4a1b 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -137,6 +137,21 @@ add_entrypoint_object( ) add_entrypoint_object( + mbsinit + SRCS + mbsinit.cpp + HDRS + mbsinit.h + DEPENDS + libc.hdr.types.wchar_t + libc.hdr.types.mbstate_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.wchar.character_converter + libc.src.__support.wchar.mbstate +) + +add_entrypoint_object( mbrtowc SRCS mbrtowc.cpp diff --git a/libc/src/wchar/mbsinit.cpp b/libc/src/wchar/mbsinit.cpp new file mode 100644 index 0000000..23ba542 --- /dev/null +++ b/libc/src/wchar/mbsinit.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of mbsinit -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsinit.h" + +#include "hdr/types/mbstate_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/character_converter.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, mbsinit, (mbstate_t * ps)) { + if (ps == nullptr) + return true; + internal::CharacterConverter cr(reinterpret_cast<internal::mbstate *>(ps)); + return cr.isValidState() && cr.isEmpty(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsinit.h b/libc/src/wchar/mbsinit.h new file mode 100644 index 0000000..fa6be0f --- /dev/null +++ b/libc/src/wchar/mbsinit.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbsinit ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSINIT_H +#define LLVM_LIBC_SRC_WCHAR_MBSINIT_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int mbsinit(mbstate_t *ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSINIT_H diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index e0218c7..55a3cee 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -17,13 +17,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// returns true if the character exists in the string -LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) { - for (int n = 0; str[n]; ++n) { - if (str[n] == c) - return true; - } - return false; +LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) { + for (; *s && *s != c; ++s) + ; + return (*s == c) ? s : nullptr; } // bool should be true for wcscspn for complimentary span @@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2, bool not_match_set) { size_t i = 0; for (; s1[i]; ++i) { - bool in_set = wcschr(s1[i], s2); + bool in_set = internal::wcschr(s2, s1[i]); if (in_set == not_match_set) return i; } diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp index defc2ce..8ac4916 100644 --- a/libc/src/wchar/wcschr.cpp +++ b/libc/src/wchar/wcschr.cpp @@ -11,15 +11,14 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) { - for (; *s && *s != c; ++s) - ; - if (*s == c) - return s; - return nullptr; + LIBC_CRASH_ON_NULLPTR(s); + return internal::wcschr(s, c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp index a00ba99..5d86a49 100644 --- a/libc/src/wchar/wcspbrk.cpp +++ b/libc/src/wchar/wcspbrk.cpp @@ -11,17 +11,10 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/null_check.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { -bool contains_char(const wchar_t *str, wchar_t target) { - for (; *str != L'\0'; str++) - if (*str == target) - return true; - - return false; -} - LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, (const wchar_t *src, const wchar_t *breakset)) { LIBC_CRASH_ON_NULLPTR(src); @@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, // currently O(n * m), can be further optimized to O(n + m) with a hash set for (int src_idx = 0; src[src_idx] != 0; src_idx++) - if (contains_char(breakset, src[src_idx])) + if (internal::wcschr(breakset, src[src_idx])) return src + src_idx; return nullptr; diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index 291efc1..ed4f0aa 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -10,18 +10,12 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { -bool isADelimeter(wchar_t wc, const wchar_t *delimiters) { - for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr) - if (wc == *delim_ptr) - return true; - return false; -} - LLVM_LIBC_FUNCTION(wchar_t *, wcstok, - (wchar_t *__restrict str, const wchar_t *__restrict delim, + (wchar_t *__restrict str, const wchar_t *__restrict delims, wchar_t **__restrict context)) { if (str == nullptr) { if (*context == nullptr) @@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, str = *context; } - wchar_t *tok_start, *tok_end; - for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim); - ++tok_start) - ; + wchar_t *tok_start = str; + while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start)) + ++tok_start; - for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim); - ++tok_end) - ; + wchar_t *tok_end = tok_start; + while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end)) + ++tok_end; if (*tok_end != L'\0') { *tok_end = L'\0'; diff --git a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp index 77479f8..4c540a8 100644 --- a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp +++ b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp @@ -14,6 +14,20 @@ using namespace LIBC_NAMESPACE; +static inline uint32_t entropy() { + return (static_cast<uint32_t>(gpu::processor_clock()) ^ + (gpu::get_thread_id_x() * 0x632be59b) ^ + (gpu::get_block_id_x() * 0x85157af5)) * + 0x9e3779bb; +} + +static inline uint32_t xorshift32(uint32_t &state) { + state ^= state << 13; + state ^= state >> 17; + state ^= state << 5; + return state * 0x9e3779bb; +} + static inline void use(uint8_t *ptr, uint32_t size) { EXPECT_NE(ptr, nullptr); for (int i = 0; i < size; ++i) @@ -34,5 +48,19 @@ TEST_MAIN(int, char **, char **) { for (int i = 0; i < 256; ++i) free(ptrs[i]); + + uint32_t state = entropy(); + for (int i = 0; i < 1024; ++i) { + if (xorshift32(state) % 2) { + uint64_t size = xorshift32(state) % 256 + 16; + uint64_t *ptr = reinterpret_cast<uint64_t *>(malloc(size)); + *ptr = gpu::get_thread_id(); + + EXPECT_EQ(*ptr, gpu::get_thread_id()); + ASSERT_TRUE(ptr); + ASSERT_TRUE(__builtin_is_aligned(ptr, 16)); + free(ptr); + } + } return 0; } diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index 89b607d..77f3617 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -12,6 +12,11 @@ add_fp_unittest( libc.src.__support.math.acosf16 libc.src.__support.math.acoshf libc.src.__support.math.acoshf16 + libc.src.__support.math.acospif16 + libc.src.__support.math.asin + libc.src.__support.math.asinf + libc.src.__support.math.asinf16 + libc.src.__support.math.asinhf libc.src.__support.math.erff libc.src.__support.math.exp libc.src.__support.math.exp10 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 8d3cebd..2e4450a 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -8,64 +8,73 @@ #include "shared/math.h" #include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" #ifdef LIBC_TYPES_HAS_FLOAT16 TEST(LlvmLibcSharedMathTest, AllFloat16) { int exponent; - EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf16(1.0f)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acoshf16(1.0f16)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acospif16(1.0f16)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16)); - ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(float(8), 5)); + ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5)); ASSERT_FP_EQ(float16(-1 * (8 << 5)), - LIBC_NAMESPACE::shared::ldexpf16(float(-8), 5)); + LIBC_NAMESPACE::shared::ldexpf16(-8.0f16, 5)); - EXPECT_FP_EQ_ALL_ROUNDING(0.75f16, - LIBC_NAMESPACE::shared::frexpf16(24.0f, &exponent)); + EXPECT_FP_EQ_ALL_ROUNDING( + 0.75f16, LIBC_NAMESPACE::shared::frexpf16(24.0f16, &exponent)); EXPECT_EQ(exponent, 5); EXPECT_FP_EQ(0x1.921fb6p+0f16, LIBC_NAMESPACE::shared::acosf16(0.0f16)); } -#endif +#endif // LIBC_TYPES_HAS_FLOAT16 TEST(LlvmLibcSharedMathTest, AllFloat) { int exponent; EXPECT_FP_EQ(0x1.921fb6p+0, LIBC_NAMESPACE::shared::acosf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinhf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f)); - EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f)); - EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f)); EXPECT_FP_EQ_ALL_ROUNDING(0.75f, LIBC_NAMESPACE::shared::frexpf(24.0f, &exponent)); EXPECT_EQ(exponent, 5); - ASSERT_FP_EQ(float(8 << 5), LIBC_NAMESPACE::shared::ldexpf(float(8), 5)); - ASSERT_FP_EQ(float(-1 * (8 << 5)), - LIBC_NAMESPACE::shared::ldexpf(float(-8), 5)); + ASSERT_FP_EQ(float(8 << 5), LIBC_NAMESPACE::shared::ldexpf(8.0f, 5)); + ASSERT_FP_EQ(float(-1 * (8 << 5)), LIBC_NAMESPACE::shared::ldexpf(-8.0f, 5)); } TEST(LlvmLibcSharedMathTest, AllDouble) { EXPECT_FP_EQ(0x1.921fb54442d18p+0, LIBC_NAMESPACE::shared::acos(0.0)); + EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::asin(0.0)); EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp(0.0)); EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0)); } +#ifdef LIBC_TYPES_HAS_FLOAT128 + TEST(LlvmLibcSharedMathTest, AllFloat128) { int exponent; - EXPECT_FP_EQ_ALL_ROUNDING( - float128(0.75), LIBC_NAMESPACE::shared::frexpf128(24.0f, &exponent)); + EXPECT_FP_EQ_ALL_ROUNDING(float128(0.75), LIBC_NAMESPACE::shared::frexpf128( + float128(24), &exponent)); EXPECT_EQ(exponent, 5); ASSERT_FP_EQ(float128(8 << 5), - LIBC_NAMESPACE::shared::ldexpf128(float(8), 5)); + LIBC_NAMESPACE::shared::ldexpf128(float128(8), 5)); ASSERT_FP_EQ(float128(-1 * (8 << 5)), - LIBC_NAMESPACE::shared::ldexpf128(float(-8), 5)); + LIBC_NAMESPACE::shared::ldexpf128(float128(-8), 5)); } + +#endif // LIBC_TYPES_HAS_FLOAT128 diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index f420ecc..fad89dc 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -99,7 +99,22 @@ add_libc_test( libc.src.string.memset libc.src.wchar.mbrlen libc.hdr.types.mbstate_t + libc.hdr.types.wchar_t libc.test.UnitTest.ErrnoCheckingTest +) + +add_libc_test( + mbsinit_test + SUITE + libc_wchar_unittests + SRCS + mbsinit_test.cpp + DEPENDS + libc.src.string.memset + libc.src.wchar.mbsinit + libc.src.wchar.mbrtowc + libc.hdr.types.mbstate_t + libc.hdr.types.wchar_t ) add_libc_test( diff --git a/libc/test/src/wchar/mbsinit_test.cpp b/libc/test/src/wchar/mbsinit_test.cpp new file mode 100644 index 0000000..ecb48aa --- /dev/null +++ b/libc/test/src/wchar/mbsinit_test.cpp @@ -0,0 +1,33 @@ +//===-- Unittests for mbsinit ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/wchar_t.h" +#include "src/string/memset.h" +#include "src/wchar/mbrtowc.h" +#include "src/wchar/mbsinit.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcMBSInitTest, EmptyState) { + mbstate_t ps; + LIBC_NAMESPACE::memset(&ps, 0, sizeof(mbstate_t)); + ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0); + ASSERT_NE(LIBC_NAMESPACE::mbsinit(nullptr), 0); +} + +TEST(LlvmLibcMBSInitTest, ConversionTest) { + const char *src = "\xf0\x9f\xa4\xa3"; // 4 byte emoji + wchar_t dest[2]; + mbstate_t ps; + LIBC_NAMESPACE::memset(&ps, 0, sizeof(mbstate_t)); + + ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0); + LIBC_NAMESPACE::mbrtowc(dest, src, 2, &ps); // partial conversion + ASSERT_EQ(LIBC_NAMESPACE::mbsinit(&ps), 0); + LIBC_NAMESPACE::mbrtowc(dest, src + 2, 2, &ps); // complete conversion + ASSERT_NE(LIBC_NAMESPACE::mbsinit(&ps), 0); // state should be reset now +} |