Diffstat (limited to 'libc/src')
 libc/src/__support/GPU/allocator.cpp     | 134
 libc/src/__support/math/CMakeLists.txt   |  14
 libc/src/__support/math/asinhf.h         | 125
 libc/src/math/generic/CMakeLists.txt     |   7
 libc/src/math/generic/asinhf.cpp         | 106
 libc/src/stdio/baremetal/CMakeLists.txt  |   1
 libc/src/stdio/scanf_core/CMakeLists.txt |   2
 libc/src/wchar/wchar_utils.h             |  13
 libc/src/wchar/wcschr.cpp                |   9
 libc/src/wchar/wcspbrk.cpp               |  11
 libc/src/wchar/wcstok.cpp                |  23
11 files changed, 246 insertions(+), 199 deletions(-)
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 866aea7..2b78c4d 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -16,6 +16,7 @@
 #include "allocator.h"

+#include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/new.h"
@@ -31,14 +32,12 @@
 constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
 constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE;
 constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1;
 constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;
+constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8;
 constexpr static uint32_t MIN_SIZE = 16;
 constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;

 // The number of times to attempt claiming an in-progress slab allocation.
-constexpr static uint32_t MAX_TRIES = 128;
-
-// A sentinel used to indicate an invalid but non-null pointer value.
-constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
+constexpr static uint32_t MAX_TRIES = 1024;

 static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
@@ -70,8 +69,8 @@ static void rpc_free(void *ptr) {

 // Convert a potentially disjoint bitmask into an increasing integer per-lane
 // for use with indexing between gpu lanes.
-static inline uint32_t lane_count(uint64_t lane_mask) {
-  return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1));
+static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) {
+  return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1));
 }

 // Obtain an initial value to seed a random number generator. We use the rounded
@@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) {
 void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
   uint64_t mask = gpu::get_lane_mask();
   uint32_t workers = cpp::popcount(uniform);
-  for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers)
+  for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n;
+       i += workers)
     s[i] = c;
 }

@@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) {
   return x && (x & (x - 1)) == 0;
 }

-// Where this chunk size should start looking in the global array.
-static inline constexpr uint32_t start_index(uint32_t chunk_index) {
-  return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) /
-         impl::get_chunk_id(SLAB_SIZE / 2);
+// Where this chunk size should start looking in the global array. Small
+// allocations are much more likely than large ones, so we give them the most
+// space. We use a cubic easing function normalized on the possible chunks.
+static inline constexpr uint32_t get_start_index(uint32_t chunk_size) {
+  constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2);
+  uint64_t norm =
+      (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk;
+  uint64_t bias = (norm * norm * norm) >> 32;
+  uint64_t inv = (1 << 16) - bias;
+  return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
+}
+
+// Returns the id of the lane below this one that acts as its leader.
+static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
+  uint64_t mask = id < BITS_IN_DWORD ? ~0ull << (id + 1) : 0;
+  return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1;
+}
+
+// We use a sentinel value to indicate a failed or in-progress allocation.
+template <typename T> bool is_sentinel(const T &x) {
+  return x == cpp::numeric_limits<T>::max();
 }

 } // namespace impl
@@ -264,28 +281,33 @@ struct Slab {
         continue;

       // We try using any known empty bits from the previous attempt first.
-      uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask),
-                                    ~after ? (old_index & ~(BITS_IN_WORD - 1)) +
-                                                 cpp::countr_zero(~after)
-                                           : impl::xorshift32(state));
+      uint32_t start = gpu::shuffle(
+          mask, cpp::countr_zero(uniform & mask),
+          ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after)
+                 : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD));

-      uint32_t id = impl::lane_count(uniform & mask);
+      // Each lane tries to claim one bit in a single contiguous mask.
+      uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id());
       uint32_t index = (start + id) % usable_bits(chunk_size);
       uint32_t slot = index / BITS_IN_WORD;
       uint32_t bit = index % BITS_IN_WORD;

       // Get the mask of bits destined for the same slot and coalesce it.
-      uint64_t match = uniform & gpu::match_any(mask, slot);
-      uint32_t length = cpp::popcount(match);
-      uint32_t bitmask = gpu::shuffle(
-          mask, cpp::countr_zero(match),
-          static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit);
+      uint32_t leader = impl::get_leader_id(
+          uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0),
+          gpu::get_lane_id());
+      uint32_t length = cpp::popcount(uniform & mask) -
+                        impl::lane_count(uniform & mask, leader);
+      uint32_t bitmask =
+          static_cast<uint32_t>(
+              (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1)
+          << bit;

       uint32_t before = 0;
-      if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match)))
+      if (gpu::get_lane_id() == leader)
         before = cpp::AtomicRef(get_bitfield()[slot])
                      .fetch_or(bitmask, cpp::MemoryOrder::RELAXED);
-      before = gpu::shuffle(mask, cpp::countr_zero(match), before);
+      before = gpu::shuffle(mask, leader, before);
       if (~before & (1 << bit))
         result = ptr_from_index(index, chunk_size);
       else
@@ -323,20 +345,20 @@ struct GuardPtr {
 private:
   struct RefCounter {
     // Indicates that the object is in its deallocation phase and thus invalid.
-    static constexpr uint64_t INVALID = uint64_t(1) << 63;
+    static constexpr uint32_t INVALID = uint32_t(1) << 31;

     // If a read preempts an unlock call we indicate this so the following
     // unlock call can swap out the helped bit and maintain exclusive ownership.
-    static constexpr uint64_t HELPED = uint64_t(1) << 62;
+    static constexpr uint32_t HELPED = uint32_t(1) << 30;

     // Resets the reference counter, cannot be reset to zero safely.
-    void reset(uint32_t n, uint64_t &count) {
+    void reset(uint32_t n, uint32_t &count) {
       counter.store(n, cpp::MemoryOrder::RELAXED);
       count = n;
     }

     // Acquire a slot in the reference counter if it is not invalid.
-    bool acquire(uint32_t n, uint64_t &count) {
+    bool acquire(uint32_t n, uint32_t &count) {
       count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
       return (count & INVALID) == 0;
     }
@@ -349,7 +371,7 @@ private:
      // another thread resurrected the counter and we quit, or a parallel read
      // helped us invalidating it. For the latter, claim that flag and return.
      if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
-        uint64_t expected = 0;
+        uint32_t expected = 0;
        if (counter.compare_exchange_strong(expected, INVALID,
                                            cpp::MemoryOrder::RELAXED,
                                            cpp::MemoryOrder::RELAXED))
@@ -372,28 +394,29 @@ private:
      return (val & INVALID) ? 0 : val;
    }

-    cpp::Atomic<uint64_t> counter{0};
+    cpp::Atomic<uint32_t> counter{0};
  };

-  cpp::Atomic<Slab *> ptr{nullptr};
-  RefCounter ref{};
+  cpp::Atomic<Slab *> ptr;
+  RefCounter ref;

   // Should be called by a single lane for each different pointer.
   template <typename... Args>
-  Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+  Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) {
     Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
     if (!expected &&
         ptr.compare_exchange_strong(
-            expected, reinterpret_cast<Slab *>(SENTINEL),
+            expected,
+            reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
             cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
-      count = cpp::numeric_limits<uint64_t>::max();
+      count = cpp::numeric_limits<uint32_t>::max();
       void *raw = impl::rpc_allocate(sizeof(Slab));
       if (!raw)
         return nullptr;
       return new (raw) Slab(cpp::forward<Args>(args)...);
     }

-    if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
+    if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
       return nullptr;

     if (!ref.acquire(n, count))
@@ -405,7 +428,7 @@ private:

   // Finalize the associated memory and signal that it is ready to use by
   // resetting the counter.
-  void finalize(Slab *mem, uint32_t n, uint64_t &count) {
+  void finalize(Slab *mem, uint32_t n, uint32_t &count) {
     cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
     ptr.store(mem, cpp::MemoryOrder::RELAXED);
     cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
@@ -418,7 +441,7 @@ public:
   // The uniform mask represents which lanes share the same pointer. For each
   // uniform value we elect a leader to handle it on behalf of the other lanes.
   template <typename... Args>
-  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count,
                  Args &&...args) {
     count = 0;
     Slab *result = nullptr;
@@ -433,14 +456,15 @@ public:

     // We defer storing the newly allocated slab until now so that we can use
     // multiple lanes to initialize it and release it for use.
-    if (count == cpp::numeric_limits<uint64_t>::max()) {
+    if (impl::is_sentinel(count)) {
       result->initialize(uniform);
       if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
         finalize(result, cpp::popcount(uniform), count);
     }

-    if (count != cpp::numeric_limits<uint64_t>::max())
-      count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1;
+    if (!impl::is_sentinel(count))
+      count = count - cpp::popcount(uniform) +
+              impl::lane_count(uniform, gpu::get_lane_id()) + 1;

     return result;
   }
@@ -469,7 +493,7 @@ static GuardPtr slots[ARRAY_SIZE] = {};
 // Keep a cache of the last successful slot for each chunk size. Initialize it
 // to an even spread of the total size. Must be updated if the chunking scheme
 // changes.
-#define S(X) (impl::start_index(X))
+#define S(X) (impl::get_start_index(X))
 static cpp::Atomic<uint32_t> indices[] = {
     S(16),  S(32),  S(48),  S(64),  S(96),  S(112), S(128),
     S(192), S(224), S(256), S(384), S(448), S(512), S(768),
@@ -481,26 +505,28 @@ static cpp::Atomic<uint32_t> indices[] = {
 #undef S

 // Tries to find a slab in the table that can support the given chunk size.
-static Slab *find_slab(uint32_t chunk_size) {
+static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
   // We start at the index of the last successful allocation for this kind.
   uint32_t chunk_id = impl::get_chunk_id(chunk_size);
   uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED);
-  uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);

-  for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) {
+  for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) {
     uint32_t index =
-        !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE;
+        !offset ? start
+                : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE;

-    if (slots[index].use_count() < Slab::available_chunks(chunk_size)) {
+    if (!offset ||
+        slots[index].use_count() < Slab::available_chunks(chunk_size)) {
       uint64_t lane_mask = gpu::get_lane_mask();
-      uint64_t reserved = 0;
+      uint32_t reserved = 0;

       Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
                                          reserved, chunk_size, index);

       // If there is a slab allocation in progress we retry a few times.
       for (uint32_t retries = 0;
-           retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) {
+           !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
+           retries++) {
         uint64_t lane_mask = gpu::get_lane_mask();
         slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
                                      chunk_size, index);
@@ -514,13 +540,17 @@ static Slab *find_slab(uint32_t chunk_size) {
           slab->get_chunk_size() == chunk_size) {
         if (index != start)
           indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED);
+        uniform = uniform & gpu::get_lane_mask();
         return slab;
       } else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
                           slab->get_chunk_size() != chunk_size)) {
         slots[index].unlock(gpu::get_lane_mask(),
                             gpu::get_lane_mask() & uniform);
-      } else if (!slab && reserved == SENTINEL) {
+      } else if (!slab && impl::is_sentinel(reserved)) {
+        uniform = uniform & gpu::get_lane_mask();
         return nullptr;
+      } else {
+        sleep_briefly();
       }
     }
   }
@@ -547,12 +577,12 @@ void *allocate(uint64_t size) {

   // Try to find a slab for the rounded up chunk size and allocate from it.
   uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size));
-  Slab *slab = find_slab(chunk_size);
-  if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL))
+  uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
+  Slab *slab = find_slab(chunk_size, uniform);
+  if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab)))
     return nullptr;

   uint64_t lane_mask = gpu::get_lane_mask();
-  uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index());
   void *ptr = slab->allocate(lane_mask, uniform);
   return ptr;
 }
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 1050938..13f46a1 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -141,6 +141,20 @@ add_header_library(
 )

 add_header_library(
+  asinhf
+  HDRS
+    asinhf.h
+  DEPENDS
+    .acoshf_utils
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.config
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
   asinf
   HDRS
     asinf.h
diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h
new file mode 100644
index 0000000..1c08a6e
--- /dev/null
+++ b/libc/src/__support/math/asinhf.h
@@ -0,0 +1,125 @@
+//===-- Implementation header for asinhf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinhf(float x) {
+  using namespace acoshf_internal;
+  using FPBits_t = typename fputil::FPBits<float>;
+  FPBits_t xbits(x);
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = xbits.abs().uintval();
+
+  // |x| <= 2^-3
+  if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
+    // |x| <= 2^-26
+    if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+      return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
+                                    ? x
+                                    : (x - 0x1.5555555555555p-3 * x * x * x));
+    }
+
+    double x_d = x;
+    double x_sq = x_d * x_d;
+    // Generated by Sollya with:
+    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
+    // [0, 2^-2]);
+    double p = fputil::polyeval(
+        x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
+        -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
+        0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
+    return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
+  }
+
+  const double SIGN[2] = {1.0, -1.0};
+  double x_sign = SIGN[x_u >> 31];
+  double x_d = x;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // Helper functions to set results for exceptional cases.
+  auto round_result_slightly_down = [x_sign](float r) -> float {
+    return fputil::multiply_add(static_cast<float>(x_sign), r,
+                                static_cast<float>(x_sign) * (-0x1.0p-24f));
+  };
+  auto round_result_slightly_up = [x_sign](float r) -> float {
+    return fputil::multiply_add(static_cast<float>(x_sign), r,
+                                static_cast<float>(x_sign) * 0x1.0p-24f);
+  };
+
+  if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
+    if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits_t::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // Exceptional cases when x > 2^24.
+    switch (x_abs) {
+    case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
+      return round_result_slightly_down(0x1.1e0696p4f);
+    case 0x4c803f2c: // |x| = 0x1.007e58p26f
+      return round_result_slightly_down(0x1.2b786cp4f);
+    case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
+      return round_result_slightly_up(0x1.6fdd34p4f);
+    case 0x5c569e88: // |x| = 0x1.ad3d1p57f
+      return round_result_slightly_up(0x1.45c146p5f);
+    case 0x5e68984e: // |x| = 0x1.d1309cp61f
+      return round_result_slightly_up(0x1.5c9442p5f);
+    case 0x655890d3: // |x| = 0x1.b121a6p75f
+      return round_result_slightly_down(0x1.a9a3f2p5f);
+    case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
+      return round_result_slightly_up(0x1.af66cp5f);
+    case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
+      return round_result_slightly_down(0x1.08b512p6f);
+    case 0x7997f30a: // |x| = 0x1.2fe614p116f
+      return round_result_slightly_up(0x1.451436p6f);
+    }
+  } else {
+    // Exceptional cases when x < 2^24.
+    if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
+      // |x| = 0x1.575e4cp12f
+      return round_result_slightly_down(0x1.29becap3f);
+    }
+    if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
+      // |x| = 0x1.a5209p20f
+      return round_result_slightly_down(0x1.e1b92p3f);
+    }
+  }
+#else
+  if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
+    return x;
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // asinh(x) = log(x + sqrt(x^2 + 1))
+  return static_cast<float>(
+      x_sign * log_eval(fputil::multiply_add(
+                   x_d, x_sign,
+                   fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index d4d268c..f91feacb 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3889,12 +3889,7 @@ add_entrypoint_object(
   HDRS
     ../asinhf.h
   DEPENDS
-    .explogxf
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
+    libc.src.__support.math.asinhf
 )

 add_entrypoint_object(
diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp
index 3aed3bc..45023c8 100644
--- a/libc/src/math/generic/asinhf.cpp
+++ b/libc/src/math/generic/asinhf.cpp
@@ -7,112 +7,10 @@
 //===----------------------------------------------------------------------===//

 #include "src/math/asinhf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/common_constants.h"
-#include "src/math/generic/explogxf.h"
+#include "src/__support/math/asinhf.h"

 namespace LIBC_NAMESPACE_DECL {

-LLVM_LIBC_FUNCTION(float, asinhf, (float x)) {
-  using namespace acoshf_internal;
-  using FPBits_t = typename fputil::FPBits<float>;
-  FPBits_t xbits(x);
-  uint32_t x_u = xbits.uintval();
-  uint32_t x_abs = xbits.abs().uintval();
-
-  // |x| <= 2^-3
-  if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
-    // |x| <= 2^-26
-    if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
-      return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
-                                    ? x
-                                    : (x - 0x1.5555555555555p-3 * x * x * x));
-    }
-
-    double x_d = x;
-    double x_sq = x_d * x_d;
-    // Generated by Sollya with:
-    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
-    // [0, 2^-2]);
-    double p = fputil::polyeval(
-        x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
-        -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
-        0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
-    return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
-  }
-
-  const double SIGN[2] = {1.0, -1.0};
-  double x_sign = SIGN[x_u >> 31];
-  double x_d = x;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Helper functions to set results for exceptional cases.
-  auto round_result_slightly_down = [x_sign](float r) -> float {
-    return fputil::multiply_add(static_cast<float>(x_sign), r,
-                                static_cast<float>(x_sign) * (-0x1.0p-24f));
-  };
-  auto round_result_slightly_up = [x_sign](float r) -> float {
-    return fputil::multiply_add(static_cast<float>(x_sign), r,
-                                static_cast<float>(x_sign) * 0x1.0p-24f);
-  };
-
-  if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
-    if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
-      if (xbits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits_t::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // Exceptional cases when x > 2^24.
-    switch (x_abs) {
-    case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
-      return round_result_slightly_down(0x1.1e0696p4f);
-    case 0x4c803f2c: // |x| = 0x1.007e58p26f
-      return round_result_slightly_down(0x1.2b786cp4f);
-    case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
-      return round_result_slightly_up(0x1.6fdd34p4f);
-    case 0x5c569e88: // |x| = 0x1.ad3d1p57f
-      return round_result_slightly_up(0x1.45c146p5f);
-    case 0x5e68984e: // |x| = 0x1.d1309cp61f
-      return round_result_slightly_up(0x1.5c9442p5f);
-    case 0x655890d3: // |x| = 0x1.b121a6p75f
-      return round_result_slightly_down(0x1.a9a3f2p5f);
-    case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
-      return round_result_slightly_up(0x1.af66cp5f);
-    case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
-      return round_result_slightly_down(0x1.08b512p6f);
-    case 0x7997f30a: // |x| = 0x1.2fe614p116f
-      return round_result_slightly_up(0x1.451436p6f);
-    }
-  } else {
-    // Exceptional cases when x < 2^24.
-    if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
-      // |x| = 0x1.575e4cp12f
-      return round_result_slightly_down(0x1.29becap3f);
-    }
-    if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
-      // |x| = 0x1.a5209p20f
-      return round_result_slightly_down(0x1.e1b92p3f);
-    }
-  }
-#else
-  if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
-    return x;
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // asinh(x) = log(x + sqrt(x^2 + 1))
-  return static_cast<float>(
-      x_sign * log_eval(fputil::multiply_add(
-                   x_d, x_sign,
-                   fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
-}
+LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); }

 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt
index e879230..548938f 100644
--- a/libc/src/stdio/baremetal/CMakeLists.txt
+++ b/libc/src/stdio/baremetal/CMakeLists.txt
@@ -72,6 +72,7 @@ add_entrypoint_object(
     ../scanf.h
   DEPENDS
     .scanf_internal
+    libc.include.inttypes
     libc.src.stdio.scanf_core.scanf_main
     libc.src.__support.arg_list
     libc.src.__support.OSUtil.osutil
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index dee125c..561180c 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -35,6 +35,7 @@ add_header_library(
     core_structs.h
   DEPENDS
     .scanf_config
+    libc.include.inttypes
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.bitset
     libc.src.__support.FPUtil.fp_bits
@@ -97,6 +98,7 @@ add_header_library(
   DEPENDS
     .reader
     .core_structs
+    libc.include.inttypes
     libc.src.__support.common
     libc.src.__support.ctype_utils
     libc.src.__support.CPP.bitset
diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index e0218c7..55a3cee 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,13 +17,10 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {

-// returns true if the character exists in the string
-LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) {
-  for (int n = 0; str[n]; ++n) {
-    if (str[n] == c)
-      return true;
-  }
-  return false;
+LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+  for (; *s && *s != c; ++s)
+    ;
+  return (*s == c) ? s : nullptr;
 }

 // bool should be true for wcscspn for complementary span
@@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2,
                                  bool not_match_set) {
   size_t i = 0;
   for (; s1[i]; ++i) {
-    bool in_set = wcschr(s1[i], s2);
+    bool in_set = internal::wcschr(s2, s1[i]);
     if (in_set == not_match_set)
       return i;
   }
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index defc2ce..8ac4916 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,15 +11,14 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"

 namespace LIBC_NAMESPACE_DECL {

 LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
-  for (; *s && *s != c; ++s)
-    ;
-  if (*s == c)
-    return s;
-  return nullptr;
+  LIBC_CRASH_ON_NULLPTR(s);
+  return internal::wcschr(s, c);
 }

 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index a00ba99..5d86a49 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -11,17 +11,10 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"

 namespace LIBC_NAMESPACE_DECL {

-bool contains_char(const wchar_t *str, wchar_t target) {
-  for (; *str != L'\0'; str++)
-    if (*str == target)
-      return true;
-
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
                    (const wchar_t *src, const wchar_t *breakset)) {
   LIBC_CRASH_ON_NULLPTR(src);
@@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,

   // currently O(n * m), can be further optimized to O(n + m) with a hash set
   for (int src_idx = 0; src[src_idx] != 0; src_idx++)
-    if (contains_char(breakset, src[src_idx]))
+    if (internal::wcschr(breakset, src[src_idx]))
       return src + src_idx;

   return nullptr;
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 291efc1..ed4f0aa 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -10,18 +10,12 @@

 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
+#include "wchar_utils.h"

 namespace LIBC_NAMESPACE_DECL {

-bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
-  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
-    if (wc == *delim_ptr)
-      return true;
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
-                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delims,
                     wchar_t **__restrict context)) {
   if (str == nullptr) {
     if (*context == nullptr)
@@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
     str = *context;
   }

-  wchar_t *tok_start, *tok_end;
-  for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
-       ++tok_start)
-    ;
+  wchar_t *tok_start = str;
+  while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
+    ++tok_start;

-  for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
-       ++tok_end)
-    ;
+  wchar_t *tok_end = tok_start;
+  while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
+    ++tok_end;

   if (*tok_end != L'\0') {
     *tok_end = L'\0';