diff options
Diffstat (limited to 'libc/src')
41 files changed, 1470 insertions, 629 deletions
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h index e6fad1b..e999ece 100644 --- a/libc/src/__support/FPUtil/cast.h +++ b/libc/src/__support/FPUtil/cast.h @@ -66,9 +66,9 @@ cast(InType x) { cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN); DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x); return xd.template as<OutType, /*ShouldSignalExceptions=*/true>(); + } else { + return static_cast<OutType>(x); } - - return static_cast<OutType>(x); } } // namespace LIBC_NAMESPACE::fputil diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 866aea7..8fff4cc 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -16,6 +16,7 @@ #include "allocator.h" +#include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/new.h" @@ -31,14 +32,12 @@ constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024; constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE; constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1; constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8; +constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8; constexpr static uint32_t MIN_SIZE = 16; constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1; // The number of times to attempt claiming an in-progress slab allocation. -constexpr static uint32_t MAX_TRIES = 128; - -// A sentinel used to indicate an invalid but non-null pointer value. -constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max(); +constexpr static uint32_t MAX_TRIES = 1024; static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two"); @@ -70,8 +69,8 @@ static void rpc_free(void *ptr) { // Convert a potentially disjoint bitmask into an increasing integer per-lane // for use with indexing between gpu lanes. 
-static inline uint32_t lane_count(uint64_t lane_mask) { - return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1)); +static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) { + return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1)); } // Obtain an initial value to seed a random number generator. We use the rounded @@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) { void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) { uint64_t mask = gpu::get_lane_mask(); uint32_t workers = cpp::popcount(uniform); - for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers) + for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n; + i += workers) s[i] = c; } @@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) { return x && (x & (x - 1)) == 0; } -// Where this chunk size should start looking in the global array. -static inline constexpr uint32_t start_index(uint32_t chunk_index) { - return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) / - impl::get_chunk_id(SLAB_SIZE / 2); +// Where this chunk size should start looking in the global array. Small +// allocations are much more likely than large ones, so we give them the most +// space. We use a cubic easing function normalized on the possible chunks. +static inline constexpr uint32_t get_start_index(uint32_t chunk_size) { + constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2); + uint64_t norm = + (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk; + uint64_t bias = (norm * norm * norm) >> 32; + uint64_t inv = (1 << 16) - bias; + return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16); +} + +// Returns the id of the lane below this one that acts as its leader. +static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) { + uint64_t mask = id < BITS_IN_DWORD ? 
~0ull << (id + 1) : 0; + return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1; +} + +// We use a sentinal value to indicate a failed or in-progress allocation. +template <typename T> bool is_sentinel(const T &x) { + return x == cpp::numeric_limits<T>::max(); } } // namespace impl @@ -264,28 +281,33 @@ struct Slab { continue; // We try using any known empty bits from the previous attempt first. - uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask), - ~after ? (old_index & ~(BITS_IN_WORD - 1)) + - cpp::countr_zero(~after) - : impl::xorshift32(state)); + uint32_t start = gpu::shuffle( + mask, cpp::countr_zero(uniform & mask), + ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after) + : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD)); - uint32_t id = impl::lane_count(uniform & mask); + // Each lane tries to claim one bit in a single contiguous mask. + uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id()); uint32_t index = (start + id) % usable_bits(chunk_size); uint32_t slot = index / BITS_IN_WORD; uint32_t bit = index % BITS_IN_WORD; // Get the mask of bits destined for the same slot and coalesce it. 
- uint64_t match = uniform & gpu::match_any(mask, slot); - uint32_t length = cpp::popcount(match); - uint32_t bitmask = gpu::shuffle( - mask, cpp::countr_zero(match), - static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit); + uint32_t leader = impl::get_leader_id( + uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0), + gpu::get_lane_id()); + uint32_t length = cpp::popcount(uniform & mask) - + impl::lane_count(uniform & mask, leader); + uint32_t bitmask = + static_cast<uint32_t>( + (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1) + << bit; uint32_t before = 0; - if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match))) + if (gpu::get_lane_id() == leader) before = cpp::AtomicRef(get_bitfield()[slot]) .fetch_or(bitmask, cpp::MemoryOrder::RELAXED); - before = gpu::shuffle(mask, cpp::countr_zero(match), before); + before = gpu::shuffle(mask, leader, before); if (~before & (1 << bit)) result = ptr_from_index(index, chunk_size); else @@ -323,20 +345,20 @@ struct GuardPtr { private: struct RefCounter { // Indicates that the object is in its deallocation phase and thus invalid. - static constexpr uint64_t INVALID = uint64_t(1) << 63; + static constexpr uint32_t INVALID = uint32_t(1) << 31; // If a read preempts an unlock call we indicate this so the following // unlock call can swap out the helped bit and maintain exclusive ownership. - static constexpr uint64_t HELPED = uint64_t(1) << 62; + static constexpr uint32_t HELPED = uint32_t(1) << 30; // Resets the reference counter, cannot be reset to zero safely. - void reset(uint32_t n, uint64_t &count) { + void reset(uint32_t n, uint32_t &count) { counter.store(n, cpp::MemoryOrder::RELAXED); count = n; } // Acquire a slot in the reference counter if it is not invalid. 
- bool acquire(uint32_t n, uint64_t &count) { + bool acquire(uint32_t n, uint32_t &count) { count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n; return (count & INVALID) == 0; } @@ -349,7 +371,7 @@ private: // another thread resurrected the counter and we quit, or a parallel read // helped us invalidating it. For the latter, claim that flag and return. if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) { - uint64_t expected = 0; + uint32_t expected = 0; if (counter.compare_exchange_strong(expected, INVALID, cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) @@ -372,28 +394,29 @@ private: return (val & INVALID) ? 0 : val; } - cpp::Atomic<uint64_t> counter{0}; + cpp::Atomic<uint32_t> counter{0}; }; - cpp::Atomic<Slab *> ptr{nullptr}; - RefCounter ref{}; + cpp::Atomic<Slab *> ptr; + RefCounter ref; // Should be called be a single lane for each different pointer. template <typename... Args> - Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) { + Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) { Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED); if (!expected && ptr.compare_exchange_strong( - expected, reinterpret_cast<Slab *>(SENTINEL), + expected, + reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()), cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) { - count = cpp::numeric_limits<uint64_t>::max(); + count = cpp::numeric_limits<uint32_t>::max(); void *raw = impl::rpc_allocate(sizeof(Slab)); if (!raw) return nullptr; return new (raw) Slab(cpp::forward<Args>(args)...); } - if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL)) + if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected))) return nullptr; if (!ref.acquire(n, count)) @@ -405,7 +428,7 @@ private: // Finalize the associated memory and signal that it is ready to use by // resetting the counter. 
- void finalize(Slab *mem, uint32_t n, uint64_t &count) { + void finalize(Slab *mem, uint32_t n, uint32_t &count) { cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE); ptr.store(mem, cpp::MemoryOrder::RELAXED); cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); @@ -418,7 +441,7 @@ public: // The uniform mask represents which lanes share the same pointer. For each // uniform value we elect a leader to handle it on behalf of the other lanes. template <typename... Args> - Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count, + Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count, Args &&...args) { count = 0; Slab *result = nullptr; @@ -433,14 +456,17 @@ public: // We defer storing the newly allocated slab until now so that we can use // multiple lanes to initialize it and release it for use. - if (count == cpp::numeric_limits<uint64_t>::max()) { + if (impl::is_sentinel(count)) { result->initialize(uniform); if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform))) finalize(result, cpp::popcount(uniform), count); + count = + gpu::shuffle(gpu::get_lane_mask(), cpp::countr_zero(uniform), count); } - if (count != cpp::numeric_limits<uint64_t>::max()) - count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1; + if (!impl::is_sentinel(count)) + count = count - cpp::popcount(uniform) + + impl::lane_count(uniform, gpu::get_lane_id()); return result; } @@ -469,7 +495,7 @@ static GuardPtr slots[ARRAY_SIZE] = {}; // Keep a cache of the last successful slot for each chunk size. Initialize it // to an even spread of the total size. Must be updated if the chunking scheme // changes. 
-#define S(X) (impl::start_index(X)) +#define S(X) (impl::get_start_index(X)) static cpp::Atomic<uint32_t> indices[] = { S(16), S(32), S(48), S(64), S(96), S(112), S(128), S(192), S(224), S(256), S(384), S(448), S(512), S(768), @@ -481,26 +507,28 @@ static cpp::Atomic<uint32_t> indices[] = { #undef S // Tries to find a slab in the table that can support the given chunk size. -static Slab *find_slab(uint32_t chunk_size) { +static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) { // We start at the index of the last successful allocation for this kind. uint32_t chunk_id = impl::get_chunk_id(chunk_size); uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED); - uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size); - for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) { + for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) { uint32_t index = - !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE; + !offset ? start + : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE; - if (slots[index].use_count() < Slab::available_chunks(chunk_size)) { + if (!offset || + slots[index].use_count() < Slab::available_chunks(chunk_size)) { uint64_t lane_mask = gpu::get_lane_mask(); - uint64_t reserved = 0; + uint32_t reserved = 0; Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved, chunk_size, index); // If there is a slab allocation in progress we retry a few times. for (uint32_t retries = 0; - retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) { + !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES; + retries++) { uint64_t lane_mask = gpu::get_lane_mask(); slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved, chunk_size, index); @@ -510,17 +538,21 @@ static Slab *find_slab(uint32_t chunk_size) { // If we find a slab with a matching chunk size then we store the result. // Otherwise, we need to free the claimed lock and continue. 
In the case // of out-of-memory we receive a sentinel value and return a failure. - if (slab && reserved <= Slab::available_chunks(chunk_size) && + if (slab && reserved < Slab::available_chunks(chunk_size) && slab->get_chunk_size() == chunk_size) { if (index != start) indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED); + uniform = uniform & gpu::get_lane_mask(); return slab; - } else if (slab && (reserved > Slab::available_chunks(chunk_size) || + } else if (slab && (reserved >= Slab::available_chunks(chunk_size) || slab->get_chunk_size() != chunk_size)) { slots[index].unlock(gpu::get_lane_mask(), gpu::get_lane_mask() & uniform); - } else if (!slab && reserved == SENTINEL) { + } else if (!slab && impl::is_sentinel(reserved)) { + uniform = uniform & gpu::get_lane_mask(); return nullptr; + } else { + sleep_briefly(); } } } @@ -547,12 +579,12 @@ void *allocate(uint64_t size) { // Try to find a slab for the rounded up chunk size and allocate from it. uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size)); - Slab *slab = find_slab(chunk_size); - if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL)) + uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size); + Slab *slab = find_slab(chunk_size, uniform); + if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab))) return nullptr; uint64_t lane_mask = gpu::get_lane_mask(); - uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index()); void *ptr = slab->allocate(lane_mask, uniform); return ptr; } diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 1050938..95acc962 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -141,6 +141,80 @@ add_header_library( ) add_header_library( + asinhf + HDRS + asinhf.h + DEPENDS + .acoshf_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.sqrt + 
libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( + asinhf16 + HDRS + asinhf16.h +DEPENDS + .acoshf_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( + atan_utils + HDRS + atan_utils.h +DEPENDS + libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.optimization +) + +add_header_library( + atan + HDRS + atan.h +DEPENDS + .atan_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.macros.optimization +) + +add_header_library( + atanf + HDRS + atanf.h + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization +) + +add_header_library( asinf HDRS asinf.h diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h new file mode 100644 index 0000000..1c08a6e --- /dev/null +++ b/libc/src/__support/math/asinhf.h @@ -0,0 +1,125 @@ +//===-- Implementation header for asinhf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H + +#include "acoshf_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float asinhf(float x) { + using namespace acoshf_internal; + using FPBits_t = typename fputil::FPBits<float>; + FPBits_t xbits(x); + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = xbits.abs().uintval(); + + // |x| <= 2^-2 + if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { + // |x| <= 2^-26 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x - 0x1.5555555555555p-3 * x * x * x)); + } + + double x_d = x; + double x_sq = x_d * x_d; + // Generated by Sollya with: + // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|], + // [0, 2^-2]); + double p = fputil::polyeval( + x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, + -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, + 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); + return static_cast<float>(fputil::multiply_add(x_d, p, x_d)); + } + + const double SIGN[2] = {1.0, -1.0}; + double x_sign = SIGN[x_u >> 31]; + double x_d = x; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Helper functions to set results for exceptional cases.
+ auto round_result_slightly_down = [x_sign](float r) -> float { + return fputil::multiply_add(static_cast<float>(x_sign), r, + static_cast<float>(x_sign) * (-0x1.0p-24f)); + }; + auto round_result_slightly_up = [x_sign](float r) -> float { + return fputil::multiply_add(static_cast<float>(x_sign), r, + static_cast<float>(x_sign) * 0x1.0p-24f); + }; + + if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) { + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits_t::quiet_nan().get_val(); + } + + return x; + } + + // Exceptional cases when x > 2^24. + switch (x_abs) { + case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f + return round_result_slightly_down(0x1.1e0696p4f); + case 0x4c803f2c: // |x| = 0x1.007e58p26f + return round_result_slightly_down(0x1.2b786cp4f); + case 0x4f8ffb03: // |x| = 0x1.1ff606p32f + return round_result_slightly_up(0x1.6fdd34p4f); + case 0x5c569e88: // |x| = 0x1.ad3d1p57f + return round_result_slightly_up(0x1.45c146p5f); + case 0x5e68984e: // |x| = 0x1.d1309cp61f + return round_result_slightly_up(0x1.5c9442p5f); + case 0x655890d3: // |x| = 0x1.b121a6p75f + return round_result_slightly_down(0x1.a9a3f2p5f); + case 0x65de7ca6: // |x| = 0x1.bcf94cp76f + return round_result_slightly_up(0x1.af66cp5f); + case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f + return round_result_slightly_down(0x1.08b512p6f); + case 0x7997f30a: // |x| = 0x1.2fe614p116f + return round_result_slightly_up(0x1.451436p6f); + } + } else { + // Exceptional cases when x < 2^24. 
+ if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) { + // |x| = 0x1.575e4cp12f + return round_result_slightly_down(0x1.29becap3f); + } + if (LIBC_UNLIKELY(x_abs == 0x49d29048)) { + // |x| = 0x1.a5209p20f + return round_result_slightly_down(0x1.e1b92p3f); + } + } +#else + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) + return x; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // asinh(x) = log(x + sqrt(x^2 + 1)) + return static_cast<float>( + x_sign * log_eval(fputil::multiply_add( + x_d, x_sign, + fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0))))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H diff --git a/libc/src/__support/math/asinhf16.h b/libc/src/__support/math/asinhf16.h new file mode 100644 index 0000000..3c5171e --- /dev/null +++ b/libc/src/__support/math/asinhf16.h @@ -0,0 +1,121 @@ +//===-- Implementation header for asinhf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "acoshf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 asinhf16(float16 x) { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr size_t N_EXCEPTS = 8; + + constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + + // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ) + {0x3769, 0x372a, 1, 0, 1}, + // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ) + {0x3b5b, 0x3a96, 1, 0, 0}, + // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ) + {0x4b1f, 0x42b3, 1, 0, 0}, + // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ) + {0x4c9b, 0x4336, 1, 0, 1}, + // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ) + {0xb769, 0xb72a, 0, 1, 1}, + // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ) + {0xbb5b, 0xba96, 0, 1, 0}, + // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ) + {0xcb1f, 0xc2b3, 0, 1, 0}, + // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ) + {0xcc9b, 0xc336, 0, 1, 1}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace acoshf_internal; + using FPBits = fputil::FPBits<float16>; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u 
& 0x7fff; + + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Handle exceptional values + if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + float xf = x; + const float SIGN[2] = {1.0f, -1.0f}; + float x_sign = SIGN[x_u >> 15]; + + // |x| <= 0.25 + if (LIBC_UNLIKELY(x_abs <= 0x3400)) { + // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain + // rounding types. + if (LIBC_UNLIKELY(x_abs < 0x29c6)) { + int rounding = fputil::quick_get_round(); + if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0) + return fputil::cast<float16>(xf + 0x1p-24f); + if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0) + return fputil::cast<float16>(xf - 0x1p-24f); + return fputil::cast<float16>(xf); + } + + float x_sq = xf * xf; + // Generated by Sollya with: + // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]); + // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f + // for better accuracy. 
+ float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f, + -0x1.6c53dep-5f, 0x1.bd114ep-5f); + + return fputil::cast<float16>(xf * p); + } + + // General case: asinh(x) = ln(x + sqrt(x^2 + 1)) + float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f)); + return fputil::cast<float16>( + x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H diff --git a/libc/src/__support/math/atan.h b/libc/src/__support/math/atan.h new file mode 100644 index 0000000..62190b0 --- /dev/null +++ b/libc/src/__support/math/atan.h @@ -0,0 +1,189 @@ +//===-- Implementation header for atan --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H + +#include "atan_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +// To compute atan(x), we divided it into the following cases: +// * |x| < 2^-26: +// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply +// return atan(x) = x - sign(x) * epsilon. 
+// * 2^-26 <= |x| < 1: +// We perform range reduction mod 2^-6 = 1/64 as follows: +// Let k = 2^(-6) * round(|x| * 2^6), then +// atan(x) = sign(x) * atan(|x|) +// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k))). +// We store atan(k) in a look up table, and perform intermediate steps in +// double-double. +// * 1 <= |x| < 2^53: +// First we perform the transformation y = 1/|x|: +// atan(x) = sign(x) * (pi/2 - atan(1/|x|)) +// = sign(x) * (pi/2 - atan(y)). +// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the +// previous case: +// Let k = 2^(-6) * round(y * 2^6), then +// atan(y) = atan(k) + atan((y - k) / (1 + y*k)) +// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)) +// = atan(k) + atan((1 - k*|x|) / (|x| + k)). +// * |x| >= 2^53: +// Using the reciprocal transformation: +// atan(x) = sign(x) * (pi/2 - atan(1/|x|)). +// We have that: +// atan(1/|x|) <= 1/|x| <= 2^-53, +// which is smaller than ulp(pi/2) / 2. +// So we can return: +// atan(x) = sign(x) * (pi/2 - epsilon) + +LIBC_INLINE static constexpr double atan(double x) { + + using namespace atan_internal; + using FPBits = fputil::FPBits<double>; + + constexpr double IS_NEG[2] = {1.0, -1.0}; + constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, + 0x1.921fb54442d18p0}; + constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, + -0x1.921fb54442d18p0}; + + FPBits xbits(x); + bool x_sign = xbits.is_neg(); + xbits = xbits.abs(); + uint64_t x_abs = xbits.uintval(); + int x_exp = + static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS; + + // |x| < 1.
+ if (x_exp < 0) { + if (LIBC_UNLIKELY(x_exp < -26)) { +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return x; +#else + if (x == 0.0) + return x; + // |x| < 2^-26 + return fputil::multiply_add(-0x1.0p-54, x, x); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + double x_d = xbits.get_val(); + // k = 2^-6 * round(2^6 * |x|) + double k = fputil::nearest_integer(0x1.0p6 * x_d); + unsigned idx = static_cast<unsigned>(k); + k *= 0x1.0p-6; + + // numerator = |x| - k + DoubleDouble num, den; + num.lo = 0.0; + num.hi = x_d - k; + + // denominator = 1 + k * |x| + den.hi = fputil::multiply_add(x_d, k, 1.0); + DoubleDouble prod = fputil::exact_mult(x_d, k); + // Using Dekker's 2SUM algorithm to compute the lower part. + den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo; + + // x_r = (|x| - k) / (1 + k * |x|) + DoubleDouble x_r = fputil::div(num, den); + + // Approximating atan(x_r) using Taylor polynomial. + DoubleDouble p = atan_eval(x_r); + + // atan(x) = sign(x) * (atan(k) + atan(x_r)) + // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) )) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo))); +#else + + DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi); + double c1 = c0.lo + (ATAN_I[idx].lo + p.lo); + double r = IS_NEG[x_sign] * (c0.hi + c1); + + return r; +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + // |x| >= 2^53 or x is NaN. + if (LIBC_UNLIKELY(x_exp >= 53)) { + // x is nan + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + // |x| >= 2^53 + // atan(x) ~ sign(x) * pi/2.
+ if (x_exp >= 53) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return IS_NEG[x_sign] * PI_OVER_2.hi; +#else + return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi, + IS_NEG[x_sign] * PI_OVER_2.lo); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + double x_d = xbits.get_val(); + double y = 1.0 / x_d; + + // k = 2^-6 * round(2^6 / |x|) + double k = fputil::nearest_integer(0x1.0p6 * y); + unsigned idx = static_cast<unsigned>(k); + k *= 0x1.0p-6; + + // denominator = |x| + k + DoubleDouble den = fputil::exact_add(x_d, k); + // numerator = 1 - k * |x| + DoubleDouble num; + num.hi = fputil::multiply_add(-x_d, k, 1.0); + DoubleDouble prod = fputil::exact_mult(x_d, k); + // Using Dekker's 2SUM algorithm to compute the lower part. + num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo; + + // x_r = (1/|x| - k) / (1 + k/|x|) + // = (1 - k * |x|) / (|x| + k) + DoubleDouble x_r = fputil::div(num, den); + + // Approximating atan(x_r) using Taylor polynomial. + DoubleDouble p = atan_eval(x_r); + + // atan(x) = sign(x) * (pi/2 - atan(1/|x|)) + // = sign(x) * (pi/2 - atan(k) - atan(x_r)) + // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| + k))) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo; + return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part)); +#else + DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi); + DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi); + double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo); + + double r = IS_NEG[!x_sign] * (c1.hi + c2); + + return r; +#endif +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H diff --git a/libc/src/math/generic/atan_utils.h b/libc/src/__support/math/atan_utils.h index 24c7271..9e8d7d6 100644 --- a/libc/src/math/generic/atan_utils.h +++ b/libc/src/__support/math/atan_utils.h @@ -18,7 +18,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace { +namespace
atan_internal { using DoubleDouble = fputil::DoubleDouble; using Float128 = fputil::DyadicFloat<128>; @@ -29,7 +29,7 @@ using Float128 = fputil::DyadicFloat<128>; // b = round(atan(i/64) - a, D, RN); // print("{", b, ",", a, "},"); // }; -constexpr DoubleDouble ATAN_I[65] = { +static constexpr DoubleDouble ATAN_I[65] = { {0.0, 0.0}, {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7}, {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6}, @@ -110,7 +110,8 @@ constexpr DoubleDouble ATAN_I[65] = { // + x_lo * (1 - x_hi^2 + x_hi^4) // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by: // |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66. -[[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) { +[[maybe_unused]] LIBC_INLINE static DoubleDouble +atan_eval(const DoubleDouble &x) { DoubleDouble p; p.hi = x.hi; double x_hi_sq = x.hi * x.hi; @@ -142,7 +143,7 @@ constexpr DoubleDouble ATAN_I[65] = { // b = 2^ll + a; // print("{Sign::POS, ", 2^(ll - 128), ",", b, "},"); // }; -constexpr Float128 ATAN_I_F128[65] = { +static constexpr Float128 ATAN_I_F128[65] = { {Sign::POS, 0, 0_u128}, {Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128}, {Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128}, @@ -215,7 +216,7 @@ constexpr Float128 ATAN_I_F128[65] = { // [0, 2^-7]); // > dirtyinfnorm(atan(x) - P, [0, 2^-7]); // 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122 -constexpr Float128 ATAN_POLY_F128[] = { +static constexpr Float128 ATAN_POLY_F128[] = { {Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128}, {Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128}, {Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128}, @@ -225,7 +226,8 @@ constexpr Float128 ATAN_POLY_F128[] = { }; // Approximate atan for |x| <= 2^-7. 
-[[maybe_unused]] Float128 atan_eval(const Float128 &x) { +[[maybe_unused]] LIBC_INLINE static constexpr Float128 +atan_eval(const Float128 &x) { Float128 x_sq = fputil::quick_mul(x, x); Float128 x3 = fputil::quick_mul(x, x_sq); Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1], @@ -234,7 +236,7 @@ constexpr Float128 ATAN_POLY_F128[] = { return fputil::multiply_add(x3, p, x); } -} // anonymous namespace +} // namespace atan_internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h new file mode 100644 index 0000000..92799dc --- /dev/null +++ b/libc/src/__support/math/atanf.h @@ -0,0 +1,129 @@ +//===-- Implementation header for atanf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H + +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float atanf(float x) { + using namespace inv_trigf_utils_internal; + using FPBits = typename fputil::FPBits<float>; + + constexpr double FINAL_SIGN[2] = {1.0, -1.0}; + constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0, + -0x1.921fb54442d18p0}; + + FPBits x_bits(x); + Sign sign = x_bits.sign(); + x_bits.set_sign(Sign::POS); + uint32_t x_abs = x_bits.uintval(); + + 
// x is inf or nan, |x| <= 2^-4 or |x| >= 16. + if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) { + double x_d = static_cast<double>(x); + double const_term = 0.0; + if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) { + // atan(+-Inf) = +-pi/2. + if (x_bits.is_inf()) { + volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()]; + return static_cast<float>(sign_pi_over_2); + } + if (x_bits.is_nan()) + return x; + // |x| >= 16 + x_d = -1.0 / x_d; + const_term = SIGNED_PI_OVER_2[sign.is_neg()]; + } + // 0 <= x < 1/16; + if (LIBC_UNLIKELY(x_bits.is_zero())) + return x; + // |x| < 2^-12; + if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) { +#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + return fputil::multiply_add(x, -0x1.0p-25f, x); +#else + double x_d = static_cast<double>(x); + return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d)); +#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT + } + // Use Taylor polynomial: + // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11). + constexpr double ATAN_TAYLOR[6] = { + 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, + -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, + }; + double x2 = x_d * x_d; + double x4 = x2 * x2; + double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); + double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); + double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]); + double p = fputil::polyeval(x4, c0, c1, c2); + double r = fputil::multiply_add(x_d, p, const_term); + return static_cast<float>(r); + } + + // Range reduction steps: + // 1) atan(x) = sign(x) * atan(|x|) + // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|) + // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32. 
+ // 4) Then we use polynomial approximation: + // atan(x) ~ atan(k/16) + (x - k/16) * Q(x - k/16) + // = P(x - k/16) + double x_d = 0, const_term = 0, final_sign = 0; + int idx = 0; + + if (x_abs > 0x3f80'0000U) { + // |x| > 1, we need to invert x, so we will perform range reduction in + // double precision. + x_d = 1.0 / static_cast<double>(x_bits.get_val()); + double k_d = fputil::nearest_integer(x_d * 0x1.0p4); + x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d); + idx = static_cast<int>(k_d); + final_sign = FINAL_SIGN[sign.is_pos()]; + // Adjust constant term of the polynomial by +- pi/2. + const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0], + SIGNED_PI_OVER_2[sign.is_neg()]); + } else { + // Exceptional value: + if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4 + return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f) + : fputil::round_result_slightly_up(-0x1.1a6386p-4f); + } + // Perform range reduction in single precision. + float x_f = x_bits.get_val(); + float k_f = fputil::nearest_integer(x_f * 0x1.0p4f); + x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f); + x_d = static_cast<double>(x_f); + idx = static_cast<int>(k_f); + final_sign = FINAL_SIGN[sign.is_neg()]; + const_term = final_sign * ATAN_COEFFS[idx][0]; + } + + double p = atan_eval(x_d, idx); + double r = fputil::multiply_add(final_sign * x_d, p, const_term); + + return static_cast<float>(r); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index cbb7886..14aaad2 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -124,3 +124,14 @@ add_object_library( libc.src.__support.threads.linux.raw_mutex libc.src.__support.CPP.mutex ) + +add_object_library( + barrier + HDRS + barrier.h + SRCS + barrier.cpp + DEPENDS + 
libc.src.__support.threads.CndVar + libc.src.__support.threads.mutex +) diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp new file mode 100644 index 0000000..cf7207b5 --- /dev/null +++ b/libc/src/__support/threads/linux/barrier.cpp @@ -0,0 +1,85 @@ +//===-- Implementation of Barrier class ------------- ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/linux/barrier.h" +#include "hdr/errno_macros.h" +#include "src/__support/threads/CndVar.h" +#include "src/__support/threads/mutex.h" + +namespace LIBC_NAMESPACE_DECL { + +int Barrier::init(Barrier *b, + [[maybe_unused]] const pthread_barrierattr_t *attr, + unsigned count) { + LIBC_ASSERT(attr == nullptr); // TODO implement barrierattr + if (count == 0) + return EINVAL; + + b->expected = count; + b->waiting = 0; + b->blocking = true; + + int err; + err = CndVar::init(&b->entering); + if (err != 0) + return err; + + err = CndVar::init(&b->exiting); + if (err != 0) + return err; + + auto mutex_err = Mutex::init(&b->m, false, false, false, false); + if (mutex_err != MutexError::NONE) + return EAGAIN; + + return 0; +} + +int Barrier::wait() { + m.lock(); + + // if the barrier is emptying out threads, wait until it finishes + while (!blocking) + entering.wait(&m); + waiting++; + + if (waiting < expected) { + // block threads until waiting = expected + while (blocking) + exiting.wait(&m); + } else { + // this is the last thread to call wait(), so lets wake everyone up + blocking = false; + exiting.broadcast(); + } + waiting--; + + if (waiting == 0) { + // all threads have exited the barrier, let's let the ones waiting to enter + // continue + blocking = true; + 
entering.broadcast(); + m.unlock(); + + // POSIX dictates that the barrier should return a special value to just one + // thread, so we can arbitrarily choose this thread + return PTHREAD_BARRIER_SERIAL_THREAD; + } + m.unlock(); + + return 0; +} + +int Barrier::destroy(Barrier *b) { + CndVar::destroy(&b->entering); + CndVar::destroy(&b->exiting); + Mutex::destroy(&b->m); + return 0; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h new file mode 100644 index 0000000..f0655bf --- /dev/null +++ b/libc/src/__support/threads/linux/barrier.h @@ -0,0 +1,50 @@ +//===-- A platform independent abstraction layer for barriers --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H +#define LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H + +#include "hdr/pthread_macros.h" +#include "include/llvm-libc-types/pthread_barrier_t.h" +#include "include/llvm-libc-types/pthread_barrierattr_t.h" +#include "src/__support/threads/CndVar.h" +#include "src/__support/threads/mutex.h" + +namespace LIBC_NAMESPACE_DECL { + +// NOTE: if the size of this class changes, you must ensure that the size of +// pthread_barrier_t (found in include/llvm-libc-types/pthread_barrier_t.h) is +// the same size +class Barrier { +private: + unsigned expected; + unsigned waiting; + bool blocking; + CndVar entering; + CndVar exiting; + Mutex m; + +public: + static int init(Barrier *b, const pthread_barrierattr_t *attr, + unsigned count); + static int destroy(Barrier *b); + int wait(); +}; + +static_assert( + sizeof(Barrier) == sizeof(pthread_barrier_t), + "The public pthread_barrier_t type cannot accommodate the 
internal " + "barrier type."); + +static_assert(alignof(Barrier) == alignof(pthread_barrier_t), + "The public pthread_barrier_t type has a different alignment " + "than the internal barrier type."); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index e363ad3..aed1d53 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -60,14 +60,31 @@ add_object_library( SRCS mbrtowc.cpp DEPENDS - libc.hdr.errno_macros - libc.hdr.types.wchar_t - libc.hdr.types.size_t - libc.src.__support.common - libc.src.__support.error_or - libc.src.__support.macros.config - .character_converter - .mbstate + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + .character_converter + .mbstate +) + +add_header_library( + mbsnrtowcs + HDRS + mbsnrtowcs.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + libc.src.__support.macros.null_check + .character_converter + .mbstate + .string_converter ) add_header_library( diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h new file mode 100644 index 0000000..54e3152 --- /dev/null +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -0,0 +1,66 @@ +//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H + +#include "hdr/errno_macros.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/character_converter.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, + const char **__restrict src, + size_t nmc, size_t len, + mbstate *__restrict ps) { + LIBC_CRASH_ON_NULLPTR(src); + // Checking if mbstate is valid + CharacterConverter char_conv(ps); + if (!char_conv.isValidState()) + return Error(EINVAL); + + StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps, + len, nmc); + size_t dst_idx = 0; + ErrorOr<char32_t> converted = str_conv.popUTF32(); + while (converted.has_value()) { + if (dst != nullptr) + dst[dst_idx] = converted.value(); + // null terminator should not be counted in return value + if (converted.value() == L'\0') { + if (dst != nullptr) + *src = nullptr; + return dst_idx; + } + dst_idx++; + converted = str_conv.popUTF32(); + } + + if (converted.error() == -1) { // if we hit conversion limit + if (dst != nullptr) + *src += str_conv.getSourceIndex(); + return dst_idx; + } + + return Error(converted.error()); +} + +} // namespace internal + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 455ad34..0522e0e 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -189,6 +189,7 @@ 
add_math_entrypoint_object(fabsf) add_math_entrypoint_object(fabsl) add_math_entrypoint_object(fabsf16) add_math_entrypoint_object(fabsf128) +add_math_entrypoint_object(fabsbf16) add_math_entrypoint_object(fadd) add_math_entrypoint_object(faddl) diff --git a/libc/src/math/fabsbf16.h b/libc/src/math/fabsbf16.h new file mode 100644 index 0000000..4993668 --- /dev/null +++ b/libc/src/math/fabsbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fabsbf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FABSBF16_H +#define LLVM_LIBC_SRC_MATH_FABSBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fabsbf16(bfloat16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FABSBF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index d4d268c..701dc4b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -698,6 +698,19 @@ add_entrypoint_object( ) add_entrypoint_object( + fabsbf16 + SRCS + fabsbf16.cpp + HDRS + ../fabsbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( fadd SRCS fadd.cpp @@ -3889,12 +3902,7 @@ add_entrypoint_object( HDRS ../asinhf.h DEPENDS - .explogxf - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization + libc.src.__support.math.asinhf ) add_entrypoint_object( 
@@ -3904,18 +3912,7 @@ add_entrypoint_object( HDRS ../asinhf16.h DEPENDS - .explogxf - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.asinhf16 ) add_entrypoint_object( @@ -4023,19 +4020,6 @@ add_entrypoint_object( libc.src.errno.errno ) -add_header_library( - atan_utils - HDRS - atan_utils.h - DEPENDS - libc.src.__support.integer_literals - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization -) - add_entrypoint_object( atanf SRCS @@ -4043,14 +4027,7 @@ add_entrypoint_object( HDRS ../atanf.h DEPENDS - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.math.inv_trigf_utils + libc.src.__support.math.atanf ) add_entrypoint_object( @@ -4082,13 +4059,7 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 DEPENDS - .atan_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.macros.optimization + libc.src.__support.math.atan ) add_entrypoint_object( @@ -4118,7 +4089,7 @@ add_entrypoint_object( HDRS ../atan2.h DEPENDS - .atan_utils + libc.src.__support.math.atan_utils libc.src.__support.FPUtil.double_double 
libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -4144,7 +4115,7 @@ add_entrypoint_object( HDRS ../atan2f128.h DEPENDS - .atan_utils + libc.src.__support.math.atan_utils libc.src.__support.integer_literals libc.src.__support.uint128 libc.src.__support.FPUtil.dyadic_float diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp index 3aed3bc..45023c8 100644 --- a/libc/src/math/generic/asinhf.cpp +++ b/libc/src/math/generic/asinhf.cpp @@ -7,112 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinhf.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/common_constants.h" -#include "src/math/generic/explogxf.h" +#include "src/__support/math/asinhf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { - using namespace acoshf_internal; - using FPBits_t = typename fputil::FPBits<float>; - FPBits_t xbits(x); - uint32_t x_u = xbits.uintval(); - uint32_t x_abs = xbits.abs().uintval(); - - // |x| <= 2^-3 - if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) - ? 
x - : (x - 0x1.5555555555555p-3 * x * x * x)); - } - - double x_d = x; - double x_sq = x_d * x_d; - // Generated by Sollya with: - // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|], - // [0, 2^-2]); - double p = fputil::polyeval( - x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, - -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, - 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); - return static_cast<float>(fputil::multiply_add(x_d, p, x_d)); - } - - const double SIGN[2] = {1.0, -1.0}; - double x_sign = SIGN[x_u >> 31]; - double x_d = x; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Helper functions to set results for exceptional cases. - auto round_result_slightly_down = [x_sign](float r) -> float { - return fputil::multiply_add(static_cast<float>(x_sign), r, - static_cast<float>(x_sign) * (-0x1.0p-24f)); - }; - auto round_result_slightly_up = [x_sign](float r) -> float { - return fputil::multiply_add(static_cast<float>(x_sign), r, - static_cast<float>(x_sign) * 0x1.0p-24f); - }; - - if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) { - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits_t::quiet_nan().get_val(); - } - - return x; - } - - // Exceptional cases when x > 2^24. 
- switch (x_abs) { - case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f - return round_result_slightly_down(0x1.1e0696p4f); - case 0x4c803f2c: // |x| = 0x1.007e58p26f - return round_result_slightly_down(0x1.2b786cp4f); - case 0x4f8ffb03: // |x| = 0x1.1ff606p32f - return round_result_slightly_up(0x1.6fdd34p4f); - case 0x5c569e88: // |x| = 0x1.ad3d1p57f - return round_result_slightly_up(0x1.45c146p5f); - case 0x5e68984e: // |x| = 0x1.d1309cp61f - return round_result_slightly_up(0x1.5c9442p5f); - case 0x655890d3: // |x| = 0x1.b121a6p75f - return round_result_slightly_down(0x1.a9a3f2p5f); - case 0x65de7ca6: // |x| = 0x1.bcf94cp76f - return round_result_slightly_up(0x1.af66cp5f); - case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f - return round_result_slightly_down(0x1.08b512p6f); - case 0x7997f30a: // |x| = 0x1.2fe614p116f - return round_result_slightly_up(0x1.451436p6f); - } - } else { - // Exceptional cases when x < 2^24. - if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) { - // |x| = 0x1.575e4cp12f - return round_result_slightly_down(0x1.29becap3f); - } - if (LIBC_UNLIKELY(x_abs == 0x49d29048)) { - // |x| = 0x1.a5209p20f - return round_result_slightly_down(0x1.e1b92p3f); - } - } -#else - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) - return x; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // asinh(x) = log(x + sqrt(x^2 + 1)) - return static_cast<float>( - x_sign * log_eval(fputil::multiply_add( - x_d, x_sign, - fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0))))); -} +LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asinhf16.cpp b/libc/src/math/generic/asinhf16.cpp index 0a0b471..d517e63 100644 --- a/libc/src/math/generic/asinhf16.cpp +++ b/libc/src/math/generic/asinhf16.cpp @@ -7,102 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinhf16.h" -#include "explogxf.h" -#include "hdr/fenv_macros.h" -#include 
"src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/asinhf16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 8; - -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - - // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ) - {0x3769, 0x372a, 1, 0, 1}, - // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ) - {0x3b5b, 0x3a96, 1, 0, 0}, - // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ) - {0x4b1f, 0x42b3, 1, 0, 0}, - // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ) - {0x4c9b, 0x4336, 1, 0, 1}, - // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ) - {0xb769, 0xb72a, 0, 1, 1}, - // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ) - {0xbb5b, 0xba96, 0, 1, 0}, - // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ) - {0xcb1f, 0xc2b3, 0, 1, 0}, - // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ) - {0xcc9b, 0xc336, 0, 1, 1}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { - using namespace acoshf_internal; - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Handle exceptional values - if (auto r = 
ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - float xf = x; - const float SIGN[2] = {1.0f, -1.0f}; - float x_sign = SIGN[x_u >> 15]; - - // |x| <= 0.25 - if (LIBC_UNLIKELY(x_abs <= 0x3400)) { - // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain - // rounding types. - if (LIBC_UNLIKELY(x_abs < 0x29c6)) { - int rounding = fputil::quick_get_round(); - if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0) - return fputil::cast<float16>(xf + 0x1p-24f); - if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0) - return fputil::cast<float16>(xf - 0x1p-24f); - return fputil::cast<float16>(xf); - } - - float x_sq = xf * xf; - // Generated by Sollya with: - // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]); - // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f - // for better accuracy. - float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f, - -0x1.6c53dep-5f, 0x1.bd114ep-5f); - - return fputil::cast<float16>(xf * p); - } - - // General case: asinh(x) = ln(x + sqrt(x^2 + 1)) - float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f)); - return fputil::cast<float16>( - x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term))); -} +LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { return math::asinhf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atan.cpp b/libc/src/math/generic/atan.cpp index cbca605..93bf2e1 100644 --- a/libc/src/math/generic/atan.cpp +++ b/libc/src/math/generic/atan.cpp @@ -7,173 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atan.h" -#include "atan_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include 
"src/__support/FPUtil/nearest_integer.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/atan.h" namespace LIBC_NAMESPACE_DECL { -// To compute atan(x), we divided it into the following cases: -// * |x| < 2^-26: -// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply -// return atan(x) = x - sign(x) * epsilon. -// * 2^-26 <= |x| < 1: -// We perform range reduction mod 2^-6 = 1/64 as follow: -// Let k = 2^(-6) * round(|x| * 2^6), then -// atan(x) = sign(x) * atan(|x|) -// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)). -// We store atan(k) in a look up table, and perform intermediate steps in -// double-double. -// * 1 < |x| < 2^53: -// First we perform the transformation y = 1/|x|: -// atan(x) = sign(x) * (pi/2 - atan(1/|x|)) -// = sign(x) * (pi/2 - atan(y)). -// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the -// previous case: -// Let k = 2^(-6) * round(y * 2^6), then -// atan(y) = atan(k) + atan((y - k) / (1 + y*k)) -// = atan(k) + atan((1/|x| - k) / (1 + k/|x|) -// = atan(k) + atan((1 - k*|x|) / (|x| + k)). -// * |x| >= 2^53: -// Using the reciprocal transformation: -// atan(x) = sign(x) * (pi/2 - atan(1/|x|)). -// We have that: -// atan(1/|x|) <= 1/|x| <= 2^-53, -// which is smaller than ulp(pi/2) / 2. -// So we can return: -// atan(x) = sign(x) * (pi/2 - epsilon) - -LLVM_LIBC_FUNCTION(double, atan, (double x)) { - using FPBits = fputil::FPBits<double>; - - constexpr double IS_NEG[2] = {1.0, -1.0}; - constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, - 0x1.921fb54442d18p0}; - constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, - -0x1.921fb54442d18p0}; - - FPBits xbits(x); - bool x_sign = xbits.is_neg(); - xbits = xbits.abs(); - uint64_t x_abs = xbits.uintval(); - int x_exp = - static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS; - - // |x| < 1. 
- if (x_exp < 0) { - if (LIBC_UNLIKELY(x_exp < -26)) { -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return x; -#else - if (x == 0.0) - return x; - // |x| < 2^-26 - return fputil::multiply_add(-0x1.0p-54, x, x); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - double x_d = xbits.get_val(); - // k = 2^-6 * round(2^6 * |x|) - double k = fputil::nearest_integer(0x1.0p6 * x_d); - unsigned idx = static_cast<unsigned>(k); - k *= 0x1.0p-6; - - // numerator = |x| - k - DoubleDouble num, den; - num.lo = 0.0; - num.hi = x_d - k; - - // denominator = 1 - k * |x| - den.hi = fputil::multiply_add(x_d, k, 1.0); - DoubleDouble prod = fputil::exact_mult(x_d, k); - // Using Dekker's 2SUM algorithm to compute the lower part. - den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo; - - // x_r = (|x| - k) / (1 + k * |x|) - DoubleDouble x_r = fputil::div(num, den); - - // Approximating atan(x_r) using Taylor polynomial. - DoubleDouble p = atan_eval(x_r); - - // atan(x) = sign(x) * (atan(k) + atan(x_r)) - // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) )) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo))); -#else - - DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi); - double c1 = c0.lo + (ATAN_I[idx].lo + p.lo); - double r = IS_NEG[x_sign] * (c0.hi + c1); - - return r; -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - // |x| >= 2^53 or x is NaN. - if (LIBC_UNLIKELY(x_exp >= 53)) { - // x is nan - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - // |x| >= 2^53 - // atan(x) ~ sign(x) * pi/2. 
- if (x_exp >= 53) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return IS_NEG[x_sign] * PI_OVER_2.hi; -#else - return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi, - IS_NEG[x_sign] * PI_OVER_2.lo); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - double x_d = xbits.get_val(); - double y = 1.0 / x_d; - - // k = 2^-6 * round(2^6 / |x|) - double k = fputil::nearest_integer(0x1.0p6 * y); - unsigned idx = static_cast<unsigned>(k); - k *= 0x1.0p-6; - - // denominator = |x| + k - DoubleDouble den = fputil::exact_add(x_d, k); - // numerator = 1 - k * |x| - DoubleDouble num; - num.hi = fputil::multiply_add(-x_d, k, 1.0); - DoubleDouble prod = fputil::exact_mult(x_d, k); - // Using Dekker's 2SUM algorithm to compute the lower part. - num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo; - - // x_r = (1/|x| - k) / (1 - k/|x|) - // = (1 - k * |x|) / (|x| - k) - DoubleDouble x_r = fputil::div(num, den); - - // Approximating atan(x_r) using Taylor polynomial. - DoubleDouble p = atan_eval(x_r); - - // atan(x) = sign(x) * (pi/2 - atan(1/|x|)) - // = sign(x) * (pi/2 - atan(k) - atan(x_r)) - // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k))) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo; - return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part)); -#else - DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi); - DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi); - double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo); - - double r = IS_NEG[!x_sign] * (c1.hi + c2); - - return r; -#endif -} +LLVM_LIBC_FUNCTION(double, atan, (double x)) { return math::atan(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp index aa770de..58042d3 100644 --- a/libc/src/math/generic/atan2.cpp +++ b/libc/src/math/generic/atan2.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// 
#include "src/math/atan2.h" -#include "atan_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/double_double.h" @@ -15,6 +14,7 @@ #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/atan_utils.h" namespace LIBC_NAMESPACE_DECL { @@ -72,6 +72,7 @@ namespace LIBC_NAMESPACE_DECL { // |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73. LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) { + using namespace atan_internal; using FPBits = fputil::FPBits<double>; constexpr double IS_NEG[2] = {1.0, -1.0}; diff --git a/libc/src/math/generic/atan2f128.cpp b/libc/src/math/generic/atan2f128.cpp index a3aba0b..8838d94 100644 --- a/libc/src/math/generic/atan2f128.cpp +++ b/libc/src/math/generic/atan2f128.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/atan2f128.h" -#include "atan_utils.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" @@ -16,6 +15,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/types.h" +#include "src/__support/math/atan_utils.h" #include "src/__support/uint128.h" namespace LIBC_NAMESPACE_DECL { @@ -103,6 +103,7 @@ static constexpr Float128 CONST_ADJ[2][2][2] = { // |(atan(u) - P(u)) / P(u)| < 2^-114. 
LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) { + using namespace atan_internal; using FPBits = fputil::FPBits<float128>; using Float128 = fputil::DyadicFloat<128>; diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index 22f962e..acd32f0 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -7,116 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanf.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/math/inv_trigf_utils.h" +#include "src/__support/math/atanf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, atanf, (float x)) { - using namespace inv_trigf_utils_internal; - using FPBits = typename fputil::FPBits<float>; - - constexpr double FINAL_SIGN[2] = {1.0, -1.0}; - constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0, - -0x1.921fb54442d18p0}; - - FPBits x_bits(x); - Sign sign = x_bits.sign(); - x_bits.set_sign(Sign::POS); - uint32_t x_abs = x_bits.uintval(); - - // x is inf or nan, |x| < 2^-4 or |x|= > 16. - if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) { - double x_d = static_cast<double>(x); - double const_term = 0.0; - if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) { - // atan(+-Inf) = +-pi/2. 
- if (x_bits.is_inf()) { - volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()]; - return static_cast<float>(sign_pi_over_2); - } - if (x_bits.is_nan()) - return x; - // x >= 16 - x_d = -1.0 / x_d; - const_term = SIGNED_PI_OVER_2[sign.is_neg()]; - } - // 0 <= x < 1/16; - if (LIBC_UNLIKELY(x_bits.is_zero())) - return x; - // x <= 2^-12; - if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) { -#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) - return fputil::multiply_add(x, -0x1.0p-25f, x); -#else - double x_d = static_cast<double>(x); - return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d)); -#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT - } - // Use Taylor polynomial: - // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11). - constexpr double ATAN_TAYLOR[6] = { - 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, - -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, - }; - double x2 = x_d * x_d; - double x4 = x2 * x2; - double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); - double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); - double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]); - double p = fputil::polyeval(x4, c0, c1, c2); - double r = fputil::multiply_add(x_d, p, const_term); - return static_cast<float>(r); - } - - // Range reduction steps: - // 1) atan(x) = sign(x) * atan(|x|) - // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|) - // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32. - // 4) Then we use polynomial approximation: - // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16) - // = P(x - k/16) - double x_d, const_term, final_sign; - int idx; - - if (x_abs > 0x3f80'0000U) { - // |x| > 1, we need to invert x, so we will perform range reduction in - // double precision. 
- x_d = 1.0 / static_cast<double>(x_bits.get_val()); - double k_d = fputil::nearest_integer(x_d * 0x1.0p4); - x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d); - idx = static_cast<int>(k_d); - final_sign = FINAL_SIGN[sign.is_pos()]; - // Adjust constant term of the polynomial by +- pi/2. - const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0], - SIGNED_PI_OVER_2[sign.is_neg()]); - } else { - // Exceptional value: - if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4 - return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f) - : fputil::round_result_slightly_up(-0x1.1a6386p-4f); - } - // Perform range reduction in single precision. - float x_f = x_bits.get_val(); - float k_f = fputil::nearest_integer(x_f * 0x1.0p4f); - x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f); - x_d = static_cast<double>(x_f); - idx = static_cast<int>(k_f); - final_sign = FINAL_SIGN[sign.is_neg()]; - const_term = final_sign * ATAN_COEFFS[idx][0]; - } - - double p = atan_eval(x_d, idx); - double r = fputil::multiply_add(final_sign * x_d, p, const_term); - - return static_cast<float>(r); -} +LLVM_LIBC_FUNCTION(float, atanf, (float x)) { return math::atanf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fabsbf16.cpp b/libc/src/math/generic/fabsbf16.cpp new file mode 100644 index 0000000..ea39719 --- /dev/null +++ b/libc/src/math/generic/fabsbf16.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of fabsbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fabsbf16.h" + +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fabsbf16, (bfloat16 x)) { return fputil::abs(x); } + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt index c5db6fa..fe31e6a 100644 --- a/libc/src/pthread/CMakeLists.txt +++ b/libc/src/pthread/CMakeLists.txt @@ -273,6 +273,40 @@ add_entrypoint_object( ) add_entrypoint_object( + pthread_barrier_init + SRCS + pthread_barrier_init.cpp + HDRS + pthread_barrier_init.h + DEPENDS + libc.src.errno.errno + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( + pthread_barrier_destroy + SRCS + pthread_barrier_destroy.cpp + HDRS + pthread_barrier_destroy.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( + pthread_barrier_wait + SRCS + pthread_barrier_wait.cpp + HDRS + pthread_barrier_wait.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( pthread_mutex_init SRCS pthread_mutex_init.cpp diff --git a/libc/src/pthread/pthread_barrier_destroy.cpp b/libc/src/pthread/pthread_barrier_destroy.cpp new file mode 100644 index 0000000..82de8f2 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_destroy.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of the pthread_barrier_destroy function ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_destroy.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_destroy, (pthread_barrier_t * b)) { + return Barrier::destroy(reinterpret_cast<Barrier *>(b)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_destroy.h b/libc/src/pthread/pthread_barrier_destroy.h new file mode 100644 index 0000000..e27552c --- /dev/null +++ b/libc/src/pthread/pthread_barrier_destroy.h @@ -0,0 +1,21 @@ +//===-- Implementation header for pthread_barrier_destroy --------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_destroy(pthread_barrier_t *b); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H diff --git a/libc/src/pthread/pthread_barrier_init.cpp b/libc/src/pthread/pthread_barrier_init.cpp new file mode 100644 index 0000000..2e92238 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_init.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of the pthread_barrier_init function ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_init.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "hdr/types/pthread_barrierattr_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_init, + (pthread_barrier_t * b, + const pthread_barrierattr_t *__restrict attr, + unsigned count)) { + return Barrier::init(reinterpret_cast<Barrier *>(b), attr, count); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_init.h b/libc/src/pthread/pthread_barrier_init.h new file mode 100644 index 0000000..bb17f3f --- /dev/null +++ b/libc/src/pthread/pthread_barrier_init.h @@ -0,0 +1,24 @@ +//===-- Implementation header for pthread_barrier_init ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H + +#include "hdr/types/pthread_barrier_t.h" +#include "hdr/types/pthread_barrierattr_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_init(pthread_barrier_t *b, + const pthread_barrierattr_t *__restrict attr, + unsigned count); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H diff --git a/libc/src/pthread/pthread_barrier_wait.cpp b/libc/src/pthread/pthread_barrier_wait.cpp new file mode 100644 index 0000000..dbd1333 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_wait.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of the pthread_barrier_wait function ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_wait.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_wait, (pthread_barrier_t * b)) { + return reinterpret_cast<Barrier *>(b)->wait(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_wait.h b/libc/src/pthread/pthread_barrier_wait.h new file mode 100644 index 0000000..16ddc06 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_wait.h @@ -0,0 +1,21 @@ +//===-- Implementation header for pthread_barrier_wait ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_wait(pthread_barrier_t *b); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 49f4a1b..9ba0a06 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -185,6 +185,55 @@ add_entrypoint_object( ) add_entrypoint_object( + mbstowcs + SRCS + mbstowcs.cpp + HDRS + mbstowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.macros.null_check + 
libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( + mbsrtowcs + SRCS + mbsrtowcs.cpp + HDRS + mbsrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( + mbsnrtowcs + SRCS + mbsnrtowcs.cpp + HDRS + mbsnrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( wcstombs SRCS wcstombs.cpp diff --git a/libc/src/wchar/mbsnrtowcs.cpp b/libc/src/wchar/mbsnrtowcs.cpp new file mode 100644 index 0000000..28e0ff3 --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.cpp @@ -0,0 +1,39 @@ +//===-- Implementation of mbsnrtowcs --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsnrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + // If destination is null, ignore len + len = dst == nullptr ? 
SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, nmc, len, + ps == nullptr ? &internal_mbstate + : reinterpret_cast<internal::mbstate *>(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h new file mode 100644 index 0000000..0d66b95 --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsnrtowcs ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp new file mode 100644 index 0000000..82ca25a --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.cpp @@ -0,0 +1,39 @@ +//===-- Implementation of mbsrtowcs ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + // If destination is null, ignore len + len = dst == nullptr ? SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, SIZE_MAX, len, + ps == nullptr ? &internal_mbstate + : reinterpret_cast<internal::mbstate *>(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h new file mode 100644 index 0000000..f8d4cc2 --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsrtowcs -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp new file mode 100644 index 0000000..43e953c --- /dev/null +++ b/libc/src/wchar/mbstowcs.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of mbstowcs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbstowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbstowcs, + (wchar_t *__restrict pwcs, const char *__restrict s, + size_t n)) { + LIBC_CRASH_ON_NULLPTR(s); + // If destination is null, ignore n + n = pwcs == nullptr ? 
SIZE_MAX : n; + static internal::mbstate internal_mbstate; + const char *temp = s; + auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate); + + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h new file mode 100644 index 0000000..7d08a83 --- /dev/null +++ b/libc/src/wchar/mbstowcs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbstowcs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index e0218c7..55a3cee 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -17,13 +17,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// returns true if the character exists in the string -LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) { - for (int n = 0; str[n]; ++n) { - if (str[n] == c) - return true; - } - return false; +LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) { + for (; *s && *s != c; ++s) + ; + return (*s == c) ? 
s : nullptr; } // bool should be true for wcscspn for complimentary span @@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2, bool not_match_set) { size_t i = 0; for (; s1[i]; ++i) { - bool in_set = wcschr(s1[i], s2); + bool in_set = internal::wcschr(s2, s1[i]); if (in_set == not_match_set) return i; } diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp index defc2ce..8ac4916 100644 --- a/libc/src/wchar/wcschr.cpp +++ b/libc/src/wchar/wcschr.cpp @@ -11,15 +11,14 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) { - for (; *s && *s != c; ++s) - ; - if (*s == c) - return s; - return nullptr; + LIBC_CRASH_ON_NULLPTR(s); + return internal::wcschr(s, c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp index a00ba99..f329b73 100644 --- a/libc/src/wchar/wcspbrk.cpp +++ b/libc/src/wchar/wcspbrk.cpp @@ -11,17 +11,10 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/null_check.h" +#include "src/wchar/wchar_utils.h" namespace LIBC_NAMESPACE_DECL { -bool contains_char(const wchar_t *str, wchar_t target) { - for (; *str != L'\0'; str++) - if (*str == target) - return true; - - return false; -} - LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, (const wchar_t *src, const wchar_t *breakset)) { LIBC_CRASH_ON_NULLPTR(src); @@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, // currently O(n * m), can be further optimized to O(n + m) with a hash set for (int src_idx = 0; src[src_idx] != 0; src_idx++) - if (contains_char(breakset, src[src_idx])) + if (internal::wcschr(breakset, src[src_idx])) return src + src_idx; return nullptr; diff --git a/libc/src/wchar/wcstok.cpp 
b/libc/src/wchar/wcstok.cpp index 291efc1..ed4f0aa 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -10,18 +10,12 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { -bool isADelimeter(wchar_t wc, const wchar_t *delimiters) { - for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr) - if (wc == *delim_ptr) - return true; - return false; -} - LLVM_LIBC_FUNCTION(wchar_t *, wcstok, - (wchar_t *__restrict str, const wchar_t *__restrict delim, + (wchar_t *__restrict str, const wchar_t *__restrict delims, wchar_t **__restrict context)) { if (str == nullptr) { if (*context == nullptr) @@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, str = *context; } - wchar_t *tok_start, *tok_end; - for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim); - ++tok_start) - ; + wchar_t *tok_start = str; + while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start)) + ++tok_start; - for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim); - ++tok_end) - ; + wchar_t *tok_end = tok_start; + while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end)) + ++tok_end; if (*tok_end != L'\0') { *tok_end = L'\0'; |