aboutsummaryrefslogtreecommitdiff
path: root/libc/src
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src')
-rw-r--r--libc/src/__support/FPUtil/cast.h4
-rw-r--r--libc/src/__support/GPU/allocator.cpp140
-rw-r--r--libc/src/__support/math/CMakeLists.txt74
-rw-r--r--libc/src/__support/math/asinhf.h125
-rw-r--r--libc/src/__support/math/asinhf16.h121
-rw-r--r--libc/src/__support/math/atan.h189
-rw-r--r--libc/src/__support/math/atan_utils.h (renamed from libc/src/math/generic/atan_utils.h)16
-rw-r--r--libc/src/__support/math/atanf.h129
-rw-r--r--libc/src/__support/threads/linux/CMakeLists.txt11
-rw-r--r--libc/src/__support/threads/linux/barrier.cpp85
-rw-r--r--libc/src/__support/threads/linux/barrier.h50
-rw-r--r--libc/src/__support/wchar/CMakeLists.txt33
-rw-r--r--libc/src/__support/wchar/mbsnrtowcs.h66
-rw-r--r--libc/src/math/CMakeLists.txt1
-rw-r--r--libc/src/math/fabsbf16.h21
-rw-r--r--libc/src/math/generic/CMakeLists.txt67
-rw-r--r--libc/src/math/generic/asinhf.cpp106
-rw-r--r--libc/src/math/generic/asinhf16.cpp96
-rw-r--r--libc/src/math/generic/atan.cpp167
-rw-r--r--libc/src/math/generic/atan2.cpp3
-rw-r--r--libc/src/math/generic/atan2f128.cpp3
-rw-r--r--libc/src/math/generic/atanf.cpp110
-rw-r--r--libc/src/math/generic/fabsbf16.cpp19
-rw-r--r--libc/src/pthread/CMakeLists.txt34
-rw-r--r--libc/src/pthread/pthread_barrier_destroy.cpp22
-rw-r--r--libc/src/pthread/pthread_barrier_destroy.h21
-rw-r--r--libc/src/pthread/pthread_barrier_init.cpp26
-rw-r--r--libc/src/pthread/pthread_barrier_init.h24
-rw-r--r--libc/src/pthread/pthread_barrier_wait.cpp22
-rw-r--r--libc/src/pthread/pthread_barrier_wait.h21
-rw-r--r--libc/src/wchar/CMakeLists.txt49
-rw-r--r--libc/src/wchar/mbsnrtowcs.cpp39
-rw-r--r--libc/src/wchar/mbsnrtowcs.h24
-rw-r--r--libc/src/wchar/mbsrtowcs.cpp39
-rw-r--r--libc/src/wchar/mbsrtowcs.h24
-rw-r--r--libc/src/wchar/mbstowcs.cpp40
-rw-r--r--libc/src/wchar/mbstowcs.h22
-rw-r--r--libc/src/wchar/wchar_utils.h13
-rw-r--r--libc/src/wchar/wcschr.cpp9
-rw-r--r--libc/src/wchar/wcspbrk.cpp11
-rw-r--r--libc/src/wchar/wcstok.cpp23
41 files changed, 1470 insertions, 629 deletions
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h
index e6fad1b..e999ece 100644
--- a/libc/src/__support/FPUtil/cast.h
+++ b/libc/src/__support/FPUtil/cast.h
@@ -66,9 +66,9 @@ cast(InType x) {
cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN);
DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x);
return xd.template as<OutType, /*ShouldSignalExceptions=*/true>();
+ } else {
+ return static_cast<OutType>(x);
}
-
- return static_cast<OutType>(x);
}
} // namespace LIBC_NAMESPACE::fputil
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 866aea7..8fff4cc 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -16,6 +16,7 @@
#include "allocator.h"
+#include "src/__support/CPP/algorithm.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/new.h"
@@ -31,14 +32,12 @@ constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE;
constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1;
constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;
+constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8;
constexpr static uint32_t MIN_SIZE = 16;
constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
// The number of times to attempt claiming an in-progress slab allocation.
-constexpr static uint32_t MAX_TRIES = 128;
-
-// A sentinel used to indicate an invalid but non-null pointer value.
-constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
+constexpr static uint32_t MAX_TRIES = 1024;
static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
@@ -70,8 +69,8 @@ static void rpc_free(void *ptr) {
// Convert a potentially disjoint bitmask into an increasing integer per-lane
// for use with indexing between gpu lanes.
-static inline uint32_t lane_count(uint64_t lane_mask) {
- return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1));
+static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) {
+ return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1));
}
// Obtain an initial value to seed a random number generator. We use the rounded
@@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) {
void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
uint64_t mask = gpu::get_lane_mask();
uint32_t workers = cpp::popcount(uniform);
- for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers)
+ for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n;
+ i += workers)
s[i] = c;
}
@@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) {
return x && (x & (x - 1)) == 0;
}
-// Where this chunk size should start looking in the global array.
-static inline constexpr uint32_t start_index(uint32_t chunk_index) {
- return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) /
- impl::get_chunk_id(SLAB_SIZE / 2);
+// Where this chunk size should start looking in the global array. Small
+// allocations are much more likely than large ones, so we give them the most
+// space. We use a cubic easing function normalized on the possible chunks.
+static inline constexpr uint32_t get_start_index(uint32_t chunk_size) {
+ constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2);
+ uint64_t norm =
+ (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk;
+ uint64_t bias = (norm * norm * norm) >> 32;
+ uint64_t inv = (1 << 16) - bias;
+ return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
+}
+
+// Returns the highest lane set in 'ballot' at or below 'id', i.e. its leader.
+static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
+ uint64_t mask = id < BITS_IN_DWORD - 1 ? ~0ull << (id + 1) : 0;
+ return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1;
+}
+
+// We use a sentinel value to indicate a failed or in-progress allocation.
+template <typename T> bool is_sentinel(const T &x) {
+ return x == cpp::numeric_limits<T>::max();
}
} // namespace impl
@@ -264,28 +281,33 @@ struct Slab {
continue;
// We try using any known empty bits from the previous attempt first.
- uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask),
- ~after ? (old_index & ~(BITS_IN_WORD - 1)) +
- cpp::countr_zero(~after)
- : impl::xorshift32(state));
+ uint32_t start = gpu::shuffle(
+ mask, cpp::countr_zero(uniform & mask),
+ ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after)
+ : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD));
- uint32_t id = impl::lane_count(uniform & mask);
+ // Each lane tries to claim one bit in a single contiguous mask.
+ uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id());
uint32_t index = (start + id) % usable_bits(chunk_size);
uint32_t slot = index / BITS_IN_WORD;
uint32_t bit = index % BITS_IN_WORD;
// Get the mask of bits destined for the same slot and coalesce it.
- uint64_t match = uniform & gpu::match_any(mask, slot);
- uint32_t length = cpp::popcount(match);
- uint32_t bitmask = gpu::shuffle(
- mask, cpp::countr_zero(match),
- static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit);
+ uint32_t leader = impl::get_leader_id(
+ uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0),
+ gpu::get_lane_id());
+ uint32_t length = cpp::popcount(uniform & mask) -
+ impl::lane_count(uniform & mask, leader);
+ uint32_t bitmask =
+ static_cast<uint32_t>(
+ (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1)
+ << bit;
uint32_t before = 0;
- if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match)))
+ if (gpu::get_lane_id() == leader)
before = cpp::AtomicRef(get_bitfield()[slot])
.fetch_or(bitmask, cpp::MemoryOrder::RELAXED);
- before = gpu::shuffle(mask, cpp::countr_zero(match), before);
+ before = gpu::shuffle(mask, leader, before);
if (~before & (1 << bit))
result = ptr_from_index(index, chunk_size);
else
@@ -323,20 +345,20 @@ struct GuardPtr {
private:
struct RefCounter {
// Indicates that the object is in its deallocation phase and thus invalid.
- static constexpr uint64_t INVALID = uint64_t(1) << 63;
+ static constexpr uint32_t INVALID = uint32_t(1) << 31;
// If a read preempts an unlock call we indicate this so the following
// unlock call can swap out the helped bit and maintain exclusive ownership.
- static constexpr uint64_t HELPED = uint64_t(1) << 62;
+ static constexpr uint32_t HELPED = uint32_t(1) << 30;
// Resets the reference counter, cannot be reset to zero safely.
- void reset(uint32_t n, uint64_t &count) {
+ void reset(uint32_t n, uint32_t &count) {
counter.store(n, cpp::MemoryOrder::RELAXED);
count = n;
}
// Acquire a slot in the reference counter if it is not invalid.
- bool acquire(uint32_t n, uint64_t &count) {
+ bool acquire(uint32_t n, uint32_t &count) {
count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
return (count & INVALID) == 0;
}
@@ -349,7 +371,7 @@ private:
// another thread resurrected the counter and we quit, or a parallel read
// helped us invalidating it. For the latter, claim that flag and return.
if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
- uint64_t expected = 0;
+ uint32_t expected = 0;
if (counter.compare_exchange_strong(expected, INVALID,
cpp::MemoryOrder::RELAXED,
cpp::MemoryOrder::RELAXED))
@@ -372,28 +394,29 @@ private:
return (val & INVALID) ? 0 : val;
}
- cpp::Atomic<uint64_t> counter{0};
+ cpp::Atomic<uint32_t> counter{0};
};
- cpp::Atomic<Slab *> ptr{nullptr};
- RefCounter ref{};
+ cpp::Atomic<Slab *> ptr;
+ RefCounter ref;
// Should be called be a single lane for each different pointer.
template <typename... Args>
- Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+ Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) {
Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
if (!expected &&
ptr.compare_exchange_strong(
- expected, reinterpret_cast<Slab *>(SENTINEL),
+ expected,
+ reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
- count = cpp::numeric_limits<uint64_t>::max();
+ count = cpp::numeric_limits<uint32_t>::max();
void *raw = impl::rpc_allocate(sizeof(Slab));
if (!raw)
return nullptr;
return new (raw) Slab(cpp::forward<Args>(args)...);
}
- if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
+ if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
return nullptr;
if (!ref.acquire(n, count))
@@ -405,7 +428,7 @@ private:
// Finalize the associated memory and signal that it is ready to use by
// resetting the counter.
- void finalize(Slab *mem, uint32_t n, uint64_t &count) {
+ void finalize(Slab *mem, uint32_t n, uint32_t &count) {
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
ptr.store(mem, cpp::MemoryOrder::RELAXED);
cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
@@ -418,7 +441,7 @@ public:
// The uniform mask represents which lanes share the same pointer. For each
// uniform value we elect a leader to handle it on behalf of the other lanes.
template <typename... Args>
- Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+ Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count,
Args &&...args) {
count = 0;
Slab *result = nullptr;
@@ -433,14 +456,17 @@ public:
// We defer storing the newly allocated slab until now so that we can use
// multiple lanes to initialize it and release it for use.
- if (count == cpp::numeric_limits<uint64_t>::max()) {
+ if (impl::is_sentinel(count)) {
result->initialize(uniform);
if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
finalize(result, cpp::popcount(uniform), count);
+ count =
+ gpu::shuffle(gpu::get_lane_mask(), cpp::countr_zero(uniform), count);
}
- if (count != cpp::numeric_limits<uint64_t>::max())
- count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1;
+ if (!impl::is_sentinel(count))
+ count = count - cpp::popcount(uniform) +
+ impl::lane_count(uniform, gpu::get_lane_id());
return result;
}
@@ -469,7 +495,7 @@ static GuardPtr slots[ARRAY_SIZE] = {};
// Keep a cache of the last successful slot for each chunk size. Initialize it
// to an even spread of the total size. Must be updated if the chunking scheme
// changes.
-#define S(X) (impl::start_index(X))
+#define S(X) (impl::get_start_index(X))
static cpp::Atomic<uint32_t> indices[] = {
S(16), S(32), S(48), S(64), S(96), S(112), S(128),
S(192), S(224), S(256), S(384), S(448), S(512), S(768),
@@ -481,26 +507,28 @@ static cpp::Atomic<uint32_t> indices[] = {
#undef S
// Tries to find a slab in the table that can support the given chunk size.
-static Slab *find_slab(uint32_t chunk_size) {
+static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
// We start at the index of the last successful allocation for this kind.
uint32_t chunk_id = impl::get_chunk_id(chunk_size);
uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED);
- uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
- for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) {
+ for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) {
uint32_t index =
- !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE;
+ !offset ? start
+ : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE;
- if (slots[index].use_count() < Slab::available_chunks(chunk_size)) {
+ if (!offset ||
+ slots[index].use_count() < Slab::available_chunks(chunk_size)) {
uint64_t lane_mask = gpu::get_lane_mask();
- uint64_t reserved = 0;
+ uint32_t reserved = 0;
Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
reserved, chunk_size, index);
// If there is a slab allocation in progress we retry a few times.
for (uint32_t retries = 0;
- retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) {
+ !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
+ retries++) {
uint64_t lane_mask = gpu::get_lane_mask();
slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
chunk_size, index);
@@ -510,17 +538,21 @@ static Slab *find_slab(uint32_t chunk_size) {
// If we find a slab with a matching chunk size then we store the result.
// Otherwise, we need to free the claimed lock and continue. In the case
// of out-of-memory we receive a sentinel value and return a failure.
- if (slab && reserved <= Slab::available_chunks(chunk_size) &&
+ if (slab && reserved < Slab::available_chunks(chunk_size) &&
slab->get_chunk_size() == chunk_size) {
if (index != start)
indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED);
+ uniform = uniform & gpu::get_lane_mask();
return slab;
- } else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
+ } else if (slab && (reserved >= Slab::available_chunks(chunk_size) ||
slab->get_chunk_size() != chunk_size)) {
slots[index].unlock(gpu::get_lane_mask(),
gpu::get_lane_mask() & uniform);
- } else if (!slab && reserved == SENTINEL) {
+ } else if (!slab && impl::is_sentinel(reserved)) {
+ uniform = uniform & gpu::get_lane_mask();
return nullptr;
+ } else {
+ sleep_briefly();
}
}
}
@@ -547,12 +579,12 @@ void *allocate(uint64_t size) {
// Try to find a slab for the rounded up chunk size and allocate from it.
uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size));
- Slab *slab = find_slab(chunk_size);
- if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL))
+ uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
+ Slab *slab = find_slab(chunk_size, uniform);
+ if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab)))
return nullptr;
uint64_t lane_mask = gpu::get_lane_mask();
- uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index());
void *ptr = slab->allocate(lane_mask, uniform);
return ptr;
}
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 1050938..95acc962 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -141,6 +141,80 @@ add_header_library(
)
add_header_library(
+ asinhf
+ HDRS
+ asinhf.h
+ DEPENDS
+ .acoshf_utils
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.config
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
+ asinhf16
+ HDRS
+ asinhf16.h
+DEPENDS
+ .acoshf_utils
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.except_value_utils
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.config
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
+ atan_utils
+ HDRS
+ atan_utils.h
+DEPENDS
+ libc.src.__support.integer_literals
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.dyadic_float
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
+ atan
+ HDRS
+ atan.h
+DEPENDS
+ .atan_utils
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
+ atanf
+ HDRS
+ atanf.h
+ DEPENDS
+ .inv_trigf_utils
+ libc.src.__support.FPUtil.except_value_utils
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
asinf
HDRS
asinf.h
diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h
new file mode 100644
index 0000000..1c08a6e
--- /dev/null
+++ b/libc/src/__support/math/asinhf.h
@@ -0,0 +1,125 @@
+//===-- Implementation header for asinhf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinhf(float x) {
+ using namespace acoshf_internal;
+ using FPBits_t = typename fputil::FPBits<float>;
+ FPBits_t xbits(x);
+ uint32_t x_u = xbits.uintval();
+ uint32_t x_abs = xbits.abs().uintval();
+
+ // |x| <= 2^-2
+ if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
+ // |x| <= 2^-26
+ if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+ return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
+ ? x
+ : (x - 0x1.5555555555555p-3 * x * x * x));
+ }
+
+ double x_d = x;
+ double x_sq = x_d * x_d;
+ // Generated by Sollya with:
+ // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
+ // [0, 2^-2]);
+ double p = fputil::polyeval(
+ x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
+ -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
+ 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
+ return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
+ }
+
+ const double SIGN[2] = {1.0, -1.0};
+ double x_sign = SIGN[x_u >> 31];
+ double x_d = x;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ // Helper functions to set results for exceptional cases.
+ auto round_result_slightly_down = [x_sign](float r) -> float {
+ return fputil::multiply_add(static_cast<float>(x_sign), r,
+ static_cast<float>(x_sign) * (-0x1.0p-24f));
+ };
+ auto round_result_slightly_up = [x_sign](float r) -> float {
+ return fputil::multiply_add(static_cast<float>(x_sign), r,
+ static_cast<float>(x_sign) * 0x1.0p-24f);
+ };
+
+ if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
+ if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits_t::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+ // Exceptional cases when x > 2^24.
+ switch (x_abs) {
+ case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
+ return round_result_slightly_down(0x1.1e0696p4f);
+ case 0x4c803f2c: // |x| = 0x1.007e58p26f
+ return round_result_slightly_down(0x1.2b786cp4f);
+ case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
+ return round_result_slightly_up(0x1.6fdd34p4f);
+ case 0x5c569e88: // |x| = 0x1.ad3d1p57f
+ return round_result_slightly_up(0x1.45c146p5f);
+ case 0x5e68984e: // |x| = 0x1.d1309cp61f
+ return round_result_slightly_up(0x1.5c9442p5f);
+ case 0x655890d3: // |x| = 0x1.b121a6p75f
+ return round_result_slightly_down(0x1.a9a3f2p5f);
+ case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
+ return round_result_slightly_up(0x1.af66cp5f);
+ case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
+ return round_result_slightly_down(0x1.08b512p6f);
+ case 0x7997f30a: // |x| = 0x1.2fe614p116f
+ return round_result_slightly_up(0x1.451436p6f);
+ }
+ } else {
+ // Exceptional cases when x < 2^24.
+ if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
+ // |x| = 0x1.575e4cp12f
+ return round_result_slightly_down(0x1.29becap3f);
+ }
+ if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
+ // |x| = 0x1.a5209p20f
+ return round_result_slightly_down(0x1.e1b92p3f);
+ }
+ }
+#else
+ if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
+ return x;
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ // asinh(x) = log(x + sqrt(x^2 + 1))
+ return static_cast<float>(
+ x_sign * log_eval(fputil::multiply_add(
+ x_d, x_sign,
+ fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
diff --git a/libc/src/__support/math/asinhf16.h b/libc/src/__support/math/asinhf16.h
new file mode 100644
index 0000000..3c5171e
--- /dev/null
+++ b/libc/src/__support/math/asinhf16.h
@@ -0,0 +1,121 @@
+//===-- Implementation header for asinhf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 asinhf16(float16 x) {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ constexpr size_t N_EXCEPTS = 8;
+
+ constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{
+ // (input, RZ output, RU offset, RD offset, RN offset)
+
+ // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ)
+ {0x3769, 0x372a, 1, 0, 1},
+ // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ)
+ {0x3b5b, 0x3a96, 1, 0, 0},
+ // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ)
+ {0x4b1f, 0x42b3, 1, 0, 0},
+ // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ)
+ {0x4c9b, 0x4336, 1, 0, 1},
+ // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ)
+ {0xb769, 0xb72a, 0, 1, 1},
+ // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ)
+ {0xbb5b, 0xba96, 0, 1, 0},
+ // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ)
+ {0xcb1f, 0xc2b3, 0, 1, 0},
+ // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ)
+ {0xcc9b, 0xc336, 0, 1, 1},
+ }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ using namespace acoshf_internal;
+ using FPBits = fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff;
+
+ if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ // Handle exceptional values
+ if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+ return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ float xf = x;
+ const float SIGN[2] = {1.0f, -1.0f};
+ float x_sign = SIGN[x_u >> 15];
+
+ // |x| <= 0.25
+ if (LIBC_UNLIKELY(x_abs <= 0x3400)) {
+ // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain
+ // rounding types.
+ if (LIBC_UNLIKELY(x_abs < 0x29c6)) {
+ int rounding = fputil::quick_get_round();
+ if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0)
+ return fputil::cast<float16>(xf + 0x1p-24f);
+ if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0)
+ return fputil::cast<float16>(xf - 0x1p-24f);
+ return fputil::cast<float16>(xf);
+ }
+
+ float x_sq = xf * xf;
+ // Generated by Sollya with:
+ // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]);
+ // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f
+ // for better accuracy.
+ float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f,
+ -0x1.6c53dep-5f, 0x1.bd114ep-5f);
+
+ return fputil::cast<float16>(xf * p);
+ }
+
+ // General case: asinh(x) = ln(x + sqrt(x^2 + 1))
+ float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f));
+ return fputil::cast<float16>(
+ x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term)));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
diff --git a/libc/src/__support/math/atan.h b/libc/src/__support/math/atan.h
new file mode 100644
index 0000000..62190b0
--- /dev/null
+++ b/libc/src/__support/math/atan.h
@@ -0,0 +1,189 @@
+//===-- Implementation header for atan --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// To compute atan(x), we divided it into the following cases:
+// * |x| < 2^-26:
+// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
+// return atan(x) = x - sign(x) * epsilon.
+// * 2^-26 <= |x| < 1:
+// We perform range reduction mod 2^-6 = 1/64 as follow:
+// Let k = 2^(-6) * round(|x| * 2^6), then
+// atan(x) = sign(x) * atan(|x|)
+// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k))).
+// We store atan(k) in a look up table, and perform intermediate steps in
+// double-double.
+// * 1 <= |x| < 2^53:
+// First we perform the transformation y = 1/|x|:
+// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+// = sign(x) * (pi/2 - atan(y)).
+// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
+// previous case:
+// Let k = 2^(-6) * round(y * 2^6), then
+// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
+// = atan(k) + atan((1/|x| - k) / (1 + k/|x|))
+// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
+// * |x| >= 2^53:
+// Using the reciprocal transformation:
+// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
+// We have that:
+// atan(1/|x|) <= 1/|x| <= 2^-53,
+// which is smaller than ulp(pi/2) / 2.
+// So we can return:
+// atan(x) = sign(x) * (pi/2 - epsilon)
+
+LIBC_INLINE static constexpr double atan(double x) {
+
+ using namespace atan_internal;
+ using FPBits = fputil::FPBits<double>;
+
+ constexpr double IS_NEG[2] = {1.0, -1.0};
+ constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
+ 0x1.921fb54442d18p0};
+ constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
+ -0x1.921fb54442d18p0};
+
+ FPBits xbits(x);
+ bool x_sign = xbits.is_neg();
+ xbits = xbits.abs();
+ uint64_t x_abs = xbits.uintval();
+ int x_exp =
+ static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
+
+ // |x| < 1.
+ if (x_exp < 0) {
+ if (LIBC_UNLIKELY(x_exp < -26)) {
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x;
+#else
+ if (x == 0.0)
+ return x;
+ // |x| < 2^-26
+ return fputil::multiply_add(-0x1.0p-54, x, x);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ double x_d = xbits.get_val();
+ // k = 2^-6 * round(2^6 * |x|)
+ double k = fputil::nearest_integer(0x1.0p6 * x_d);
+ unsigned idx = static_cast<unsigned>(k);
+ k *= 0x1.0p-6;
+
+ // numerator = |x| - k
+ DoubleDouble num, den;
+ num.lo = 0.0;
+ num.hi = x_d - k;
+
+ // denominator = 1 + k * |x|
+ den.hi = fputil::multiply_add(x_d, k, 1.0);
+ DoubleDouble prod = fputil::exact_mult(x_d, k);
+ // Using Dekker's 2SUM algorithm to compute the lower part.
+ den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
+
+ // x_r = (|x| - k) / (1 + k * |x|)
+ DoubleDouble x_r = fputil::div(num, den);
+
+ // Approximating atan(x_r) using Taylor polynomial.
+ DoubleDouble p = atan_eval(x_r);
+
+ // atan(x) = sign(x) * (atan(k) + atan(x_r))
+ // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
+#else
+
+ DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
+ double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
+ double r = IS_NEG[x_sign] * (c0.hi + c1);
+
+ return r;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ // |x| >= 2^53 or x is NaN.
+ if (LIBC_UNLIKELY(x_exp >= 53)) {
+ // x is nan
+ if (xbits.is_nan()) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ return x;
+ }
+ // |x| >= 2^53
+ // atan(x) ~ sign(x) * pi/2.
+ if (x_exp >= 53)
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return IS_NEG[x_sign] * PI_OVER_2.hi;
+#else
+ return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
+ IS_NEG[x_sign] * PI_OVER_2.lo);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ double x_d = xbits.get_val();
+ double y = 1.0 / x_d;
+
+ // k = 2^-6 * round(2^6 / |x|)
+ double k = fputil::nearest_integer(0x1.0p6 * y);
+ unsigned idx = static_cast<unsigned>(k);
+ k *= 0x1.0p-6;
+
+ // denominator = |x| + k
+ DoubleDouble den = fputil::exact_add(x_d, k);
+ // numerator = 1 - k * |x|
+ DoubleDouble num;
+ num.hi = fputil::multiply_add(-x_d, k, 1.0);
+ DoubleDouble prod = fputil::exact_mult(x_d, k);
+ // Using Dekker's 2SUM algorithm to compute the lower part.
+ num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
+
+ // x_r = (1/|x| - k) / (1 + k/|x|)
+ // = (1 - k * |x|) / (|x| + k)
+ DoubleDouble x_r = fputil::div(num, den);
+
+ // Approximating atan(x_r) using Taylor polynomial.
+ DoubleDouble p = atan_eval(x_r);
+
+ // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+ // = sign(x) * (pi/2 - atan(k) - atan(x_r))
+ // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| + k)))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
+ return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
+#else
+ DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
+ DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
+ double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
+
+ double r = IS_NEG[!x_sign] * (c1.hi + c2);
+
+ return r;
+#endif
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
diff --git a/libc/src/math/generic/atan_utils.h b/libc/src/__support/math/atan_utils.h
index 24c7271..9e8d7d6 100644
--- a/libc/src/math/generic/atan_utils.h
+++ b/libc/src/__support/math/atan_utils.h
@@ -18,7 +18,7 @@
namespace LIBC_NAMESPACE_DECL {
-namespace {
+namespace atan_internal {
using DoubleDouble = fputil::DoubleDouble;
using Float128 = fputil::DyadicFloat<128>;
@@ -29,7 +29,7 @@ using Float128 = fputil::DyadicFloat<128>;
// b = round(atan(i/64) - a, D, RN);
// print("{", b, ",", a, "},");
// };
-constexpr DoubleDouble ATAN_I[65] = {
+static constexpr DoubleDouble ATAN_I[65] = {
{0.0, 0.0},
{-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
{-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
@@ -110,7 +110,8 @@ constexpr DoubleDouble ATAN_I[65] = {
// + x_lo * (1 - x_hi^2 + x_hi^4)
// Since p.lo is ~ x^3/3, the relative error from rounding is bounded by:
// |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66.
-[[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) {
+[[maybe_unused]] LIBC_INLINE static DoubleDouble
+atan_eval(const DoubleDouble &x) {
DoubleDouble p;
p.hi = x.hi;
double x_hi_sq = x.hi * x.hi;
@@ -142,7 +143,7 @@ constexpr DoubleDouble ATAN_I[65] = {
// b = 2^ll + a;
// print("{Sign::POS, ", 2^(ll - 128), ",", b, "},");
// };
-constexpr Float128 ATAN_I_F128[65] = {
+static constexpr Float128 ATAN_I_F128[65] = {
{Sign::POS, 0, 0_u128},
{Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128},
{Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128},
@@ -215,7 +216,7 @@ constexpr Float128 ATAN_I_F128[65] = {
// [0, 2^-7]);
// > dirtyinfnorm(atan(x) - P, [0, 2^-7]);
// 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122
-constexpr Float128 ATAN_POLY_F128[] = {
+static constexpr Float128 ATAN_POLY_F128[] = {
{Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128},
{Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128},
{Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128},
@@ -225,7 +226,8 @@ constexpr Float128 ATAN_POLY_F128[] = {
};
// Approximate atan for |x| <= 2^-7.
-[[maybe_unused]] Float128 atan_eval(const Float128 &x) {
+[[maybe_unused]] LIBC_INLINE static constexpr Float128
+atan_eval(const Float128 &x) {
Float128 x_sq = fputil::quick_mul(x, x);
Float128 x3 = fputil::quick_mul(x, x_sq);
Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1],
@@ -234,7 +236,7 @@ constexpr Float128 ATAN_POLY_F128[] = {
return fputil::multiply_add(x3, p, x);
}
-} // anonymous namespace
+} // namespace atan_internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h
new file mode 100644
index 0000000..92799dc
--- /dev/null
+++ b/libc/src/__support/math/atanf.h
@@ -0,0 +1,129 @@
+//===-- Implementation header for atanf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// Single-precision arctangent. All intermediate work is done in double and
+// the result is rounded to float on return.
+//   - |x| <= 2^-4 or |x| >= 16 (this also covers +-0, +-Inf and NaN): special
+//     values are returned directly, |x| < 2^-12 returns x * (1 - 2^-25), and
+//     the remaining inputs use a degree-11 Taylor polynomial in x (or in -1/x
+//     with +-pi/2 added when |x| >= 16).
+//   - Otherwise the argument (or its reciprocal when |x| > 1) is reduced
+//     around the nearest multiple of 1/16 and atan_eval() supplies the
+//     polynomial for that table entry.
+LIBC_INLINE static constexpr float atanf(float x) {
+  using namespace inv_trigf_utils_internal;
+  using FPBits = typename fputil::FPBits<float>;
+
+  constexpr double FINAL_SIGN[2] = {1.0, -1.0};
+  constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
+                                          -0x1.921fb54442d18p0};
+
+  FPBits x_bits(x);
+  Sign sign = x_bits.sign();
+  x_bits.set_sign(Sign::POS);
+  uint32_t x_abs = x_bits.uintval();
+
+  // x is inf or nan, |x| <= 2^-4 or |x| >= 16.
+  if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
+    double x_d = static_cast<double>(x);
+    double const_term = 0.0;
+    if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
+      // atan(+-Inf) = +-pi/2.
+      if (x_bits.is_inf()) {
+        volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()];
+        return static_cast<float>(sign_pi_over_2);
+      }
+      if (x_bits.is_nan())
+        return x;
+      // |x| >= 16: atan(x) = sign(x) * pi/2 + atan(-1/x).
+      x_d = -1.0 / x_d;
+      const_term = SIGNED_PI_OVER_2[sign.is_neg()];
+    }
+    // atan(+-0) = +-0.
+    if (LIBC_UNLIKELY(x_bits.is_zero()))
+      return x;
+    // |x| < 2^-12;
+    if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+      return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+      // x_d still holds static_cast<double>(x) here: the |x| >= 16 branch
+      // above is never taken for such a small input, so no second copy (which
+      // would shadow the outer x_d) is needed.
+      return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+    }
+    // Use Taylor polynomial:
+    //   atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
+    constexpr double ATAN_TAYLOR[6] = {
+        0x1.0000000000000p+0,  -0x1.5555555555555p-2, 0x1.999999999999ap-3,
+        -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4,  -0x1.745d1745d1746p-4,
+    };
+    double x2 = x_d * x_d;
+    double x4 = x2 * x2;
+    // Pair up consecutive coefficients so the outer polynomial is in x^4.
+    double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+    double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+    double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]);
+    double p = fputil::polyeval(x4, c0, c1, c2);
+    double r = fputil::multiply_add(x_d, p, const_term);
+    return static_cast<float>(r);
+  }
+
+  // Range reduction steps:
+  // 1) atan(x) = sign(x) * atan(|x|)
+  // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+  // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
+  // 4) Then we use polynomial approximation:
+  //   atan(x) ~ atan(k/16) + (x - k/16) * Q(x - k/16)
+  //           = P(x - k/16)
+  double x_d = 0, const_term = 0, final_sign = 0;
+  int idx = 0;
+
+  if (x_abs > 0x3f80'0000U) {
+    // |x| > 1, we need to invert x, so we will perform range reduction in
+    // double precision.
+    x_d = 1.0 / static_cast<double>(x_bits.get_val());
+    double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
+    x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
+    idx = static_cast<int>(k_d);
+    final_sign = FINAL_SIGN[sign.is_pos()];
+    // Adjust constant term of the polynomial by +- pi/2.
+    const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
+                                      SIGNED_PI_OVER_2[sign.is_neg()]);
+  } else {
+    // Exceptional value:
+    if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4
+      return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f)
+                           : fputil::round_result_slightly_up(-0x1.1a6386p-4f);
+    }
+    // Perform range reduction in single precision.
+    float x_f = x_bits.get_val();
+    float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
+    x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
+    x_d = static_cast<double>(x_f);
+    idx = static_cast<int>(k_f);
+    final_sign = FINAL_SIGN[sign.is_neg()];
+    const_term = final_sign * ATAN_COEFFS[idx][0];
+  }
+
+  double p = atan_eval(x_d, idx);
+  double r = fputil::multiply_add(final_sign * x_d, p, const_term);
+
+  return static_cast<float>(r);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index cbb7886..14aaad2 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -124,3 +124,14 @@ add_object_library(
libc.src.__support.threads.linux.raw_mutex
libc.src.__support.CPP.mutex
)
+
+add_object_library(
+  barrier
+  HDRS
+    barrier.h
+  SRCS
+    barrier.cpp
+  DEPENDS
+    # barrier.cpp includes hdr/errno_macros.h for EINVAL / EAGAIN.
+    libc.hdr.errno_macros
+    libc.src.__support.threads.CndVar
+    libc.src.__support.threads.mutex
+)
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
new file mode 100644
index 0000000..cf7207b5
--- /dev/null
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -0,0 +1,85 @@
+//===-- Implementation of Barrier class -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/threads/linux/barrier.h"
+#include "hdr/errno_macros.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Prepares *b for use by `count` threads.
+// Returns EINVAL when count is zero, the non-zero code reported by
+// CndVar::init when either condition variable cannot be set up, EAGAIN when
+// the mutex cannot be initialised, and 0 on success. Barrier attributes are
+// not implemented yet, so attr is asserted to be null.
+int Barrier::init(Barrier *b,
+                  [[maybe_unused]] const pthread_barrierattr_t *attr,
+                  unsigned count) {
+  LIBC_ASSERT(attr == nullptr); // TODO implement barrierattr
+  if (count == 0)
+    return EINVAL;
+
+  // Start out holding arrivals back, with nobody inside the barrier.
+  b->blocking = true;
+  b->waiting = 0;
+  b->expected = count;
+
+  if (int cv_status = CndVar::init(&b->entering); cv_status != 0)
+    return cv_status;
+  if (int cv_status = CndVar::init(&b->exiting); cv_status != 0)
+    return cv_status;
+
+  if (Mutex::init(&b->m, false, false, false, false) != MutexError::NONE)
+    return EAGAIN;
+  return 0;
+}
+
+// Blocks the caller until `expected` threads have reached the barrier.
+// The thread that brings the in-barrier count back to zero receives
+// PTHREAD_BARRIER_SERIAL_THREAD; every other thread receives 0.
+int Barrier::wait() {
+  m.lock();
+
+  // A previous cycle may still be emptying out; hold newcomers back until it
+  // has finished.
+  while (!blocking)
+    entering.wait(&m);
+  ++waiting;
+
+  if (waiting >= expected) {
+    // Last arrival: open the barrier and wake everybody parked on it.
+    blocking = false;
+    exiting.broadcast();
+  } else {
+    // Park until the last arrival opens the barrier.
+    while (blocking)
+      exiting.wait(&m);
+  }
+
+  // Leave the barrier. Whoever empties it re-arms it for the next cycle and
+  // releases the threads that were queued up waiting to enter.
+  --waiting;
+  const bool last_to_leave = waiting == 0;
+  if (last_to_leave) {
+    blocking = true;
+    entering.broadcast();
+  }
+  m.unlock();
+
+  // POSIX dictates that a special value is returned to just one thread; the
+  // last one out is an arbitrary choice.
+  return last_to_leave ? PTHREAD_BARRIER_SERIAL_THREAD : 0;
+}
+
+int Barrier::destroy(Barrier *b) { // Tears down what init() set up; always 0.
+ CndVar::destroy(&b->entering);
+ CndVar::destroy(&b->exiting);
+ Mutex::destroy(&b->m);
+ return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
new file mode 100644
index 0000000..f0655bf
--- /dev/null
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -0,0 +1,50 @@
+//===-- A platform independent abstraction layer for barriers --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_LINUX_BARRIER_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_LINUX_BARRIER_H
+
+#include "hdr/pthread_macros.h"
+#include "include/llvm-libc-types/pthread_barrier_t.h"
+#include "include/llvm-libc-types/pthread_barrierattr_t.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// NOTE: if the size of this class changes, you must ensure that the size of
+// pthread_barrier_t (found in include/llvm-libc-types/pthread_barrier_t.h) is
+// the same size
+class Barrier {
+private:
+  unsigned expected; // threads that must call wait() before any is released
+  unsigned waiting;  // threads currently inside wait() for the current cycle
+  bool blocking;     // true while arrivals are held, false while they drain
+  CndVar entering;   // parks threads that arrive while a cycle is draining
+  CndVar exiting;    // parks arrived threads until the last one shows up
+  Mutex m;           // held by wait() around every access to the fields above
+
+public:
+  // Initialises *b for `count` threads; returns 0 or an error number.
+  static int init(Barrier *b, const pthread_barrierattr_t *attr,
+                  unsigned count);
+  // Destroys the condition variables and mutex of *b; returns 0.
+  static int destroy(Barrier *b);
+  // Blocks until `expected` threads have called wait(); one of them gets
+  // PTHREAD_BARRIER_SERIAL_THREAD, the others 0.
+  int wait();
+};
+
+static_assert(
+    sizeof(Barrier) == sizeof(pthread_barrier_t),
+    "The public pthread_barrier_t type must be the same size as the internal "
+    "barrier type.");
+
+static_assert(alignof(Barrier) == alignof(pthread_barrier_t),
+              "The public pthread_barrier_t type has a different alignment "
+              "than the internal barrier type.");
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_LINUX_BARRIER_H
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index e363ad3..aed1d53 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -60,14 +60,31 @@ add_object_library(
SRCS
mbrtowc.cpp
DEPENDS
- libc.hdr.errno_macros
- libc.hdr.types.wchar_t
- libc.hdr.types.size_t
- libc.src.__support.common
- libc.src.__support.error_or
- libc.src.__support.macros.config
- .character_converter
- .mbstate
+ libc.hdr.errno_macros
+ libc.hdr.types.wchar_t
+ libc.hdr.types.size_t
+ libc.src.__support.common
+ libc.src.__support.error_or
+ libc.src.__support.macros.config
+ .character_converter
+ .mbstate
+)
+
+add_header_library(
+ mbsnrtowcs
+ HDRS
+ mbsnrtowcs.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.types.wchar_t
+ libc.hdr.types.size_t
+ libc.src.__support.common
+ libc.src.__support.error_or
+ libc.src.__support.macros.config
+ libc.src.__support.macros.null_check
+ .character_converter
+ .mbstate
+ .string_converter
)
add_header_library(
diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h
new file mode 100644
index 0000000..54e3152
--- /dev/null
+++ b/libc/src/__support/wchar/mbsnrtowcs.h
@@ -0,0 +1,66 @@
+//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
+#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the multibyte sequence at *src into wide characters.
+//   dst      - output buffer; may be null, in which case characters are only
+//              counted and *src is left untouched.
+//   src      - address of the input pointer; must not be null.
+//   nmc, len - limits forwarded to StringConverter (as its fourth and third
+//              constructor arguments respectively).
+//   ps       - conversion state; rejected with EINVAL when CharacterConverter
+//              reports it as invalid.
+// Returns the number of wide characters produced, not counting a terminating
+// null. When dst is non-null, *src becomes nullptr once the terminator has
+// been converted, or is advanced by the converter's source index when the
+// conversion limit is hit (converter error -1). Any other converter error is
+// returned as an Error.
+LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
+                                              const char **__restrict src,
+                                              size_t nmc, size_t len,
+                                              mbstate *__restrict ps) {
+  LIBC_CRASH_ON_NULLPTR(src);
+  // Checking if mbstate is valid
+  CharacterConverter state_check(ps);
+  if (!state_check.isValidState())
+    return Error(EINVAL);
+
+  const bool store_output = dst != nullptr;
+  StringConverter<char8_t> converter(reinterpret_cast<const char8_t *>(*src),
+                                     ps, len, nmc);
+  size_t produced = 0;
+  for (ErrorOr<char32_t> wc = converter.popUTF32();;
+       wc = converter.popUTF32()) {
+    if (!wc.has_value()) {
+      // -1 signals that we hit the conversion limit; anything else is a real
+      // conversion error.
+      if (wc.error() != -1)
+        return Error(wc.error());
+      if (store_output)
+        *src += converter.getSourceIndex();
+      return produced;
+    }
+
+    if (store_output)
+      dst[produced] = wc.value();
+    // null terminator should not be counted in return value
+    if (wc.value() == L'\0') {
+      if (store_output)
+        *src = nullptr;
+      return produced;
+    }
+    ++produced;
+  }
+}
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 455ad34..0522e0e 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -189,6 +189,7 @@ add_math_entrypoint_object(fabsf)
add_math_entrypoint_object(fabsl)
add_math_entrypoint_object(fabsf16)
add_math_entrypoint_object(fabsf128)
+add_math_entrypoint_object(fabsbf16)
add_math_entrypoint_object(fadd)
add_math_entrypoint_object(faddl)
diff --git a/libc/src/math/fabsbf16.h b/libc/src/math/fabsbf16.h
new file mode 100644
index 0000000..4993668
--- /dev/null
+++ b/libc/src/math/fabsbf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for fabsbf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FABSBF16_H
+#define LLVM_LIBC_SRC_MATH_FABSBF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 fabsbf16(bfloat16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_FABSBF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index d4d268c..701dc4b 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -698,6 +698,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ fabsbf16
+ SRCS
+ fabsbf16.cpp
+ HDRS
+ ../fabsbf16.h
+ DEPENDS
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
fadd
SRCS
fadd.cpp
@@ -3889,12 +3902,7 @@ add_entrypoint_object(
HDRS
../asinhf.h
DEPENDS
- .explogxf
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
+ libc.src.__support.math.asinhf
)
add_entrypoint_object(
@@ -3904,18 +3912,7 @@ add_entrypoint_object(
HDRS
../asinhf16.h
DEPENDS
- .explogxf
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.cast
- libc.src.__support.FPUtil.except_value_utils
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.FPUtil.sqrt
- libc.src.__support.macros.optimization
- libc.src.__support.macros.properties.types
+ libc.src.__support.math.asinhf16
)
add_entrypoint_object(
@@ -4023,19 +4020,6 @@ add_entrypoint_object(
libc.src.errno.errno
)
-add_header_library(
- atan_utils
- HDRS
- atan_utils.h
- DEPENDS
- libc.src.__support.integer_literals
- libc.src.__support.FPUtil.double_double
- libc.src.__support.FPUtil.dyadic_float
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.macros.optimization
-)
-
add_entrypoint_object(
atanf
SRCS
@@ -4043,14 +4027,7 @@ add_entrypoint_object(
HDRS
../atanf.h
DEPENDS
- libc.src.__support.FPUtil.except_value_utils
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.macros.optimization
- libc.src.__support.math.inv_trigf_utils
+ libc.src.__support.math.atanf
)
add_entrypoint_object(
@@ -4082,13 +4059,7 @@ add_entrypoint_object(
COMPILE_OPTIONS
-O3
DEPENDS
- .atan_utils
- libc.src.__support.FPUtil.double_double
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.macros.optimization
+ libc.src.__support.math.atan
)
add_entrypoint_object(
@@ -4118,7 +4089,7 @@ add_entrypoint_object(
HDRS
../atan2.h
DEPENDS
- .atan_utils
+ libc.src.__support.math.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
@@ -4144,7 +4115,7 @@ add_entrypoint_object(
HDRS
../atan2f128.h
DEPENDS
- .atan_utils
+ libc.src.__support.math.atan_utils
libc.src.__support.integer_literals
libc.src.__support.uint128
libc.src.__support.FPUtil.dyadic_float
diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp
index 3aed3bc..45023c8 100644
--- a/libc/src/math/generic/asinhf.cpp
+++ b/libc/src/math/generic/asinhf.cpp
@@ -7,112 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asinhf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/common_constants.h"
-#include "src/math/generic/explogxf.h"
+#include "src/__support/math/asinhf.h"
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, asinhf, (float x)) {
- using namespace acoshf_internal;
- using FPBits_t = typename fputil::FPBits<float>;
- FPBits_t xbits(x);
- uint32_t x_u = xbits.uintval();
- uint32_t x_abs = xbits.abs().uintval();
-
- // |x| <= 2^-3
- if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
- // |x| <= 2^-26
- if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
- return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
- ? x
- : (x - 0x1.5555555555555p-3 * x * x * x));
- }
-
- double x_d = x;
- double x_sq = x_d * x_d;
- // Generated by Sollya with:
- // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
- // [0, 2^-2]);
- double p = fputil::polyeval(
- x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
- -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
- 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
- return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
- }
-
- const double SIGN[2] = {1.0, -1.0};
- double x_sign = SIGN[x_u >> 31];
- double x_d = x;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Helper functions to set results for exceptional cases.
- auto round_result_slightly_down = [x_sign](float r) -> float {
- return fputil::multiply_add(static_cast<float>(x_sign), r,
- static_cast<float>(x_sign) * (-0x1.0p-24f));
- };
- auto round_result_slightly_up = [x_sign](float r) -> float {
- return fputil::multiply_add(static_cast<float>(x_sign), r,
- static_cast<float>(x_sign) * 0x1.0p-24f);
- };
-
- if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits_t::quiet_nan().get_val();
- }
-
- return x;
- }
-
- // Exceptional cases when x > 2^24.
- switch (x_abs) {
- case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
- return round_result_slightly_down(0x1.1e0696p4f);
- case 0x4c803f2c: // |x| = 0x1.007e58p26f
- return round_result_slightly_down(0x1.2b786cp4f);
- case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
- return round_result_slightly_up(0x1.6fdd34p4f);
- case 0x5c569e88: // |x| = 0x1.ad3d1p57f
- return round_result_slightly_up(0x1.45c146p5f);
- case 0x5e68984e: // |x| = 0x1.d1309cp61f
- return round_result_slightly_up(0x1.5c9442p5f);
- case 0x655890d3: // |x| = 0x1.b121a6p75f
- return round_result_slightly_down(0x1.a9a3f2p5f);
- case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
- return round_result_slightly_up(0x1.af66cp5f);
- case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
- return round_result_slightly_down(0x1.08b512p6f);
- case 0x7997f30a: // |x| = 0x1.2fe614p116f
- return round_result_slightly_up(0x1.451436p6f);
- }
- } else {
- // Exceptional cases when x < 2^24.
- if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
- // |x| = 0x1.575e4cp12f
- return round_result_slightly_down(0x1.29becap3f);
- }
- if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
- // |x| = 0x1.a5209p20f
- return round_result_slightly_down(0x1.e1b92p3f);
- }
- }
-#else
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
- return x;
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- // asinh(x) = log(x + sqrt(x^2 + 1))
- return static_cast<float>(
- x_sign * log_eval(fputil::multiply_add(
- x_d, x_sign,
- fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
-}
+LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); } // Thin entry point: all work happens in math::asinhf.
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asinhf16.cpp b/libc/src/math/generic/asinhf16.cpp
index 0a0b471..d517e63 100644
--- a/libc/src/math/generic/asinhf16.cpp
+++ b/libc/src/math/generic/asinhf16.cpp
@@ -7,102 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/asinhf16.h"
-#include "explogxf.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/asinhf16.h"
namespace LIBC_NAMESPACE_DECL {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr size_t N_EXCEPTS = 8;
-
-static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{
- // (input, RZ output, RU offset, RD offset, RN offset)
-
- // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ)
- {0x3769, 0x372a, 1, 0, 1},
- // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ)
- {0x3b5b, 0x3a96, 1, 0, 0},
- // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ)
- {0x4b1f, 0x42b3, 1, 0, 0},
- // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ)
- {0x4c9b, 0x4336, 1, 0, 1},
- // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ)
- {0xb769, 0xb72a, 0, 1, 1},
- // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ)
- {0xbb5b, 0xba96, 0, 1, 0},
- // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ)
- {0xcb1f, 0xc2b3, 0, 1, 0},
- // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ)
- {0xcc9b, 0xc336, 0, 1, 1},
-}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) {
- using namespace acoshf_internal;
- using FPBits = fputil::FPBits<float16>;
- FPBits xbits(x);
-
- uint16_t x_u = xbits.uintval();
- uint16_t x_abs = x_u & 0x7fff;
-
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
-
- return x;
- }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Handle exceptional values
- if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- float xf = x;
- const float SIGN[2] = {1.0f, -1.0f};
- float x_sign = SIGN[x_u >> 15];
-
- // |x| <= 0.25
- if (LIBC_UNLIKELY(x_abs <= 0x3400)) {
- // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain
- // rounding types.
- if (LIBC_UNLIKELY(x_abs < 0x29c6)) {
- int rounding = fputil::quick_get_round();
- if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0)
- return fputil::cast<float16>(xf + 0x1p-24f);
- if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0)
- return fputil::cast<float16>(xf - 0x1p-24f);
- return fputil::cast<float16>(xf);
- }
-
- float x_sq = xf * xf;
- // Generated by Sollya with:
- // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]);
- // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f
- // for better accuracy.
- float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f,
- -0x1.6c53dep-5f, 0x1.bd114ep-5f);
-
- return fputil::cast<float16>(xf * p);
- }
-
- // General case: asinh(x) = ln(x + sqrt(x^2 + 1))
- float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f));
- return fputil::cast<float16>(
- x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term)));
-}
+LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { return math::asinhf16(x); } // Thin entry point: all work happens in math::asinhf16.
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/atan.cpp b/libc/src/math/generic/atan.cpp
index cbca605..93bf2e1 100644
--- a/libc/src/math/generic/atan.cpp
+++ b/libc/src/math/generic/atan.cpp
@@ -7,173 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/atan.h"
-#include "atan_utils.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/atan.h"
namespace LIBC_NAMESPACE_DECL {
-// To compute atan(x), we divided it into the following cases:
-// * |x| < 2^-26:
-// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
-// return atan(x) = x - sign(x) * epsilon.
-// * 2^-26 <= |x| < 1:
-// We perform range reduction mod 2^-6 = 1/64 as follow:
-// Let k = 2^(-6) * round(|x| * 2^6), then
-// atan(x) = sign(x) * atan(|x|)
-// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
-// We store atan(k) in a look up table, and perform intermediate steps in
-// double-double.
-// * 1 < |x| < 2^53:
-// First we perform the transformation y = 1/|x|:
-// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
-// = sign(x) * (pi/2 - atan(y)).
-// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
-// previous case:
-// Let k = 2^(-6) * round(y * 2^6), then
-// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
-// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
-// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
-// * |x| >= 2^53:
-// Using the reciprocal transformation:
-// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
-// We have that:
-// atan(1/|x|) <= 1/|x| <= 2^-53,
-// which is smaller than ulp(pi/2) / 2.
-// So we can return:
-// atan(x) = sign(x) * (pi/2 - epsilon)
-
-LLVM_LIBC_FUNCTION(double, atan, (double x)) {
- using FPBits = fputil::FPBits<double>;
-
- constexpr double IS_NEG[2] = {1.0, -1.0};
- constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
- 0x1.921fb54442d18p0};
- constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
- -0x1.921fb54442d18p0};
-
- FPBits xbits(x);
- bool x_sign = xbits.is_neg();
- xbits = xbits.abs();
- uint64_t x_abs = xbits.uintval();
- int x_exp =
- static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
-
- // |x| < 1.
- if (x_exp < 0) {
- if (LIBC_UNLIKELY(x_exp < -26)) {
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- return x;
-#else
- if (x == 0.0)
- return x;
- // |x| < 2^-26
- return fputil::multiply_add(-0x1.0p-54, x, x);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- }
-
- double x_d = xbits.get_val();
- // k = 2^-6 * round(2^6 * |x|)
- double k = fputil::nearest_integer(0x1.0p6 * x_d);
- unsigned idx = static_cast<unsigned>(k);
- k *= 0x1.0p-6;
-
- // numerator = |x| - k
- DoubleDouble num, den;
- num.lo = 0.0;
- num.hi = x_d - k;
-
- // denominator = 1 - k * |x|
- den.hi = fputil::multiply_add(x_d, k, 1.0);
- DoubleDouble prod = fputil::exact_mult(x_d, k);
- // Using Dekker's 2SUM algorithm to compute the lower part.
- den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
-
- // x_r = (|x| - k) / (1 + k * |x|)
- DoubleDouble x_r = fputil::div(num, den);
-
- // Approximating atan(x_r) using Taylor polynomial.
- DoubleDouble p = atan_eval(x_r);
-
- // atan(x) = sign(x) * (atan(k) + atan(x_r))
- // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
-#else
-
- DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
- double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
- double r = IS_NEG[x_sign] * (c0.hi + c1);
-
- return r;
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- }
-
- // |x| >= 2^53 or x is NaN.
- if (LIBC_UNLIKELY(x_exp >= 53)) {
- // x is nan
- if (xbits.is_nan()) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
- return x;
- }
- // |x| >= 2^53
- // atan(x) ~ sign(x) * pi/2.
- if (x_exp >= 53)
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- return IS_NEG[x_sign] * PI_OVER_2.hi;
-#else
- return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
- IS_NEG[x_sign] * PI_OVER_2.lo);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- }
-
- double x_d = xbits.get_val();
- double y = 1.0 / x_d;
-
- // k = 2^-6 * round(2^6 / |x|)
- double k = fputil::nearest_integer(0x1.0p6 * y);
- unsigned idx = static_cast<unsigned>(k);
- k *= 0x1.0p-6;
-
- // denominator = |x| + k
- DoubleDouble den = fputil::exact_add(x_d, k);
- // numerator = 1 - k * |x|
- DoubleDouble num;
- num.hi = fputil::multiply_add(-x_d, k, 1.0);
- DoubleDouble prod = fputil::exact_mult(x_d, k);
- // Using Dekker's 2SUM algorithm to compute the lower part.
- num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
-
- // x_r = (1/|x| - k) / (1 - k/|x|)
- // = (1 - k * |x|) / (|x| - k)
- DoubleDouble x_r = fputil::div(num, den);
-
- // Approximating atan(x_r) using Taylor polynomial.
- DoubleDouble p = atan_eval(x_r);
-
- // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
- // = sign(x) * (pi/2 - atan(k) - atan(x_r))
- // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
- return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
-#else
- DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
- DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
- double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
-
- double r = IS_NEG[!x_sign] * (c1.hi + c2);
-
- return r;
-#endif
-}
+LLVM_LIBC_FUNCTION(double, atan, (double x)) { return math::atan(x); } // Thin entry point: all work happens in math::atan.
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp
index aa770de..58042d3 100644
--- a/libc/src/math/generic/atan2.cpp
+++ b/libc/src/math/generic/atan2.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "src/math/atan2.h"
-#include "atan_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
@@ -15,6 +14,7 @@
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/atan_utils.h"
namespace LIBC_NAMESPACE_DECL {
@@ -72,6 +72,7 @@ namespace LIBC_NAMESPACE_DECL {
// |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73.
LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) {
+ using namespace atan_internal;
using FPBits = fputil::FPBits<double>;
constexpr double IS_NEG[2] = {1.0, -1.0};
diff --git a/libc/src/math/generic/atan2f128.cpp b/libc/src/math/generic/atan2f128.cpp
index a3aba0b..8838d94 100644
--- a/libc/src/math/generic/atan2f128.cpp
+++ b/libc/src/math/generic/atan2f128.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "src/math/atan2f128.h"
-#include "atan_utils.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/dyadic_float.h"
#include "src/__support/FPUtil/multiply_add.h"
@@ -16,6 +15,7 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/types.h"
+#include "src/__support/math/atan_utils.h"
#include "src/__support/uint128.h"
namespace LIBC_NAMESPACE_DECL {
@@ -103,6 +103,7 @@ static constexpr Float128 CONST_ADJ[2][2][2] = {
// |(atan(u) - P(u)) / P(u)| < 2^-114.
LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) {
+ using namespace atan_internal;
using FPBits = fputil::FPBits<float128>;
using Float128 = fputil::DyadicFloat<128>;
diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp
index 22f962e..acd32f0 100644
--- a/libc/src/math/generic/atanf.cpp
+++ b/libc/src/math/generic/atanf.cpp
@@ -7,116 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/atanf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/math/inv_trigf_utils.h"
+#include "src/__support/math/atanf.h"
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
- using namespace inv_trigf_utils_internal;
- using FPBits = typename fputil::FPBits<float>;
-
- constexpr double FINAL_SIGN[2] = {1.0, -1.0};
- constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
- -0x1.921fb54442d18p0};
-
- FPBits x_bits(x);
- Sign sign = x_bits.sign();
- x_bits.set_sign(Sign::POS);
- uint32_t x_abs = x_bits.uintval();
-
- // x is inf or nan, |x| < 2^-4 or |x|= > 16.
- if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
- double x_d = static_cast<double>(x);
- double const_term = 0.0;
- if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
- // atan(+-Inf) = +-pi/2.
- if (x_bits.is_inf()) {
- volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()];
- return static_cast<float>(sign_pi_over_2);
- }
- if (x_bits.is_nan())
- return x;
- // x >= 16
- x_d = -1.0 / x_d;
- const_term = SIGNED_PI_OVER_2[sign.is_neg()];
- }
- // 0 <= x < 1/16;
- if (LIBC_UNLIKELY(x_bits.is_zero()))
- return x;
- // x <= 2^-12;
- if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
- return fputil::multiply_add(x, -0x1.0p-25f, x);
-#else
- double x_d = static_cast<double>(x);
- return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
-#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
- }
- // Use Taylor polynomial:
- // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
- constexpr double ATAN_TAYLOR[6] = {
- 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
- -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4,
- };
- double x2 = x_d * x_d;
- double x4 = x2 * x2;
- double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
- double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
- double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]);
- double p = fputil::polyeval(x4, c0, c1, c2);
- double r = fputil::multiply_add(x_d, p, const_term);
- return static_cast<float>(r);
- }
-
- // Range reduction steps:
- // 1) atan(x) = sign(x) * atan(|x|)
- // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
- // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
- // 4) Then we use polynomial approximation:
- // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16)
- // = P(x - k/16)
- double x_d, const_term, final_sign;
- int idx;
-
- if (x_abs > 0x3f80'0000U) {
- // |x| > 1, we need to invert x, so we will perform range reduction in
- // double precision.
- x_d = 1.0 / static_cast<double>(x_bits.get_val());
- double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
- x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
- idx = static_cast<int>(k_d);
- final_sign = FINAL_SIGN[sign.is_pos()];
- // Adjust constant term of the polynomial by +- pi/2.
- const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
- SIGNED_PI_OVER_2[sign.is_neg()]);
- } else {
- // Exceptional value:
- if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4
- return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f)
- : fputil::round_result_slightly_up(-0x1.1a6386p-4f);
- }
- // Perform range reduction in single precision.
- float x_f = x_bits.get_val();
- float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
- x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
- x_d = static_cast<double>(x_f);
- idx = static_cast<int>(k_f);
- final_sign = FINAL_SIGN[sign.is_neg()];
- const_term = final_sign * ATAN_COEFFS[idx][0];
- }
-
- double p = atan_eval(x_d, idx);
- double r = fputil::multiply_add(final_sign * x_d, p, const_term);
-
- return static_cast<float>(r);
-}
+// Public atanf entry point; the computation is delegated to math::atanf.
+LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
+  return math::atanf(x);
+}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsbf16.cpp b/libc/src/math/generic/fabsbf16.cpp
new file mode 100644
index 0000000..ea39719
--- /dev/null
+++ b/libc/src/math/generic/fabsbf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fabsbf16 function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fabsbf16.h"
+
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public fabsbf16 entry point; returns the result of fputil::abs on x.
+LLVM_LIBC_FUNCTION(bfloat16, fabsbf16, (bfloat16 x)) {
+  return fputil::abs(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index c5db6fa..fe31e6a 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -273,6 +273,40 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ pthread_barrier_init
+ SRCS
+ pthread_barrier_init.cpp
+ HDRS
+ pthread_barrier_init.h
+ DEPENDS
+ libc.src.errno.errno
+ libc.include.pthread
+ libc.src.__support.threads.linux.barrier
+)
+
+add_entrypoint_object(
+ pthread_barrier_destroy
+ SRCS
+ pthread_barrier_destroy.cpp
+ HDRS
+ pthread_barrier_destroy.h
+ DEPENDS
+ libc.include.pthread
+ libc.src.__support.threads.linux.barrier
+)
+
+add_entrypoint_object(
+ pthread_barrier_wait
+ SRCS
+ pthread_barrier_wait.cpp
+ HDRS
+ pthread_barrier_wait.h
+ DEPENDS
+ libc.include.pthread
+ libc.src.__support.threads.linux.barrier
+)
+
+add_entrypoint_object(
pthread_mutex_init
SRCS
pthread_mutex_init.cpp
diff --git a/libc/src/pthread/pthread_barrier_destroy.cpp b/libc/src/pthread/pthread_barrier_destroy.cpp
new file mode 100644
index 0000000..82de8f2
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_destroy.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of the pthread_barrier_destroy function ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_destroy.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for pthread_barrier_destroy: reinterprets the public
+// pthread_barrier_t object as the internal Barrier and forwards it to
+// Barrier::destroy, returning that call's result unchanged.
+LLVM_LIBC_FUNCTION(int, pthread_barrier_destroy, (pthread_barrier_t * b)) {
+  auto *barrier = reinterpret_cast<Barrier *>(b);
+  return Barrier::destroy(barrier);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_destroy.h b/libc/src/pthread/pthread_barrier_destroy.h
new file mode 100644
index 0000000..e27552c
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_destroy.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for pthread_barrier_destroy -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_destroy(pthread_barrier_t *b);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
diff --git a/libc/src/pthread/pthread_barrier_init.cpp b/libc/src/pthread/pthread_barrier_init.cpp
new file mode 100644
index 0000000..2e92238
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_init.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of the pthread_barrier_init function ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_init.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "hdr/types/pthread_barrierattr_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for pthread_barrier_init: reinterprets the public
+// pthread_barrier_t object as the internal Barrier and forwards it, together
+// with the attribute pointer and the count, to Barrier::init. The result of
+// Barrier::init is returned unchanged.
+LLVM_LIBC_FUNCTION(int, pthread_barrier_init,
+                   (pthread_barrier_t * b,
+                    const pthread_barrierattr_t *__restrict attr,
+                    unsigned count)) {
+  auto *barrier = reinterpret_cast<Barrier *>(b);
+  return Barrier::init(barrier, attr, count);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_init.h b/libc/src/pthread/pthread_barrier_init.h
new file mode 100644
index 0000000..bb17f3f
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_init.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for pthread_barrier_init ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "hdr/types/pthread_barrierattr_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_init(pthread_barrier_t *b,
+ const pthread_barrierattr_t *__restrict attr,
+ unsigned count);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
diff --git a/libc/src/pthread/pthread_barrier_wait.cpp b/libc/src/pthread/pthread_barrier_wait.cpp
new file mode 100644
index 0000000..dbd1333
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_wait.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of the pthread_barrier_wait function ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_wait.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for pthread_barrier_wait: reinterprets the public
+// pthread_barrier_t object as the internal Barrier and returns the result of
+// calling wait() on it.
+LLVM_LIBC_FUNCTION(int, pthread_barrier_wait, (pthread_barrier_t * b)) {
+  auto *barrier = reinterpret_cast<Barrier *>(b);
+  return barrier->wait();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_wait.h b/libc/src/pthread/pthread_barrier_wait.h
new file mode 100644
index 0000000..16ddc06
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_wait.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for pthread_barrier_wait ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_wait(pthread_barrier_t *b);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 49f4a1b..9ba0a06 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -185,6 +185,55 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ mbstowcs
+ SRCS
+ mbstowcs.cpp
+ HDRS
+ mbstowcs.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.src.__support.macros.null_check
+ libc.src.__support.libc_errno
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
+ mbsrtowcs
+ SRCS
+ mbsrtowcs.cpp
+ HDRS
+ mbsrtowcs.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.src.__support.libc_errno
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
+ mbsnrtowcs
+ SRCS
+ mbsnrtowcs.cpp
+ HDRS
+ mbsnrtowcs.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.src.__support.libc_errno
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
wcstombs
SRCS
wcstombs.cpp
diff --git a/libc/src/wchar/mbsnrtowcs.cpp b/libc/src/wchar/mbsnrtowcs.cpp
new file mode 100644
index 0000000..28e0ff3
--- /dev/null
+++ b/libc/src/wchar/mbsnrtowcs.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of mbsnrtowcs --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsnrtowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for mbsnrtowcs. Forwards to internal::mbsnrtowcs and maps its
+// result onto the C return convention: the value it produced on success, or
+// (size_t)-1 with libc_errno set to the reported error on failure.
+LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t nmc, size_t len, mbstate_t *__restrict ps)) {
+  // Fallback conversion state used by every call that passes ps == nullptr.
+  static internal::mbstate internal_mbstate;
+  internal::mbstate *state = &internal_mbstate;
+  if (ps != nullptr)
+    state = reinterpret_cast<internal::mbstate *>(ps);
+
+  // If destination is null, ignore len
+  if (dst == nullptr)
+    len = SIZE_MAX;
+
+  auto result = internal::mbsnrtowcs(dst, src, nmc, len, state);
+  if (result.has_value())
+    return result.value();
+
+  // Encoding failure
+  libc_errno = result.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h
new file mode 100644
index 0000000..0d66b95
--- /dev/null
+++ b/libc/src/wchar/mbsnrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsnrtowcs ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+ size_t nmc, size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000..82ca25a
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of mbsrtowcs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsrtowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for mbsrtowcs. Forwards to internal::mbsnrtowcs with SIZE_MAX
+// as the input byte limit and maps its result onto the C return convention:
+// the value it produced on success, or (size_t)-1 with libc_errno set to the
+// reported error on failure.
+LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  // Fallback conversion state used by every call that passes ps == nullptr.
+  static internal::mbstate internal_mbstate;
+  internal::mbstate *state = &internal_mbstate;
+  if (ps != nullptr)
+    state = reinterpret_cast<internal::mbstate *>(ps);
+
+  // If destination is null, ignore len
+  if (dst == nullptr)
+    len = SIZE_MAX;
+
+  auto result = internal::mbsnrtowcs(dst, src, SIZE_MAX, len, state);
+  if (result.has_value())
+    return result.value();
+
+  // Encoding failure
+  libc_errno = result.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h
new file mode 100644
index 0000000..f8d4cc2
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsrtowcs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+ size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp
new file mode 100644
index 0000000..43e953c
--- /dev/null
+++ b/libc/src/wchar/mbstowcs.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of mbstowcs ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbstowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Entry point for mbstowcs. Crashes on a null source pointer, then forwards
+// to internal::mbsnrtowcs with SIZE_MAX as the input byte limit. Returns the
+// value that call produced on success, or (size_t)-1 with libc_errno set to
+// the reported error on failure.
+LLVM_LIBC_FUNCTION(size_t, mbstowcs,
+                   (wchar_t *__restrict pwcs, const char *__restrict s,
+                    size_t n)) {
+  LIBC_CRASH_ON_NULLPTR(s);
+  // If destination is null, ignore n
+  n = pwcs == nullptr ? SIZE_MAX : n;
+  // mbstowcs has no caller-visible conversion state and must start every
+  // conversion from the initial shift state, so use a fresh per-call state
+  // instead of a function-local static: a static would be shared (and raced
+  // on) by concurrent callers and could carry leftovers between calls.
+  // NOTE(review): assumes a value-initialized internal::mbstate is the
+  // initial conversion state, as the previous static object was -- confirm.
+  internal::mbstate initial_state{};
+  const char *temp = s;
+  auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &initial_state);
+
+  if (!ret.has_value()) {
+    // Encoding failure
+    libc_errno = ret.error();
+    return static_cast<size_t>(-1);
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h
new file mode 100644
index 0000000..7d08a83
--- /dev/null
+++ b/libc/src/wchar/mbstowcs.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for mbstowcs --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index e0218c7..55a3cee 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,13 +17,10 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-// returns true if the character exists in the string
-LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) {
- for (int n = 0; str[n]; ++n) {
- if (str[n] == c)
- return true;
- }
- return false;
+// Scans the null-terminated wide string s for the first element equal to c.
+// Returns a pointer to that element, or nullptr when the terminator is
+// reached first. When c is L'\0' the terminator itself is the match.
+LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+  while (*s != L'\0' && *s != c)
+    ++s;
+  if (*s == c)
+    return s;
+  return nullptr;
}
// bool should be true for wcscspn for complimentary span
@@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2,
bool not_match_set) {
size_t i = 0;
for (; s1[i]; ++i) {
- bool in_set = wcschr(s1[i], s2);
+ bool in_set = internal::wcschr(s2, s1[i]);
if (in_set == not_match_set)
return i;
}
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index defc2ce..8ac4916 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,15 +11,14 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
- for (; *s && *s != c; ++s)
- ;
- if (*s == c)
- return s;
- return nullptr;
+ LIBC_CRASH_ON_NULLPTR(s);
+ return internal::wcschr(s, c);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index a00ba99..f329b73 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -11,17 +11,10 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/macros/null_check.h"
+#include "src/wchar/wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
-bool contains_char(const wchar_t *str, wchar_t target) {
- for (; *str != L'\0'; str++)
- if (*str == target)
- return true;
-
- return false;
-}
-
LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
(const wchar_t *src, const wchar_t *breakset)) {
LIBC_CRASH_ON_NULLPTR(src);
@@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
// currently O(n * m), can be further optimized to O(n + m) with a hash set
for (int src_idx = 0; src[src_idx] != 0; src_idx++)
- if (contains_char(breakset, src[src_idx]))
+ if (internal::wcschr(breakset, src[src_idx]))
return src + src_idx;
return nullptr;
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 291efc1..ed4f0aa 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -10,18 +10,12 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
+#include "wchar_utils.h"
namespace LIBC_NAMESPACE_DECL {
-bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
- for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
- if (wc == *delim_ptr)
- return true;
- return false;
-}
-
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
- (wchar_t *__restrict str, const wchar_t *__restrict delim,
+ (wchar_t *__restrict str, const wchar_t *__restrict delims,
wchar_t **__restrict context)) {
if (str == nullptr) {
if (*context == nullptr)
@@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
str = *context;
}
- wchar_t *tok_start, *tok_end;
- for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
- ++tok_start)
- ;
+ wchar_t *tok_start = str;
+ while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
+ ++tok_start;
- for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
- ++tok_end)
- ;
+ wchar_t *tok_end = tok_start;
+ while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
+ ++tok_end;
if (*tok_end != L'\0') {
*tok_end = L'\0';