diff options
Diffstat (limited to 'libc/src')
53 files changed, 1730 insertions, 776 deletions
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h index e6fad1b..e999ece 100644 --- a/libc/src/__support/FPUtil/cast.h +++ b/libc/src/__support/FPUtil/cast.h @@ -66,9 +66,9 @@ cast(InType x) { cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN); DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x); return xd.template as<OutType, /*ShouldSignalExceptions=*/true>(); + } else { + return static_cast<OutType>(x); } - - return static_cast<OutType>(x); } } // namespace LIBC_NAMESPACE::fputil diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 2b78c4d..bd0a55c 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -266,38 +266,31 @@ struct Slab { // Randomly walks the bitfield until it finds a free bit. Allocations attempt // to put lanes right next to each other for better caching and convergence. - void *allocate(uint64_t lane_mask, uint64_t uniform) { + void *allocate(uint64_t uniform, uint32_t reserved) { uint32_t chunk_size = get_chunk_size(); uint32_t state = impl::entropy(); - // The uniform mask represents which lanes contain a uniform target pointer. - // We attempt to place these next to each other. - void *result = nullptr; - uint32_t after = ~0u; - uint32_t old_index = 0; - for (uint64_t mask = lane_mask; mask; - mask = gpu::ballot(lane_mask, !result)) { - if (result) - continue; - - // We try using any known empty bits from the previous attempt first. - uint32_t start = gpu::shuffle( - mask, cpp::countr_zero(uniform & mask), - ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after) - : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD)); + // Try to find the empty bit in the bitfield to finish the allocation. We + // start at the number of allocations as this is guaranteed to be available + // until the user starts freeing memory. + uint64_t lane_mask = gpu::get_lane_mask(); + uint32_t start = gpu::shuffle( + lane_mask, cpp::countr_zero(uniform & lane_mask), reserved); + for (;;) { + uint64_t lane_mask = gpu::get_lane_mask(); // Each lane tries to claim one bit in a single contiguous mask. - uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id()); + uint32_t id = impl::lane_count(uniform & lane_mask, gpu::get_lane_id()); uint32_t index = (start + id) % usable_bits(chunk_size); uint32_t slot = index / BITS_IN_WORD; uint32_t bit = index % BITS_IN_WORD; // Get the mask of bits destined for the same slot and coalesce it. uint32_t leader = impl::get_leader_id( - uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0), + uniform & gpu::ballot(lane_mask, !id || index % BITS_IN_WORD == 0), gpu::get_lane_id()); - uint32_t length = cpp::popcount(uniform & mask) - - impl::lane_count(uniform & mask, leader); + uint32_t length = cpp::popcount(uniform & lane_mask) - + impl::lane_count(uniform & lane_mask, leader); uint32_t bitmask = static_cast<uint32_t>( (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1) @@ -307,18 +300,23 @@ struct Slab { if (gpu::get_lane_id() == leader) before = cpp::AtomicRef(get_bitfield()[slot]) .fetch_or(bitmask, cpp::MemoryOrder::RELAXED); - before = gpu::shuffle(mask, leader, before); - if (~before & (1 << bit)) - result = ptr_from_index(index, chunk_size); - else - sleep_briefly(); + before = gpu::shuffle(lane_mask, leader, before); + if (~before & (1 << bit)) { + cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); + return ptr_from_index(index, chunk_size); + } - after = before | bitmask; - old_index = index; + // If the previous operation found an empty bit we move there, otherwise + // we generate new random index to start at. + uint32_t after = before | bitmask; + start = gpu::shuffle( + gpu::get_lane_mask(), + cpp::countr_zero(uniform & gpu::get_lane_mask()), + ~after ? __builtin_align_down(index, BITS_IN_WORD) + + cpp::countr_zero(~after) + : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD)); + sleep_briefly(); } - - cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); - return result; } // Deallocates memory by resetting its corresponding bit in the bitfield. @@ -460,11 +458,13 @@ public: result->initialize(uniform); if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform))) finalize(result, cpp::popcount(uniform), count); + count = + gpu::shuffle(gpu::get_lane_mask(), cpp::countr_zero(uniform), count); } if (!impl::is_sentinel(count)) count = count - cpp::popcount(uniform) + - impl::lane_count(uniform, gpu::get_lane_id()) + 1; + impl::lane_count(uniform, gpu::get_lane_id()); return result; } @@ -505,7 +505,8 @@ static cpp::Atomic<uint32_t> indices[] = { #undef S // Tries to find a slab in the table that can support the given chunk size. -static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) { +static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform, + uint32_t &reserved) { // We start at the index of the last successful allocation for this kind. uint32_t chunk_id = impl::get_chunk_id(chunk_size); uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED); @@ -518,7 +519,6 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) { if (!offset || slots[index].use_count() < Slab::available_chunks(chunk_size)) { uint64_t lane_mask = gpu::get_lane_mask(); - uint32_t reserved = 0; Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved, chunk_size, index); @@ -536,13 +536,13 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) { // If we find a slab with a matching chunk size then we store the result. // Otherwise, we need to free the claimed lock and continue. In the case // of out-of-memory we receive a sentinel value and return a failure. - if (slab && reserved <= Slab::available_chunks(chunk_size) && + if (slab && reserved < Slab::available_chunks(chunk_size) && slab->get_chunk_size() == chunk_size) { if (index != start) indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED); uniform = uniform & gpu::get_lane_mask(); return slab; - } else if (slab && (reserved > Slab::available_chunks(chunk_size) || + } else if (slab && (reserved >= Slab::available_chunks(chunk_size) || slab->get_chunk_size() != chunk_size)) { slots[index].unlock(gpu::get_lane_mask(), gpu::get_lane_mask() & uniform); @@ -578,12 +578,12 @@ void *allocate(uint64_t size) { // Try to find a slab for the rounded up chunk size and allocate from it. uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size)); uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size); - Slab *slab = find_slab(chunk_size, uniform); - if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab))) + uint32_t reserved = 0; + Slab *slab = find_slab(chunk_size, uniform, reserved); + if (!slab) return nullptr; - uint64_t lane_mask = gpu::get_lane_mask(); - void *ptr = slab->allocate(lane_mask, uniform); + void *ptr = slab->allocate(uniform, reserved); return ptr; } diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 13f46a1..bbb07b6 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -155,6 +155,95 @@ add_header_library( ) add_header_library( + asinhf16 + HDRS + asinhf16.h + DEPENDS + .acoshf_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( + atan_utils + HDRS + atan_utils.h + DEPENDS + libc.src.__support.integer_literals + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.optimization +) + +add_header_library( + atan + HDRS + atan.h + DEPENDS + .atan_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.macros.optimization +) + +add_header_library( + atan2 + HDRS + atan2.h + DEPENDS + .atan_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.macros.optimization +) + +add_header_library( + atanf + HDRS + atanf.h + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization +) + +add_header_library( + atanf16 + HDRS + atanf16.h + DEPENDS + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization +) + +add_header_library( asinf HDRS asinf.h diff --git a/libc/src/__support/math/asinhf16.h b/libc/src/__support/math/asinhf16.h new file mode 100644 index 0000000..3c5171e --- /dev/null +++ b/libc/src/__support/math/asinhf16.h @@ -0,0 +1,121 @@ +//===-- Implementation header for asinhf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "acoshf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 asinhf16(float16 x) { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr size_t N_EXCEPTS = 8; + + constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + + // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ) + {0x3769, 0x372a, 1, 0, 1}, + // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ) + {0x3b5b, 0x3a96, 1, 0, 0}, + // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ) + {0x4b1f, 0x42b3, 1, 0, 0}, + // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ) + {0x4c9b, 0x4336, 1, 0, 1}, + // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ) + {0xb769, 0xb72a, 0, 1, 1}, + // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ) + {0xbb5b, 0xba96, 0, 1, 0}, + // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ) + {0xcb1f, 0xc2b3, 0, 1, 0}, + // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ) + {0xcc9b, 0xc336, 0, 1, 1}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace acoshf_internal; + using FPBits = fputil::FPBits<float16>; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + + if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Handle exceptional values + if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + float xf = x; + const float SIGN[2] = {1.0f, -1.0f}; + float x_sign = SIGN[x_u >> 15]; + + // |x| <= 0.25 + if (LIBC_UNLIKELY(x_abs <= 0x3400)) { + // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain + // rounding types. + if (LIBC_UNLIKELY(x_abs < 0x29c6)) { + int rounding = fputil::quick_get_round(); + if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0) + return fputil::cast<float16>(xf + 0x1p-24f); + if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0) + return fputil::cast<float16>(xf - 0x1p-24f); + return fputil::cast<float16>(xf); + } + + float x_sq = xf * xf; + // Generated by Sollya with: + // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]); + // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f + // for better accuracy. + float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f, + -0x1.6c53dep-5f, 0x1.bd114ep-5f); + + return fputil::cast<float16>(xf * p); + } + + // General case: asinh(x) = ln(x + sqrt(x^2 + 1)) + float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f)); + return fputil::cast<float16>( + x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H diff --git a/libc/src/__support/math/atan.h b/libc/src/__support/math/atan.h new file mode 100644 index 0000000..62190b0 --- /dev/null +++ b/libc/src/__support/math/atan.h @@ -0,0 +1,189 @@ +//===-- Implementation header for atan --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H + +#include "atan_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +// To compute atan(x), we divided it into the following cases: +// * |x| < 2^-26: +// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply +// return atan(x) = x - sign(x) * epsilon. +// * 2^-26 <= |x| < 1: +// We perform range reduction mod 2^-6 = 1/64 as follow: +// Let k = 2^(-6) * round(|x| * 2^6), then +// atan(x) = sign(x) * atan(|x|) +// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)). +// We store atan(k) in a look up table, and perform intermediate steps in +// double-double. +// * 1 < |x| < 2^53: +// First we perform the transformation y = 1/|x|: +// atan(x) = sign(x) * (pi/2 - atan(1/|x|)) +// = sign(x) * (pi/2 - atan(y)). +// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the +// previous case: +// Let k = 2^(-6) * round(y * 2^6), then +// atan(y) = atan(k) + atan((y - k) / (1 + y*k)) +// = atan(k) + atan((1/|x| - k) / (1 + k/|x|) +// = atan(k) + atan((1 - k*|x|) / (|x| + k)). +// * |x| >= 2^53: +// Using the reciprocal transformation: +// atan(x) = sign(x) * (pi/2 - atan(1/|x|)). +// We have that: +// atan(1/|x|) <= 1/|x| <= 2^-53, +// which is smaller than ulp(pi/2) / 2. +// So we can return: +// atan(x) = sign(x) * (pi/2 - epsilon) + +LIBC_INLINE static constexpr double atan(double x) { + + using namespace atan_internal; + using FPBits = fputil::FPBits<double>; + + constexpr double IS_NEG[2] = {1.0, -1.0}; + constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, + 0x1.921fb54442d18p0}; + constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, + -0x1.921fb54442d18p0}; + + FPBits xbits(x); + bool x_sign = xbits.is_neg(); + xbits = xbits.abs(); + uint64_t x_abs = xbits.uintval(); + int x_exp = + static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS; + + // |x| < 1. + if (x_exp < 0) { + if (LIBC_UNLIKELY(x_exp < -26)) { +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return x; +#else + if (x == 0.0) + return x; + // |x| < 2^-26 + return fputil::multiply_add(-0x1.0p-54, x, x); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + double x_d = xbits.get_val(); + // k = 2^-6 * round(2^6 * |x|) + double k = fputil::nearest_integer(0x1.0p6 * x_d); + unsigned idx = static_cast<unsigned>(k); + k *= 0x1.0p-6; + + // numerator = |x| - k + DoubleDouble num, den; + num.lo = 0.0; + num.hi = x_d - k; + + // denominator = 1 - k * |x| + den.hi = fputil::multiply_add(x_d, k, 1.0); + DoubleDouble prod = fputil::exact_mult(x_d, k); + // Using Dekker's 2SUM algorithm to compute the lower part. + den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo; + + // x_r = (|x| - k) / (1 + k * |x|) + DoubleDouble x_r = fputil::div(num, den); + + // Approximating atan(x_r) using Taylor polynomial. + DoubleDouble p = atan_eval(x_r); + + // atan(x) = sign(x) * (atan(k) + atan(x_r)) + // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) )) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo))); +#else + + DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi); + double c1 = c0.lo + (ATAN_I[idx].lo + p.lo); + double r = IS_NEG[x_sign] * (c0.hi + c1); + + return r; +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + // |x| >= 2^53 or x is NaN. + if (LIBC_UNLIKELY(x_exp >= 53)) { + // x is nan + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + // |x| >= 2^53 + // atan(x) ~ sign(x) * pi/2. + if (x_exp >= 53) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return IS_NEG[x_sign] * PI_OVER_2.hi; +#else + return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi, + IS_NEG[x_sign] * PI_OVER_2.lo); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } + + double x_d = xbits.get_val(); + double y = 1.0 / x_d; + + // k = 2^-6 * round(2^6 / |x|) + double k = fputil::nearest_integer(0x1.0p6 * y); + unsigned idx = static_cast<unsigned>(k); + k *= 0x1.0p-6; + + // denominator = |x| + k + DoubleDouble den = fputil::exact_add(x_d, k); + // numerator = 1 - k * |x| + DoubleDouble num; + num.hi = fputil::multiply_add(-x_d, k, 1.0); + DoubleDouble prod = fputil::exact_mult(x_d, k); + // Using Dekker's 2SUM algorithm to compute the lower part. + num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo; + + // x_r = (1/|x| - k) / (1 - k/|x|) + // = (1 - k * |x|) / (|x| - k) + DoubleDouble x_r = fputil::div(num, den); + + // Approximating atan(x_r) using Taylor polynomial. + DoubleDouble p = atan_eval(x_r); + + // atan(x) = sign(x) * (pi/2 - atan(1/|x|)) + // = sign(x) * (pi/2 - atan(k) - atan(x_r)) + // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k))) +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo; + return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part)); +#else + DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi); + DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi); + double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo); + + double r = IS_NEG[!x_sign] * (c1.hi + c2); + + return r; +#endif +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H diff --git a/libc/src/__support/math/atan2.h b/libc/src/__support/math/atan2.h new file mode 100644 index 0000000..90ed926 --- /dev/null +++ b/libc/src/__support/math/atan2.h @@ -0,0 +1,209 @@ +//===-- Implementation header for atan2 -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2_H + +#include "atan_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +// There are several range reduction steps we can take for atan2(y, x) as +// follow: + +// * Range reduction 1: signness +// atan2(y, x) will return a number between -PI and PI representing the angle +// forming by the 0x axis and the vector (x, y) on the 0xy-plane. +// In particular, we have that: +// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) +// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) +// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) +// Since atan function is odd, we can use the formula: +// atan(-u) = -atan(u) +// to adjust the above conditions a bit further: +// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) +// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) +// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) +// Which can be simplified to: +// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 +// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 + +// * Range reduction 2: reciprocal +// Now that the argument inside atan is positive, we can use the formula: +// atan(1/x) = pi/2 - atan(x) +// to make the argument inside atan <= 1 as follow: +// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x +// = sign(y) * (pi/2 - atan( |x|/|y| ) if 0 <= x < |y| +// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x +// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| + +// * Range reduction 3: look up table. +// After the previous two range reduction steps, we reduce the problem to +// compute atan(u) with 0 <= u <= 1, or to be precise: +// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). +// An accurate polynomial approximation for the whole [0, 1] input range will +// require a very large degree. To make it more efficient, we reduce the input +// range further by finding an integer idx such that: +// | n/d - idx/64 | <= 1/128. +// In particular, +// idx := round(2^6 * n/d) +// Then for the fast pass, we find a polynomial approximation for: +// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64) +// For the accurate pass, we use the addition formula: +// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) ) +// = atan( (n - d*(idx/64))/(d + n*(idx/64)) ) +// And for the fast pass, we use degree-9 Taylor polynomial to compute the RHS: +// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 +// with absolute errors bounded by: +// |atan(u) - P(u)| < |u|^11 / 11 < 2^-80 +// and relative errors bounded by: +// |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73. + +LIBC_INLINE static constexpr double atan2(double y, double x) { + using namespace atan_internal; + using FPBits = fputil::FPBits<double>; + + constexpr double IS_NEG[2] = {1.0, -1.0}; + constexpr DoubleDouble ZERO = {0.0, 0.0}; + constexpr DoubleDouble MZERO = {-0.0, -0.0}; + constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p+1}; + constexpr DoubleDouble MPI = {-0x1.1a62633145c07p-53, -0x1.921fb54442d18p+1}; + constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, + 0x1.921fb54442d18p0}; + constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, + -0x1.921fb54442d18p0}; + constexpr DoubleDouble PI_OVER_4 = {0x1.1a62633145c07p-55, + 0x1.921fb54442d18p-1}; + constexpr DoubleDouble THREE_PI_OVER_4 = {0x1.a79394c9e8a0ap-54, + 0x1.2d97c7f3321d2p+1}; + // Adjustment for constant term: + // CONST_ADJ[x_sign][y_sign][recip] + constexpr DoubleDouble CONST_ADJ[2][2][2] = { + {{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}}, + {{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}}; + + FPBits x_bits(x), y_bits(y); + bool x_sign = x_bits.sign().is_neg(); + bool y_sign = y_bits.sign().is_neg(); + x_bits = x_bits.abs(); + y_bits = y_bits.abs(); + uint64_t x_abs = x_bits.uintval(); + uint64_t y_abs = y_bits.uintval(); + bool recip = x_abs < y_abs; + uint64_t min_abs = recip ? x_abs : y_abs; + uint64_t max_abs = !recip ? x_abs : y_abs; + unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); + unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); + + double num = FPBits(min_abs).get_val(); + double den = FPBits(max_abs).get_val(); + + // Check for exceptional cases, whether inputs are 0, inf, nan, or close to + // overflow, or close to underflow. + if (LIBC_UNLIKELY(max_exp > 0x7ffU - 128U || min_exp < 128U)) { + if (x_bits.is_nan() || y_bits.is_nan()) { + if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + unsigned x_except = x == 0.0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); + unsigned y_except = y == 0.0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1); + + // Exceptional cases: + // EXCEPT[y_except][x_except][x_is_neg] + // with x_except & y_except: + // 0: zero + // 1: finite, non-zero + // 2: infinity + constexpr DoubleDouble EXCEPTS[3][3][2] = { + {{ZERO, PI}, {ZERO, PI}, {ZERO, PI}}, + {{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}}, + {{PI_OVER_2, PI_OVER_2}, + {PI_OVER_2, PI_OVER_2}, + {PI_OVER_4, THREE_PI_OVER_4}}, + }; + + if ((x_except != 1) || (y_except != 1)) { + DoubleDouble r = EXCEPTS[y_except][x_except][x_sign]; + return fputil::multiply_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo); + } + bool scale_up = min_exp < 128U; + bool scale_down = max_exp > 0x7ffU - 128U; + // At least one input is denormal, multiply both numerator and denominator + // by some large enough power of 2 to normalize denormal inputs. + if (scale_up) { + num *= 0x1.0p64; + if (!scale_down) + den *= 0x1.0p64; + } else if (scale_down) { + den *= 0x1.0p-64; + if (!scale_up) + num *= 0x1.0p-64; + } + + min_abs = FPBits(num).uintval(); + max_abs = FPBits(den).uintval(); + min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); + max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); + } + + double final_sign = IS_NEG[(x_sign != y_sign) != recip]; + DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; + unsigned exp_diff = max_exp - min_exp; + // We have the following bound for normalized n and d: + // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). + if (LIBC_UNLIKELY(exp_diff > 54)) { + return fputil::multiply_add(final_sign, const_term.hi, + final_sign * (const_term.lo + num / den)); + } + + double k = fputil::nearest_integer(64.0 * num / den); + unsigned idx = static_cast<unsigned>(k); + // k = idx / 64 + k *= 0x1.0p-6; + + // Range reduction: + // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) + // = atan((n - d * k/64)) / (d + n * k/64)) + DoubleDouble num_k = fputil::exact_mult(num, k); + DoubleDouble den_k = fputil::exact_mult(den, k); + + // num_dd = n - d * k + DoubleDouble num_dd = fputil::exact_add(num - den_k.hi, -den_k.lo); + // den_dd = d + n * k + DoubleDouble den_dd = fputil::exact_add(den, num_k.hi); + den_dd.lo += num_k.lo; + + // q = (n - d * k) / (d + n * k) + DoubleDouble q = fputil::div(num_dd, den_dd); + // p ~ atan(q) + DoubleDouble p = atan_eval(q); + + DoubleDouble r = fputil::add(const_term, fputil::add(ATAN_I[idx], p)); + r.hi *= final_sign; + r.lo *= final_sign; + + return r.hi + r.lo; +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2_H diff --git a/libc/src/math/generic/atan_utils.h b/libc/src/__support/math/atan_utils.h index 24c7271..9e8d7d6 100644 --- a/libc/src/math/generic/atan_utils.h +++ b/libc/src/__support/math/atan_utils.h @@ -18,7 +18,7 @@ namespace LIBC_NAMESPACE_DECL { -namespace { +namespace atan_internal { using DoubleDouble = fputil::DoubleDouble; using Float128 = fputil::DyadicFloat<128>; @@ -29,7 +29,7 @@ using Float128 = fputil::DyadicFloat<128>; // b = round(atan(i/64) - a, D, RN); // print("{", b, ",", a, "},"); // }; -constexpr DoubleDouble ATAN_I[65] = { +static constexpr DoubleDouble ATAN_I[65] = { {0.0, 0.0}, {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7}, {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6}, @@ -110,7 +110,8 @@ constexpr DoubleDouble ATAN_I[65] = { // + x_lo * (1 - x_hi^2 + x_hi^4) // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by: // |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66. -[[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) { +[[maybe_unused]] LIBC_INLINE static DoubleDouble +atan_eval(const DoubleDouble &x) { DoubleDouble p; p.hi = x.hi; double x_hi_sq = x.hi * x.hi; @@ -142,7 +143,7 @@ constexpr DoubleDouble ATAN_I[65] = { // b = 2^ll + a; // print("{Sign::POS, ", 2^(ll - 128), ",", b, "},"); // }; -constexpr Float128 ATAN_I_F128[65] = { +static constexpr Float128 ATAN_I_F128[65] = { {Sign::POS, 0, 0_u128}, {Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128}, {Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128}, @@ -215,7 +216,7 @@ constexpr Float128 ATAN_I_F128[65] = { // [0, 2^-7]); // > dirtyinfnorm(atan(x) - P, [0, 2^-7]); // 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122 -constexpr Float128 ATAN_POLY_F128[] = { +static constexpr Float128 ATAN_POLY_F128[] = { {Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128}, {Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128}, {Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128}, @@ -225,7 +226,8 @@ constexpr Float128 ATAN_POLY_F128[] = { }; // Approximate atan for |x| <= 2^-7. -[[maybe_unused]] Float128 atan_eval(const Float128 &x) { +[[maybe_unused]] LIBC_INLINE static constexpr Float128 +atan_eval(const Float128 &x) { Float128 x_sq = fputil::quick_mul(x, x); Float128 x3 = fputil::quick_mul(x, x_sq); Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1], @@ -234,7 +236,7 @@ constexpr Float128 ATAN_POLY_F128[] = { return fputil::multiply_add(x3, p, x); } -} // anonymous namespace +} // namespace atan_internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h new file mode 100644 index 0000000..92799dc --- /dev/null +++ b/libc/src/__support/math/atanf.h @@ -0,0 +1,129 @@ +//===-- Implementation header for atanf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H + +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float atanf(float x) { + using namespace inv_trigf_utils_internal; + using FPBits = typename fputil::FPBits<float>; + + constexpr double FINAL_SIGN[2] = {1.0, -1.0}; + constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0, + -0x1.921fb54442d18p0}; + + FPBits x_bits(x); + Sign sign = x_bits.sign(); + x_bits.set_sign(Sign::POS); + uint32_t x_abs = x_bits.uintval(); + + // x is inf or nan, |x| < 2^-4 or |x|= > 16. + if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) { + double x_d = static_cast<double>(x); + double const_term = 0.0; + if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) { + // atan(+-Inf) = +-pi/2. + if (x_bits.is_inf()) { + volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()]; + return static_cast<float>(sign_pi_over_2); + } + if (x_bits.is_nan()) + return x; + // x >= 16 + x_d = -1.0 / x_d; + const_term = SIGNED_PI_OVER_2[sign.is_neg()]; + } + // 0 <= x < 1/16; + if (LIBC_UNLIKELY(x_bits.is_zero())) + return x; + // x <= 2^-12; + if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) { +#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + return fputil::multiply_add(x, -0x1.0p-25f, x); +#else + double x_d = static_cast<double>(x); + return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d)); +#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT + } + // Use Taylor polynomial: + // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11). + constexpr double ATAN_TAYLOR[6] = { + 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, + -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, + }; + double x2 = x_d * x_d; + double x4 = x2 * x2; + double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); + double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); + double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]); + double p = fputil::polyeval(x4, c0, c1, c2); + double r = fputil::multiply_add(x_d, p, const_term); + return static_cast<float>(r); + } + + // Range reduction steps: + // 1) atan(x) = sign(x) * atan(|x|) + // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|) + // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32. + // 4) Then we use polynomial approximation: + // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16) + // = P(x - k/16) + double x_d = 0, const_term = 0, final_sign = 0; + int idx = 0; + + if (x_abs > 0x3f80'0000U) { + // |x| > 1, we need to invert x, so we will perform range reduction in + // double precision. + x_d = 1.0 / static_cast<double>(x_bits.get_val()); + double k_d = fputil::nearest_integer(x_d * 0x1.0p4); + x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d); + idx = static_cast<int>(k_d); + final_sign = FINAL_SIGN[sign.is_pos()]; + // Adjust constant term of the polynomial by +- pi/2. + const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0], + SIGNED_PI_OVER_2[sign.is_neg()]); + } else { + // Exceptional value: + if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4 + return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f) + : fputil::round_result_slightly_up(-0x1.1a6386p-4f); + } + // Perform range reduction in single precision. + float x_f = x_bits.get_val(); + float k_f = fputil::nearest_integer(x_f * 0x1.0p4f); + x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f); + x_d = static_cast<double>(x_f); + idx = static_cast<int>(k_f); + final_sign = FINAL_SIGN[sign.is_neg()]; + const_term = final_sign * ATAN_COEFFS[idx][0]; + } + + double p = atan_eval(x_d, idx); + double r = fputil::multiply_add(final_sign * x_d, p, const_term); + + return static_cast<float>(r); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H diff --git a/libc/src/__support/math/atanf16.h b/libc/src/__support/math/atanf16.h new file mode 100644 index 0000000..f75d145 --- /dev/null +++ b/libc/src/__support/math/atanf16.h @@ -0,0 +1,119 @@ +//===-- Implementation header for atanf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 atanf16(float16 x) { + // Generated by Solly using the following command: + // > round(pi/2, SG, RN); + constexpr float PI_2 = 0x1.921fb6p0; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr size_t N_EXCEPTS = 6; + + constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANF16_EXCEPTS{{ + // (input, RZ output, RU offset, RD offset, RN offset) + {0x2745, 0x2744, 1, 0, 1}, + {0x3099, 0x3090, 1, 0, 1}, + {0x3c6c, 0x3aae, 1, 0, 1}, + {0x466e, 0x3daa, 1, 0, 1}, + {0x48ae, 0x3ddb, 1, 0, 0}, + {0x5619, 0x3e3d, 1, 0, 1}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using FPBits = fputil::FPBits<float16>; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + bool x_sign = x_u >> 15; + float sign = (x_sign ? -1.0 : 1.0); + + // |x| >= +/-inf + if (LIBC_UNLIKELY(x_abs >= 0x7c00)) { + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + + // atanf16(+/-inf) = +/-pi/2 + return fputil::cast<float16>(sign * PI_2); + } + + float xf = x; + float xsq = xf * xf; +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Handle exceptional values + if (auto r = ATANF16_EXCEPTS.lookup_odd(x_abs, x_sign); + LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif + + // |x| <= 0x1p0, |x| <= 1 + if (x_abs <= 0x3c00) { + // atanf16(+/-0) = +/-0 + if (LIBC_UNLIKELY(x_abs == 0)) + return x; + + // Degree-14 minimax odd polynomial of atan(x) generated by Sollya with: + // > P = fpminimax(atan(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|SG...|], + // [0, 1]); + float result = fputil::polyeval( + xsq, 0x1.fffffcp-1f, -0x1.55519ep-2f, 0x1.98f6a8p-3f, -0x1.1f0a92p-3f, + 0x1.95b654p-4f, -0x1.e65492p-5f, 0x1.8c0c36p-6f, -0x1.32316ep-8f); + return fputil::cast<float16>(xf * result); + } + + // If |x| > 1 + // y = atan(x) = sign(x) * atan(|x|) + // atan(|x|) = pi/2 - atan(1/|x|) + // Recall, 1/|x| < 1 + float x_inv_sq = 1.0f / xsq; + float x_inv = fputil::sqrt<float>(x_inv_sq); + + // Degree-14 minimax odd polynomial of atan(x) generated by Sollya with: + // > P = fpminimax(atan(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|SG...|], + // [0, 1]); + float interm = + fputil::polyeval(x_inv_sq, 0x1.fffffcp-1f, -0x1.55519ep-2f, + 0x1.98f6a8p-3f, -0x1.1f0a92p-3f, 0x1.95b654p-4f, + -0x1.e65492p-5f, 0x1.8c0c36p-6f, -0x1.32316ep-8f); + + return fputil::cast<float16>(sign * + fputil::multiply_add(x_inv, -interm, PI_2)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF16_H diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index cbb7886..14aaad2 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -124,3 +124,14 @@ add_object_library( libc.src.__support.threads.linux.raw_mutex libc.src.__support.CPP.mutex ) + +add_object_library( + barrier + HDRS + barrier.h + SRCS + barrier.cpp + DEPENDS + libc.src.__support.threads.CndVar + libc.src.__support.threads.mutex +) diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp new file mode 100644 index 0000000..cf7207b5 --- /dev/null +++ b/libc/src/__support/threads/linux/barrier.cpp @@ -0,0 +1,85 @@ +//===-- Implementation of Barrier class ------------- ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/linux/barrier.h" +#include "hdr/errno_macros.h" +#include "src/__support/threads/CndVar.h" +#include "src/__support/threads/mutex.h" + +namespace LIBC_NAMESPACE_DECL { + +int Barrier::init(Barrier *b, + [[maybe_unused]] const pthread_barrierattr_t *attr, + unsigned count) { + LIBC_ASSERT(attr == nullptr); // TODO implement barrierattr + if (count == 0) + return EINVAL; + + b->expected = count; + b->waiting = 0; + b->blocking = true; + + int err; + err = CndVar::init(&b->entering); + if (err != 0) + return err; + + err = CndVar::init(&b->exiting); + if (err != 0) + return err; + + auto mutex_err = Mutex::init(&b->m, false, false, false, false); + if (mutex_err != MutexError::NONE) + return EAGAIN; + + return 0; +} + +int Barrier::wait() { + m.lock(); + + // if the barrier is emptying out threads, wait until it finishes + while (!blocking) + entering.wait(&m); + waiting++; + + if (waiting < expected) { + // block threads until waiting = expected + while (blocking) + exiting.wait(&m); + } else { + // this is the last thread to call wait(), so lets wake everyone up + blocking = false; + exiting.broadcast(); + } + waiting--; + + if (waiting == 0) { + // all threads have exited the barrier, let's let the ones waiting to enter + // continue + blocking = true; + entering.broadcast(); + m.unlock(); + + // POSIX dictates that the barrier should return a special value to just one + // thread, so we can arbitrarily choose this thread + return PTHREAD_BARRIER_SERIAL_THREAD; + } + m.unlock(); + + return 0; +} + +int Barrier::destroy(Barrier *b) { + CndVar::destroy(&b->entering); + CndVar::destroy(&b->exiting); + Mutex::destroy(&b->m); + return 0; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h new file mode 100644 index 0000000..f0655bf --- /dev/null +++ b/libc/src/__support/threads/linux/barrier.h @@ -0,0 +1,50 @@ +//===-- A platform independent abstraction layer for barriers --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H +#define LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H + +#include "hdr/pthread_macros.h" +#include "include/llvm-libc-types/pthread_barrier_t.h" +#include "include/llvm-libc-types/pthread_barrierattr_t.h" +#include "src/__support/threads/CndVar.h" +#include "src/__support/threads/mutex.h" + +namespace LIBC_NAMESPACE_DECL { + +// NOTE: if the size of this class changes, you must ensure that the size of +// pthread_barrier_t (found in include/llvm-libc/types/pthread_barrier_t.h) is +// the same size +class Barrier { +private: + unsigned expected; + unsigned waiting; + bool blocking; + CndVar entering; + CndVar exiting; + Mutex m; + +public: + static int init(Barrier *b, const pthread_barrierattr_t *attr, + unsigned count); + static int destroy(Barrier *b); + int wait(); +}; + +static_assert( + sizeof(Barrier) == sizeof(pthread_barrier_t), + "The public pthread_barrier_t type cannot accommodate the internal " + "barrier type."); + +static_assert(alignof(Barrier) == alignof(pthread_barrier_t), + "The public pthread_barrier_t type has a different alignment " + "than the internal barrier type."); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index e363ad3..aed1d53 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -60,14 +60,31 @@ add_object_library( SRCS mbrtowc.cpp DEPENDS - libc.hdr.errno_macros - libc.hdr.types.wchar_t - libc.hdr.types.size_t - libc.src.__support.common - libc.src.__support.error_or - libc.src.__support.macros.config - .character_converter - .mbstate + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + .character_converter + .mbstate +) + +add_header_library( + mbsnrtowcs + HDRS + mbsnrtowcs.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.types.wchar_t + libc.hdr.types.size_t + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + libc.src.__support.macros.null_check + .character_converter + .mbstate + .string_converter ) add_header_library( diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h new file mode 100644 index 0000000..54e3152 --- /dev/null +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -0,0 +1,66 @@ +//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H + +#include "hdr/errno_macros.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/character_converter.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/string_converter.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, + const char **__restrict src, + size_t nmc, size_t len, + mbstate *__restrict ps) { + LIBC_CRASH_ON_NULLPTR(src); + // Checking if mbstate is valid + CharacterConverter char_conv(ps); + if (!char_conv.isValidState()) + return Error(EINVAL); + + StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps, + len, nmc); + size_t dst_idx = 0; + ErrorOr<char32_t> converted = str_conv.popUTF32(); + while (converted.has_value()) { + if (dst != nullptr) + dst[dst_idx] = converted.value(); + // null terminator should not be counted in return value + if (converted.value() == L'\0') { + if (dst != nullptr) + *src = nullptr; + return dst_idx; + } + dst_idx++; + converted = str_conv.popUTF32(); + } + + if (converted.error() == -1) { // if we hit conversion limit + if (dst != nullptr) + *src += str_conv.getSourceIndex(); + return dst_idx; + } + + return Error(converted.error()); +} + +} // namespace internal + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 455ad34..0522e0e 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -189,6 +189,7 @@ add_math_entrypoint_object(fabsf) add_math_entrypoint_object(fabsl) add_math_entrypoint_object(fabsf16) add_math_entrypoint_object(fabsf128) +add_math_entrypoint_object(fabsbf16) add_math_entrypoint_object(fadd) add_math_entrypoint_object(faddl) diff --git a/libc/src/math/fabsbf16.h b/libc/src/math/fabsbf16.h new file mode 100644 index 0000000..4993668 --- /dev/null +++ b/libc/src/math/fabsbf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for fabsbf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FABSBF16_H +#define LLVM_LIBC_SRC_MATH_FABSBF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 fabsbf16(bfloat16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_FABSBF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index f91feacb..6bcb1e2 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -698,6 +698,19 @@ add_entrypoint_object( ) add_entrypoint_object( + fabsbf16 + SRCS + fabsbf16.cpp + HDRS + ../fabsbf16.h + DEPENDS + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( fadd SRCS fadd.cpp @@ -3899,18 +3912,7 @@ add_entrypoint_object( HDRS ../asinhf16.h DEPENDS - .explogxf - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.asinhf16 ) add_entrypoint_object( @@ -4018,19 +4020,6 @@ add_entrypoint_object( libc.src.errno.errno ) -add_header_library( - atan_utils - HDRS - atan_utils.h - DEPENDS - libc.src.__support.integer_literals - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.optimization -) - add_entrypoint_object( atanf SRCS @@ -4038,14 +4027,7 @@ add_entrypoint_object( HDRS ../atanf.h DEPENDS - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.math.inv_trigf_utils + libc.src.__support.math.atanf ) add_entrypoint_object( @@ -4055,17 +4037,7 @@ add_entrypoint_object( HDRS ../atanf16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.atanf16 ) add_entrypoint_object( @@ -4077,13 +4049,7 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 DEPENDS - .atan_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.macros.optimization + libc.src.__support.math.atan ) add_entrypoint_object( @@ -4113,13 +4079,7 @@ add_entrypoint_object( HDRS ../atan2.h DEPENDS - .atan_utils - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.macros.optimization + libc.src.__support.math.atan2 ) add_entrypoint_object( @@ -4129,7 +4089,7 @@ add_entrypoint_object( HDRS ../atan2l.h DEPENDS - .atan2 + libc.src.__support.math.atan2 ) add_entrypoint_object( @@ -4139,7 +4099,7 @@ add_entrypoint_object( HDRS ../atan2f128.h DEPENDS - .atan_utils + libc.src.__support.math.atan_utils libc.src.__support.integer_literals libc.src.__support.uint128 libc.src.__support.FPUtil.dyadic_float diff --git a/libc/src/math/generic/asinhf16.cpp b/libc/src/math/generic/asinhf16.cpp index 0a0b471..d517e63 100644 --- a/libc/src/math/generic/asinhf16.cpp +++ b/libc/src/math/generic/asinhf16.cpp @@ -7,102 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/asinhf16.h" -#include "explogxf.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/asinhf16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 8; - -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - - // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ) - {0x3769, 0x372a, 1, 0, 1}, - // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ) - {0x3b5b, 0x3a96, 1, 0, 0}, - // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ) - {0x4b1f, 0x42b3, 1, 0, 0}, - // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ) - {0x4c9b, 0x4336, 1, 0, 1}, - // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ) - {0xb769, 0xb72a, 0, 1, 1}, - // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ) - {0xbb5b, 0xba96, 0, 1, 0}, - // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ) - {0xcb1f, 0xc2b3, 0, 1, 0}, - // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ) - {0xcc9b, 0xc336, 0, 1, 1}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { - using namespace acoshf_internal; - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - - if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Handle exceptional values - if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - float xf = x; - const float SIGN[2] = {1.0f, -1.0f}; - float x_sign = SIGN[x_u >> 15]; - - // |x| <= 0.25 - if (LIBC_UNLIKELY(x_abs <= 0x3400)) { - // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain - // rounding types. - if (LIBC_UNLIKELY(x_abs < 0x29c6)) { - int rounding = fputil::quick_get_round(); - if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0) - return fputil::cast<float16>(xf + 0x1p-24f); - if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0) - return fputil::cast<float16>(xf - 0x1p-24f); - return fputil::cast<float16>(xf); - } - - float x_sq = xf * xf; - // Generated by Sollya with: - // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]); - // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f - // for better accuracy. - float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f, - -0x1.6c53dep-5f, 0x1.bd114ep-5f); - - return fputil::cast<float16>(xf * p); - } - - // General case: asinh(x) = ln(x + sqrt(x^2 + 1)) - float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f)); - return fputil::cast<float16>( - x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term))); -} +LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { return math::asinhf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atan.cpp b/libc/src/math/generic/atan.cpp index cbca605..93bf2e1 100644 --- a/libc/src/math/generic/atan.cpp +++ b/libc/src/math/generic/atan.cpp @@ -7,173 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atan.h" -#include "atan_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/atan.h" namespace LIBC_NAMESPACE_DECL { -// To compute atan(x), we divided it into the following cases: -// * |x| < 2^-26: -// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply -// return atan(x) = x - sign(x) * epsilon. -// * 2^-26 <= |x| < 1: -// We perform range reduction mod 2^-6 = 1/64 as follow: -// Let k = 2^(-6) * round(|x| * 2^6), then -// atan(x) = sign(x) * atan(|x|) -// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)). -// We store atan(k) in a look up table, and perform intermediate steps in -// double-double. -// * 1 < |x| < 2^53: -// First we perform the transformation y = 1/|x|: -// atan(x) = sign(x) * (pi/2 - atan(1/|x|)) -// = sign(x) * (pi/2 - atan(y)). -// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the -// previous case: -// Let k = 2^(-6) * round(y * 2^6), then -// atan(y) = atan(k) + atan((y - k) / (1 + y*k)) -// = atan(k) + atan((1/|x| - k) / (1 + k/|x|) -// = atan(k) + atan((1 - k*|x|) / (|x| + k)). -// * |x| >= 2^53: -// Using the reciprocal transformation: -// atan(x) = sign(x) * (pi/2 - atan(1/|x|)). -// We have that: -// atan(1/|x|) <= 1/|x| <= 2^-53, -// which is smaller than ulp(pi/2) / 2. -// So we can return: -// atan(x) = sign(x) * (pi/2 - epsilon) - -LLVM_LIBC_FUNCTION(double, atan, (double x)) { - using FPBits = fputil::FPBits<double>; - - constexpr double IS_NEG[2] = {1.0, -1.0}; - constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, - 0x1.921fb54442d18p0}; - constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, - -0x1.921fb54442d18p0}; - - FPBits xbits(x); - bool x_sign = xbits.is_neg(); - xbits = xbits.abs(); - uint64_t x_abs = xbits.uintval(); - int x_exp = - static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS; - - // |x| < 1. - if (x_exp < 0) { - if (LIBC_UNLIKELY(x_exp < -26)) { -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return x; -#else - if (x == 0.0) - return x; - // |x| < 2^-26 - return fputil::multiply_add(-0x1.0p-54, x, x); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - double x_d = xbits.get_val(); - // k = 2^-6 * round(2^6 * |x|) - double k = fputil::nearest_integer(0x1.0p6 * x_d); - unsigned idx = static_cast<unsigned>(k); - k *= 0x1.0p-6; - - // numerator = |x| - k - DoubleDouble num, den; - num.lo = 0.0; - num.hi = x_d - k; - - // denominator = 1 - k * |x| - den.hi = fputil::multiply_add(x_d, k, 1.0); - DoubleDouble prod = fputil::exact_mult(x_d, k); - // Using Dekker's 2SUM algorithm to compute the lower part. - den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo; - - // x_r = (|x| - k) / (1 + k * |x|) - DoubleDouble x_r = fputil::div(num, den); - - // Approximating atan(x_r) using Taylor polynomial. - DoubleDouble p = atan_eval(x_r); - - // atan(x) = sign(x) * (atan(k) + atan(x_r)) - // = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) )) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo))); -#else - - DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi); - double c1 = c0.lo + (ATAN_I[idx].lo + p.lo); - double r = IS_NEG[x_sign] * (c0.hi + c1); - - return r; -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - // |x| >= 2^53 or x is NaN. - if (LIBC_UNLIKELY(x_exp >= 53)) { - // x is nan - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - // |x| >= 2^53 - // atan(x) ~ sign(x) * pi/2. - if (x_exp >= 53) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return IS_NEG[x_sign] * PI_OVER_2.hi; -#else - return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi, - IS_NEG[x_sign] * PI_OVER_2.lo); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS - } - - double x_d = xbits.get_val(); - double y = 1.0 / x_d; - - // k = 2^-6 * round(2^6 / |x|) - double k = fputil::nearest_integer(0x1.0p6 * y); - unsigned idx = static_cast<unsigned>(k); - k *= 0x1.0p-6; - - // denominator = |x| + k - DoubleDouble den = fputil::exact_add(x_d, k); - // numerator = 1 - k * |x| - DoubleDouble num; - num.hi = fputil::multiply_add(-x_d, k, 1.0); - DoubleDouble prod = fputil::exact_mult(x_d, k); - // Using Dekker's 2SUM algorithm to compute the lower part. - num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo; - - // x_r = (1/|x| - k) / (1 - k/|x|) - // = (1 - k * |x|) / (|x| - k) - DoubleDouble x_r = fputil::div(num, den); - - // Approximating atan(x_r) using Taylor polynomial. - DoubleDouble p = atan_eval(x_r); - - // atan(x) = sign(x) * (pi/2 - atan(1/|x|)) - // = sign(x) * (pi/2 - atan(k) - atan(x_r)) - // = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k))) -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo; - return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part)); -#else - DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi); - DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi); - double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo); - - double r = IS_NEG[!x_sign] * (c1.hi + c2); - - return r; -#endif -} +LLVM_LIBC_FUNCTION(double, atan, (double x)) { return math::atan(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp index aa770de..4aaa63d 100644 --- a/libc/src/math/generic/atan2.cpp +++ b/libc/src/math/generic/atan2.cpp @@ -7,194 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/atan2.h" -#include "atan_utils.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/atan2.h" namespace LIBC_NAMESPACE_DECL { -// There are several range reduction steps we can take for atan2(y, x) as -// follow: - -// * Range reduction 1: signness -// atan2(y, x) will return a number between -PI and PI representing the angle -// forming by the 0x axis and the vector (x, y) on the 0xy-plane. -// In particular, we have that: -// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) -// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) -// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) -// Since atan function is odd, we can use the formula: -// atan(-u) = -atan(u) -// to adjust the above conditions a bit further: -// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) -// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) -// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) -// Which can be simplified to: -// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 -// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 - -// * Range reduction 2: reciprocal -// Now that the argument inside atan is positive, we can use the formula: -// atan(1/x) = pi/2 - atan(x) -// to make the argument inside atan <= 1 as follow: -// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x -// = sign(y) * (pi/2 - atan( |x|/|y| ) if 0 <= x < |y| -// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x -// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| - -// * Range reduction 3: look up table. -// After the previous two range reduction steps, we reduce the problem to -// compute atan(u) with 0 <= u <= 1, or to be precise: -// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). -// An accurate polynomial approximation for the whole [0, 1] input range will -// require a very large degree. To make it more efficient, we reduce the input -// range further by finding an integer idx such that: -// | n/d - idx/64 | <= 1/128. -// In particular, -// idx := round(2^6 * n/d) -// Then for the fast pass, we find a polynomial approximation for: -// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64) -// For the accurate pass, we use the addition formula: -// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) ) -// = atan( (n - d*(idx/64))/(d + n*(idx/64)) ) -// And for the fast pass, we use degree-9 Taylor polynomial to compute the RHS: -// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 -// with absolute errors bounded by: -// |atan(u) - P(u)| < |u|^11 / 11 < 2^-80 -// and relative errors bounded by: -// |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73. - LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) { - using FPBits = fputil::FPBits<double>; - - constexpr double IS_NEG[2] = {1.0, -1.0}; - constexpr DoubleDouble ZERO = {0.0, 0.0}; - constexpr DoubleDouble MZERO = {-0.0, -0.0}; - constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p+1}; - constexpr DoubleDouble MPI = {-0x1.1a62633145c07p-53, -0x1.921fb54442d18p+1}; - constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54, - 0x1.921fb54442d18p0}; - constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54, - -0x1.921fb54442d18p0}; - constexpr DoubleDouble PI_OVER_4 = {0x1.1a62633145c07p-55, - 0x1.921fb54442d18p-1}; - constexpr DoubleDouble THREE_PI_OVER_4 = {0x1.a79394c9e8a0ap-54, - 0x1.2d97c7f3321d2p+1}; - // Adjustment for constant term: - // CONST_ADJ[x_sign][y_sign][recip] - constexpr DoubleDouble CONST_ADJ[2][2][2] = { - {{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}}, - {{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}}; - - FPBits x_bits(x), y_bits(y); - bool x_sign = x_bits.sign().is_neg(); - bool y_sign = y_bits.sign().is_neg(); - x_bits = x_bits.abs(); - y_bits = y_bits.abs(); - uint64_t x_abs = x_bits.uintval(); - uint64_t y_abs = y_bits.uintval(); - bool recip = x_abs < y_abs; - uint64_t min_abs = recip ? x_abs : y_abs; - uint64_t max_abs = !recip ? x_abs : y_abs; - unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); - unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); - - double num = FPBits(min_abs).get_val(); - double den = FPBits(max_abs).get_val(); - - // Check for exceptional cases, whether inputs are 0, inf, nan, or close to - // overflow, or close to underflow. - if (LIBC_UNLIKELY(max_exp > 0x7ffU - 128U || min_exp < 128U)) { - if (x_bits.is_nan() || y_bits.is_nan()) { - if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - unsigned x_except = x == 0.0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); - unsigned y_except = y == 0.0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1); - - // Exceptional cases: - // EXCEPT[y_except][x_except][x_is_neg] - // with x_except & y_except: - // 0: zero - // 1: finite, non-zero - // 2: infinity - constexpr DoubleDouble EXCEPTS[3][3][2] = { - {{ZERO, PI}, {ZERO, PI}, {ZERO, PI}}, - {{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}}, - {{PI_OVER_2, PI_OVER_2}, - {PI_OVER_2, PI_OVER_2}, - {PI_OVER_4, THREE_PI_OVER_4}}, - }; - - if ((x_except != 1) || (y_except != 1)) { - DoubleDouble r = EXCEPTS[y_except][x_except][x_sign]; - return fputil::multiply_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo); - } - bool scale_up = min_exp < 128U; - bool scale_down = max_exp > 0x7ffU - 128U; - // At least one input is denormal, multiply both numerator and denominator - // by some large enough power of 2 to normalize denormal inputs. - if (scale_up) { - num *= 0x1.0p64; - if (!scale_down) - den *= 0x1.0p64; - } else if (scale_down) { - den *= 0x1.0p-64; - if (!scale_up) - num *= 0x1.0p-64; - } - - min_abs = FPBits(num).uintval(); - max_abs = FPBits(den).uintval(); - min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); - max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); - } - - double final_sign = IS_NEG[(x_sign != y_sign) != recip]; - DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; - unsigned exp_diff = max_exp - min_exp; - // We have the following bound for normalized n and d: - // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). - if (LIBC_UNLIKELY(exp_diff > 54)) { - return fputil::multiply_add(final_sign, const_term.hi, - final_sign * (const_term.lo + num / den)); - } - - double k = fputil::nearest_integer(64.0 * num / den); - unsigned idx = static_cast<unsigned>(k); - // k = idx / 64 - k *= 0x1.0p-6; - - // Range reduction: - // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) - // = atan((n - d * k/64)) / (d + n * k/64)) - DoubleDouble num_k = fputil::exact_mult(num, k); - DoubleDouble den_k = fputil::exact_mult(den, k); - - // num_dd = n - d * k - DoubleDouble num_dd = fputil::exact_add(num - den_k.hi, -den_k.lo); - // den_dd = d + n * k - DoubleDouble den_dd = fputil::exact_add(den, num_k.hi); - den_dd.lo += num_k.lo; - - // q = (n - d * k) / (d + n * k) - DoubleDouble q = fputil::div(num_dd, den_dd); - // p ~ atan(q) - DoubleDouble p = atan_eval(q); - - DoubleDouble r = fputil::add(const_term, fputil::add(ATAN_I[idx], p)); - r.hi *= final_sign; - r.lo *= final_sign; - - return r.hi + r.lo; + return math::atan2(y, x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atan2f128.cpp b/libc/src/math/generic/atan2f128.cpp index a3aba0b..8838d94 100644 --- a/libc/src/math/generic/atan2f128.cpp +++ b/libc/src/math/generic/atan2f128.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/atan2f128.h" -#include "atan_utils.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" @@ -16,6 +15,7 @@ #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/types.h" +#include "src/__support/math/atan_utils.h" #include "src/__support/uint128.h" namespace LIBC_NAMESPACE_DECL { @@ -103,6 +103,7 @@ static constexpr Float128 CONST_ADJ[2][2][2] = { // |(atan(u) - P(u)) / P(u)| < 2^-114. LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) { + using namespace atan_internal; using FPBits = fputil::FPBits<float128>; using Float128 = fputil::DyadicFloat<128>; diff --git a/libc/src/math/generic/atan2l.cpp b/libc/src/math/generic/atan2l.cpp index 47a2e985..a7824c6 100644 --- a/libc/src/math/generic/atan2l.cpp +++ b/libc/src/math/generic/atan2l.cpp @@ -9,7 +9,7 @@ #include "src/math/atan2l.h" #include "src/__support/common.h" #include "src/__support/macros/properties/types.h" -#include "src/math/atan2.h" +#include "src/__support/math/atan2.h" namespace LIBC_NAMESPACE_DECL { @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(long double, atan2l, (long double y, long double x)) { #if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) return static_cast<long double>( - atan2(static_cast<double>(y), static_cast<double>(x))); + math::atan2(static_cast<double>(y), static_cast<double>(x))); #else #error "Extended precision is not yet supported" #endif diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index 22f962e..acd32f0 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -7,116 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanf.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/math/inv_trigf_utils.h" +#include "src/__support/math/atanf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, atanf, (float x)) { - using namespace inv_trigf_utils_internal; - using FPBits = typename fputil::FPBits<float>; - - constexpr double FINAL_SIGN[2] = {1.0, -1.0}; - constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0, - -0x1.921fb54442d18p0}; - - FPBits x_bits(x); - Sign sign = x_bits.sign(); - x_bits.set_sign(Sign::POS); - uint32_t x_abs = x_bits.uintval(); - - // x is inf or nan, |x| < 2^-4 or |x|= > 16. - if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) { - double x_d = static_cast<double>(x); - double const_term = 0.0; - if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) { - // atan(+-Inf) = +-pi/2. - if (x_bits.is_inf()) { - volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()]; - return static_cast<float>(sign_pi_over_2); - } - if (x_bits.is_nan()) - return x; - // x >= 16 - x_d = -1.0 / x_d; - const_term = SIGNED_PI_OVER_2[sign.is_neg()]; - } - // 0 <= x < 1/16; - if (LIBC_UNLIKELY(x_bits.is_zero())) - return x; - // x <= 2^-12; - if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) { -#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) - return fputil::multiply_add(x, -0x1.0p-25f, x); -#else - double x_d = static_cast<double>(x); - return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d)); -#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT - } - // Use Taylor polynomial: - // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11). - constexpr double ATAN_TAYLOR[6] = { - 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, - -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, - }; - double x2 = x_d * x_d; - double x4 = x2 * x2; - double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); - double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); - double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]); - double p = fputil::polyeval(x4, c0, c1, c2); - double r = fputil::multiply_add(x_d, p, const_term); - return static_cast<float>(r); - } - - // Range reduction steps: - // 1) atan(x) = sign(x) * atan(|x|) - // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|) - // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32. - // 4) Then we use polynomial approximation: - // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16) - // = P(x - k/16) - double x_d, const_term, final_sign; - int idx; - - if (x_abs > 0x3f80'0000U) { - // |x| > 1, we need to invert x, so we will perform range reduction in - // double precision. - x_d = 1.0 / static_cast<double>(x_bits.get_val()); - double k_d = fputil::nearest_integer(x_d * 0x1.0p4); - x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d); - idx = static_cast<int>(k_d); - final_sign = FINAL_SIGN[sign.is_pos()]; - // Adjust constant term of the polynomial by +- pi/2. - const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0], - SIGNED_PI_OVER_2[sign.is_neg()]); - } else { - // Exceptional value: - if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4 - return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f) - : fputil::round_result_slightly_up(-0x1.1a6386p-4f); - } - // Perform range reduction in single precision. - float x_f = x_bits.get_val(); - float k_f = fputil::nearest_integer(x_f * 0x1.0p4f); - x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f); - x_d = static_cast<double>(x_f); - idx = static_cast<int>(k_f); - final_sign = FINAL_SIGN[sign.is_neg()]; - const_term = final_sign * ATAN_COEFFS[idx][0]; - } - - double p = atan_eval(x_d, idx); - double r = fputil::multiply_add(final_sign * x_d, p, const_term); - - return static_cast<float>(r); -} +LLVM_LIBC_FUNCTION(float, atanf, (float x)) { return math::atanf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atanf16.cpp b/libc/src/math/generic/atanf16.cpp index 9b6ec65..7191c42 100644 --- a/libc/src/math/generic/atanf16.cpp +++ b/libc/src/math/generic/atanf16.cpp @@ -7,101 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanf16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/optimization.h" +#include "src/__support/math/atanf16.h" namespace LIBC_NAMESPACE_DECL { -// Generated by Solly using the following command: -// > round(pi/2, SG, RN); -static constexpr float PI_2 = 0x1.921fb6p0; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 6; - -static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ATANF16_EXCEPTS{{ - // (input, RZ output, RU offset, RD offset, RN offset) - {0x2745, 0x2744, 1, 0, 1}, - {0x3099, 0x3090, 1, 0, 1}, - {0x3c6c, 0x3aae, 1, 0, 1}, - {0x466e, 0x3daa, 1, 0, 1}, - {0x48ae, 0x3ddb, 1, 0, 0}, - {0x5619, 0x3e3d, 1, 0, 1}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, atanf16, (float16 x)) { - using FPBits = fputil::FPBits<float16>; - FPBits xbits(x); - - uint16_t x_u = xbits.uintval(); - uint16_t x_abs = x_u & 0x7fff; - bool x_sign = x_u >> 15; - float sign = (x_sign ? -1.0 : 1.0); - - // |x| >= +/-inf - if (LIBC_UNLIKELY(x_abs >= 0x7c00)) { - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - - // atanf16(+/-inf) = +/-pi/2 - return fputil::cast<float16>(sign * PI_2); - } - - float xf = x; - float xsq = xf * xf; -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Handle exceptional values - if (auto r = ATANF16_EXCEPTS.lookup_odd(x_abs, x_sign); - LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif - - // |x| <= 0x1p0, |x| <= 1 - if (x_abs <= 0x3c00) { - // atanf16(+/-0) = +/-0 - if (LIBC_UNLIKELY(x_abs == 0)) - return x; - - // Degree-14 minimax odd polynomial of atan(x) generated by Sollya with: - // > P = fpminimax(atan(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|SG...|], - // [0, 1]); - float result = fputil::polyeval( - xsq, 0x1.fffffcp-1f, -0x1.55519ep-2f, 0x1.98f6a8p-3f, -0x1.1f0a92p-3f, - 0x1.95b654p-4f, -0x1.e65492p-5f, 0x1.8c0c36p-6f, -0x1.32316ep-8f); - return fputil::cast<float16>(xf * result); - } - - // If |x| > 1 - // y = atan(x) = sign(x) * atan(|x|) - // atan(|x|) = pi/2 - atan(1/|x|) - // Recall, 1/|x| < 1 - float x_inv_sq = 1.0f / xsq; - float x_inv = fputil::sqrt<float>(x_inv_sq); - - // Degree-14 minimax odd polynomial of atan(x) generated by Sollya with: - // > P = fpminimax(atan(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|SG...|], - // [0, 1]); - float interm = - fputil::polyeval(x_inv_sq, 0x1.fffffcp-1f, -0x1.55519ep-2f, - 0x1.98f6a8p-3f, -0x1.1f0a92p-3f, 0x1.95b654p-4f, - -0x1.e65492p-5f, 0x1.8c0c36p-6f, -0x1.32316ep-8f); - - return fputil::cast<float16>(sign * - fputil::multiply_add(x_inv, -interm, PI_2)); -} +LLVM_LIBC_FUNCTION(float16, atanf16, (float16 x)) { return math::atanf16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/fabsbf16.cpp b/libc/src/math/generic/fabsbf16.cpp new file mode 100644 index 0000000..ea39719 --- /dev/null +++ b/libc/src/math/generic/fabsbf16.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of fabsbf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fabsbf16.h" + +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, fabsbf16, (bfloat16 x)) { return fputil::abs(x); } + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt index c5db6fa..fe31e6a 100644 --- a/libc/src/pthread/CMakeLists.txt +++ b/libc/src/pthread/CMakeLists.txt @@ -273,6 +273,40 @@ add_entrypoint_object( ) add_entrypoint_object( + pthread_barrier_init + SRCS + pthread_barrier_init.cpp + HDRS + pthread_barrier_init.h + DEPENDS + libc.src.errno.errno + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( + pthread_barrier_destroy + SRCS + pthread_barrier_destroy.cpp + HDRS + pthread_barrier_destroy.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( + pthread_barrier_wait + SRCS + pthread_barrier_wait.cpp + HDRS + pthread_barrier_wait.h + DEPENDS + libc.include.pthread + libc.src.__support.threads.linux.barrier +) + +add_entrypoint_object( pthread_mutex_init SRCS pthread_mutex_init.cpp diff --git a/libc/src/pthread/pthread_barrier_destroy.cpp b/libc/src/pthread/pthread_barrier_destroy.cpp new file mode 100644 index 0000000..82de8f2 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_destroy.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of the pthread_barrier_destroy function ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_destroy.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_destroy, (pthread_barrier_t * b)) { + return Barrier::destroy(reinterpret_cast<Barrier *>(b)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_destroy.h b/libc/src/pthread/pthread_barrier_destroy.h new file mode 100644 index 0000000..e27552c --- /dev/null +++ b/libc/src/pthread/pthread_barrier_destroy.h @@ -0,0 +1,21 @@ +//===-- Implementation header for pthread_barrier_destroy --------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_destroy(pthread_barrier_t *b); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H diff --git a/libc/src/pthread/pthread_barrier_init.cpp b/libc/src/pthread/pthread_barrier_init.cpp new file mode 100644 index 0000000..2e92238 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_init.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of the pthread_barrier_init function ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_init.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "hdr/types/pthread_barrierattr_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_init, + (pthread_barrier_t * b, + const pthread_barrierattr_t *__restrict attr, + unsigned count)) { + return Barrier::init(reinterpret_cast<Barrier *>(b), attr, count); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_init.h b/libc/src/pthread/pthread_barrier_init.h new file mode 100644 index 0000000..bb17f3f --- /dev/null +++ b/libc/src/pthread/pthread_barrier_init.h @@ -0,0 +1,24 @@ +//===-- Implementation header for pthread_barrier_init ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H + +#include "hdr/types/pthread_barrier_t.h" +#include "hdr/types/pthread_barrierattr_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_init(pthread_barrier_t *b, + const pthread_barrierattr_t *__restrict attr, + unsigned count); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H diff --git a/libc/src/pthread/pthread_barrier_wait.cpp b/libc/src/pthread/pthread_barrier_wait.cpp new file mode 100644 index 0000000..dbd1333 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_wait.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of the pthread_barrier_wait function ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "pthread_barrier_wait.h" + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/threads/linux/barrier.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, pthread_barrier_wait, (pthread_barrier_t * b)) { + return reinterpret_cast<Barrier *>(b)->wait(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/pthread/pthread_barrier_wait.h b/libc/src/pthread/pthread_barrier_wait.h new file mode 100644 index 0000000..16ddc06 --- /dev/null +++ b/libc/src/pthread/pthread_barrier_wait.h @@ -0,0 +1,21 @@ +//===-- Implementation header for pthread_barrier_wait ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H +#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H + +#include "hdr/types/pthread_barrier_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int pthread_barrier_wait(pthread_barrier_t *b); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H diff --git a/libc/src/sched/CMakeLists.txt b/libc/src/sched/CMakeLists.txt index e6c37d3..d1d1de0 100644 --- a/libc/src/sched/CMakeLists.txt +++ b/libc/src/sched/CMakeLists.txt @@ -3,6 +3,13 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) endif() add_entrypoint_object( + getcpu + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.getcpu +) + +add_entrypoint_object( sched_getaffinity ALIAS DEPENDS diff --git a/libc/src/sched/getcpu.h b/libc/src/sched/getcpu.h new file mode 100644 index 0000000..4c90e64 --- /dev/null +++ b/libc/src/sched/getcpu.h @@ -0,0 +1,20 @@ +//===-- Implementation header for getcpu ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SCHED_GETCPU_H +#define LLVM_LIBC_SRC_SCHED_GETCPU_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int getcpu(unsigned int *cpu, unsigned int *node); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_SCHED_GETCPU_H diff --git a/libc/src/sched/linux/CMakeLists.txt b/libc/src/sched/linux/CMakeLists.txt index e690e76..bb50002 100644 --- a/libc/src/sched/linux/CMakeLists.txt +++ b/libc/src/sched/linux/CMakeLists.txt @@ -1,4 +1,16 @@ add_entrypoint_object( + getcpu + SRCS + getcpu.cpp + HDRS + ../getcpu.h + DEPENDS + libc.include.sched + libc.src.__support.OSUtil.osutil + libc.src.errno.errno +) + +add_entrypoint_object( sched_getaffinity SRCS sched_getaffinity.cpp @@ -6,7 +18,9 @@ add_entrypoint_object( ../sched_getaffinity.h DEPENDS libc.hdr.stdint_proxy - libc.include.sched + libc.hdr.types.cpu_set_t + libc.hdr.types.pid_t + libc.hdr.types.size_t libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -18,7 +32,9 @@ add_entrypoint_object( HDRS ../sched_setaffinity.h DEPENDS - libc.include.sched + libc.hdr.types.cpu_set_t + libc.hdr.types.pid_t + libc.hdr.types.size_t libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -30,7 +46,8 @@ add_entrypoint_object( HDRS ../sched_getcpucount.h DEPENDS - libc.include.sched + libc.hdr.types.cpu_set_t + libc.hdr.types.size_t ) add_entrypoint_object( @@ -94,7 +111,7 @@ add_entrypoint_object( HDRS ../sched_getscheduler.h DEPENDS - libc.include.sched + libc.hdr.types.pid_t libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.errno.errno @@ -131,8 +148,9 @@ add_entrypoint_object( HDRS ../sched_rr_get_interval.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.types.struct_timespec libc.include.sys_syscall - libc.include.sched libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/sched/linux/getcpu.cpp b/libc/src/sched/linux/getcpu.cpp new file mode 100644 index 0000000..a34b693 --- /dev/null +++ b/libc/src/sched/linux/getcpu.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of getcpu ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/sched/getcpu.h" + +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" + +#include <sys/syscall.h> // For syscall numbers. + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, getcpu, (unsigned int *cpu, unsigned int *node)) { + int ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_getcpu, cpu, node, nullptr); + if (ret < 0) { + libc_errno = -ret; + return -1; + } + return 0; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/sched/linux/sched_getaffinity.cpp b/libc/src/sched/linux/sched_getaffinity.cpp index 4a5e91a..d652f7f7 100644 --- a/libc/src/sched/linux/sched_getaffinity.cpp +++ b/libc/src/sched/linux/sched_getaffinity.cpp @@ -14,7 +14,9 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include <sched.h> +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/pid_t.h" +#include "hdr/types/size_t.h" #include <sys/syscall.h> // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/linux/sched_getcpucount.cpp b/libc/src/sched/linux/sched_getcpucount.cpp index 7ae166e..dcc2338 100644 --- a/libc/src/sched/linux/sched_getcpucount.cpp +++ b/libc/src/sched/linux/sched_getcpucount.cpp @@ -12,7 +12,8 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include <sched.h> +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/size_t.h" #include <stddef.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/linux/sched_getscheduler.cpp b/libc/src/sched/linux/sched_getscheduler.cpp index d8e0296..10625f2 100644 --- a/libc/src/sched/linux/sched_getscheduler.cpp +++ b/libc/src/sched/linux/sched_getscheduler.cpp @@ -13,6 +13,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "hdr/types/pid_t.h" #include <sys/syscall.h> // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/linux/sched_rr_get_interval.cpp b/libc/src/sched/linux/sched_rr_get_interval.cpp index 5668d596b..eecbaa4 100644 --- a/libc/src/sched/linux/sched_rr_get_interval.cpp +++ b/libc/src/sched/linux/sched_rr_get_interval.cpp @@ -13,6 +13,8 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "hdr/types/pid_t.h" +#include "hdr/types/struct_timespec.h" #include <sys/syscall.h> // For syscall numbers. #ifdef SYS_sched_rr_get_interval_time64 diff --git a/libc/src/sched/linux/sched_setaffinity.cpp b/libc/src/sched/linux/sched_setaffinity.cpp index 93e930d..3c7ed91 100644 --- a/libc/src/sched/linux/sched_setaffinity.cpp +++ b/libc/src/sched/linux/sched_setaffinity.cpp @@ -13,7 +13,9 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include <sched.h> +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/pid_t.h" +#include "hdr/types/size_t.h" #include <sys/syscall.h> // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_getaffinity.h b/libc/src/sched/sched_getaffinity.h index 52ec5bc..8623089 100644 --- a/libc/src/sched/sched_getaffinity.h +++ b/libc/src/sched/sched_getaffinity.h @@ -10,7 +10,10 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_GETAFFINITY_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/pid_t.h" +#include "hdr/types/size_t.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_getcpucount.h b/libc/src/sched/sched_getcpucount.h index 8f35301..0667d8c 100644 --- a/libc/src/sched/sched_getcpucount.h +++ b/libc/src/sched/sched_getcpucount.h @@ -10,7 +10,8 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_GETCPUCOUNT_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/cpu_set_t.h" #include <stddef.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_getscheduler.h b/libc/src/sched/sched_getscheduler.h index d29e902..6407dbf 100644 --- a/libc/src/sched/sched_getscheduler.h +++ b/libc/src/sched/sched_getscheduler.h @@ -10,7 +10,8 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_GETSCHEDULER_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/pid_t.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_rr_get_interval.h b/libc/src/sched/sched_rr_get_interval.h index ff09329..4195c14 100644 --- a/libc/src/sched/sched_rr_get_interval.h +++ b/libc/src/sched/sched_rr_get_interval.h @@ -10,7 +10,9 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_RR_GET_INTERVAL_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/pid_t.h" +#include "hdr/types/struct_timespec.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_setaffinity.h b/libc/src/sched/sched_setaffinity.h index cb2303d..f6739ab 100644 --- a/libc/src/sched/sched_setaffinity.h +++ b/libc/src/sched/sched_setaffinity.h @@ -10,7 +10,10 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_SETAFFINITY_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/pid_t.h" +#include "hdr/types/size_t.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 49f4a1b..9ba0a06 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -185,6 +185,55 @@ add_entrypoint_object( ) add_entrypoint_object( + mbstowcs + SRCS + mbstowcs.cpp + HDRS + mbstowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.macros.null_check + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( + mbsrtowcs + SRCS + mbsrtowcs.cpp + HDRS + mbsrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( + mbsnrtowcs + SRCS + mbsnrtowcs.cpp + HDRS + mbsnrtowcs.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.src.__support.wchar.mbsnrtowcs +) + +add_entrypoint_object( wcstombs SRCS wcstombs.cpp diff --git a/libc/src/wchar/mbsnrtowcs.cpp b/libc/src/wchar/mbsnrtowcs.cpp new file mode 100644 index 0000000..28e0ff3 --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.cpp @@ -0,0 +1,39 @@ +//===-- Implementation of mbsnrtowcs --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsnrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + // If destination is null, ignore len + len = dst == nullptr ? SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, nmc, len, + ps == nullptr ? &internal_mbstate + : reinterpret_cast<internal::mbstate *>(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h new file mode 100644 index 0000000..0d66b95 --- /dev/null +++ b/libc/src/wchar/mbsnrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsnrtowcs ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t nmc, size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp new file mode 100644 index 0000000..82ca25a --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.cpp @@ -0,0 +1,39 @@ +//===-- Implementation of mbsrtowcs ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbsrtowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbsrtowcs, + (wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps)) { + static internal::mbstate internal_mbstate; + // If destination is null, ignore len + len = dst == nullptr ? SIZE_MAX : len; + auto ret = internal::mbsnrtowcs( + dst, src, SIZE_MAX, len, + ps == nullptr ? &internal_mbstate + : reinterpret_cast<internal::mbstate *>(ps)); + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h new file mode 100644 index 0000000..f8d4cc2 --- /dev/null +++ b/libc/src/wchar/mbsrtowcs.h @@ -0,0 +1,24 @@ +//===-- Implementation header for mbsrtowcs -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t len, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp new file mode 100644 index 0000000..43e953c --- /dev/null +++ b/libc/src/wchar/mbstowcs.cpp @@ -0,0 +1,40 @@ +//===-- Implementation of mbstowcs ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/mbstowcs.h" + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" +#include "src/__support/wchar/mbsnrtowcs.h" +#include "src/__support/wchar/mbstate.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, mbstowcs, + (wchar_t *__restrict pwcs, const char *__restrict s, + size_t n)) { + LIBC_CRASH_ON_NULLPTR(s); + // If destination is null, ignore n + n = pwcs == nullptr ? SIZE_MAX : n; + static internal::mbstate internal_mbstate; + const char *temp = s; + auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate); + + if (!ret.has_value()) { + // Encoding failure + libc_errno = ret.error(); + return -1; + } + return ret.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h new file mode 100644 index 0000000..7d08a83 --- /dev/null +++ b/libc/src/wchar/mbstowcs.h @@ -0,0 +1,22 @@ +//===-- Implementation header for mbstowcs --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H +#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H |