diff options
Diffstat (limited to 'libc')
62 files changed, 1501 insertions, 154 deletions
diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt index 683c746..af9f687 100644 --- a/libc/config/baremetal/aarch64/entrypoints.txt +++ b/libc/config/baremetal/aarch64/entrypoints.txt @@ -757,6 +757,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -765,6 +771,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index f8ecc2e..ce8d7c0 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -760,6 +760,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -768,6 +774,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 679bfb3..7eeec24 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -760,6 +760,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -768,6 +774,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/darwin/aarch64/entrypoints.txt b/libc/config/darwin/aarch64/entrypoints.txt index 72b0265..de4b4df 100644 --- a/libc/config/darwin/aarch64/entrypoints.txt +++ b/libc/config/darwin/aarch64/entrypoints.txt @@ -590,6 +590,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -598,6 +604,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt index b5ab1ee..f668e8a 100644 --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -233,6 +233,12 @@ set(TARGET_LIBM_ENTRYPOINTS list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt index 77a13a6..f7b8b2f 100644 --- a/libc/config/gpu/amdgpu/entrypoints.txt +++ b/libc/config/gpu/amdgpu/entrypoints.txt @@ -616,6 +616,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt index 61c9c71..23afb40 100644 --- a/libc/config/gpu/nvptx/entrypoints.txt +++ b/libc/config/gpu/nvptx/entrypoints.txt @@ -617,6 +617,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index fbdf8fb..62a3ae9 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -844,6 +844,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -852,6 +858,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LLVM_LIBC_FULL_BUILD) list(APPEND TARGET_LIBC_ENTRYPOINTS # assert.h entrypoints diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index e3f5fee..eedf184 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -460,6 +460,12 @@ set(TARGET_LIBM_ENTRYPOINTS list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index ba67ddd..cd56979 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -863,6 +863,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -871,6 +877,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 066dc21..2b0cf33 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -895,6 +895,12 @@ endif() list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 @@ -903,6 +909,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS libc.src.math.truncbf16 ) +if(LIBC_TYPES_HAS_FLOAT128) + list(APPEND TARGET_LIBM_ENTRYPOINTS + # math.h C++23 mixed bfloat16 and _Float128 entrypoints + libc.src.math.bf16addf128 + libc.src.math.bf16subf128 + ) +endif() + if(LIBC_COMPILER_HAS_FIXED_POINT) list(APPEND TARGET_LIBM_ENTRYPOINTS # stdfix.h _Fract and _Accum entrypoints diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index 994078c..1b1db5e 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -306,6 +306,12 @@ set(TARGET_LIBM_ENTRYPOINTS list(APPEND TARGET_LIBM_ENTRYPOINTS # bfloat16 entrypoints + libc.src.math.bf16add + libc.src.math.bf16addf + libc.src.math.bf16addl + libc.src.math.bf16sub + libc.src.math.bf16subf + libc.src.math.bf16subl libc.src.math.ceilbf16 libc.src.math.fabsbf16 libc.src.math.floorbf16 diff --git a/libc/shared/math.h b/libc/shared/math.h index 3714f38..ea645f0 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -31,6 +31,7 @@ #include "math/atanhf.h" #include "math/atanhf16.h" #include "math/cbrt.h" +#include "math/cbrtf.h" #include "math/erff.h" #include "math/exp.h" #include "math/exp10.h" diff --git a/libc/shared/math/cbrtf.h b/libc/shared/math/cbrtf.h new file mode 100644 index 0000000..09b86be --- /dev/null +++ b/libc/shared/math/cbrtf.h @@ -0,0 +1,23 @@ +//===-- Shared cbrtf function -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_SHARED_MATH_CBRTF_H +#define LIBC_SHARED_MATH_CBRTF_H + +#include "shared/libc_common.h" +#include "src/__support/math/cbrtf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::cbrtf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_SHARED_MATH_CBRTF_H diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt index f8fdfeb..72a7879 100644 --- a/libc/src/__support/GPU/CMakeLists.txt +++ b/libc/src/__support/GPU/CMakeLists.txt @@ -9,6 +9,12 @@ add_header_library( utils.h ) +add_header_library( + fixedstack + HDRS + fixedstack.h +) + add_object_library( allocator SRCS @@ -23,4 +29,5 @@ add_object_library( libc.src.__support.CPP.bit libc.src.__support.CPP.new .utils + .fixedstack ) diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 250bebd..534a309 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -20,6 +20,7 @@ #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/new.h" +#include "src/__support/GPU/fixedstack.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "src/__support/threads/sleep.h" @@ -39,6 +40,9 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1; // The number of times to attempt claiming an in-progress slab allocation. constexpr static uint32_t MAX_TRIES = 1024; +// The number of previously allocated slabs we will keep in memory. +constexpr static uint32_t CACHED_SLABS = 8; + static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two"); namespace impl { @@ -185,20 +189,35 @@ struct Slab { struct alignas(MIN_SIZE) Header { uint32_t chunk_size; uint32_t global_index; + uint32_t cached_chunk_size; }; // Initialize the slab with its chunk size and index in the global table for // use when freeing. Slab(uint32_t chunk_size, uint32_t global_index) { Header *header = reinterpret_cast<Header *>(memory); + header->cached_chunk_size = cpp::numeric_limits<uint32_t>::max(); header->chunk_size = chunk_size; header->global_index = global_index; } + // Reset the memory with a new index and chunk size, not thread safe. + Slab *reset(uint32_t chunk_size, uint32_t global_index) { + Header *header = reinterpret_cast<Header *>(memory); + header->cached_chunk_size = header->chunk_size; + header->chunk_size = chunk_size; + header->global_index = global_index; + return this; + } + // Set the necessary bitfield bytes to zero in parallel using many lanes. This // must be called before the bitfield can be accessed safely, memory is not // guaranteed to be zero initialized in the current implementation. void initialize(uint64_t uniform) { + // If this is a re-used slab the memory is already set to zero. + if (get_cached_chunk_size() <= get_chunk_size()) + return; + uint32_t size = (bitfield_bytes(get_chunk_size()) + sizeof(uint32_t) - 1) / sizeof(uint32_t); impl::uniform_memset(get_bitfield(), 0, size, uniform); @@ -236,6 +255,11 @@ struct Slab { return reinterpret_cast<const Header *>(memory)->chunk_size; } + // Get the chunk size that was previously used. + uint32_t get_cached_chunk_size() const { + return reinterpret_cast<const Header *>(memory)->cached_chunk_size; + } + // Get the location in the memory where we will store the global index. uint32_t get_global_index() const { return reinterpret_cast<const Header *>(memory)->global_index; @@ -337,6 +361,9 @@ struct Slab { uint8_t memory[SLAB_SIZE]; }; +// A global cache of previously allocated slabs for efficient reuse. +static FixedStack<Slab *, CACHED_SLABS> slab_cache; + /// A wait-free guard around a pointer resource to be created dynamically if /// space is available and freed once there are no more users. struct GuardPtr { @@ -408,6 +435,11 @@ private: reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()), cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) { count = cpp::numeric_limits<uint32_t>::max(); + + Slab *cached = nullptr; + if (slab_cache.pop(cached)) + return cached->reset(cpp::forward<Args>(args)...); + void *raw = impl::rpc_allocate(sizeof(Slab)); if (!raw) return nullptr; @@ -475,8 +507,10 @@ public: if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(mask)) && ref.release(cpp::popcount(mask))) { Slab *p = ptr.load(cpp::MemoryOrder::RELAXED); - p->~Slab(); - impl::rpc_free(p); + if (!slab_cache.push(p)) { + p->~Slab(); + impl::rpc_free(p); + } cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE); ptr.store(nullptr, cpp::MemoryOrder::RELAXED); } diff --git a/libc/src/__support/GPU/fixedstack.h b/libc/src/__support/GPU/fixedstack.h new file mode 100644 index 0000000..6ceaa2f --- /dev/null +++ b/libc/src/__support/GPU/fixedstack.h @@ -0,0 +1,111 @@ +//===-- A lock-free data structure for a fixed capacity stack ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H +#define LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H + +#include "src/__support/CPP/atomic.h" +#include "src/__support/threads/sleep.h" + +#include <stdint.h> + +namespace LIBC_NAMESPACE_DECL { + +// A lock-free fixed size stack backed by an underlying array of data. It +// supports push and pop operations in a completely lock-free manner. +template <typename T, uint32_t CAPACITY> struct alignas(16) FixedStack { + // The index is stored as a 20-bit value and cannot index into any more. + static_assert(CAPACITY < 1024 * 1024, "Invalid buffer size"); + + // The head of the free and used stacks. Represents as a 20-bit index combined + // with a 44-bit ABA tag that is updated in a single atomic operation. + uint64_t free; + uint64_t used; + + // The stack is a linked list of indices into the underlying data + uint32_t next[CAPACITY]; + T data[CAPACITY]; + + // Get the 20-bit index into the underlying array from the head. + LIBC_INLINE static constexpr uint32_t get_node(uint64_t head) { + return static_cast<uint32_t>(head & 0xfffff); + } + + // Increment the old ABA tag and merge it into the new index. + LIBC_INLINE static constexpr uint64_t make_head(uint64_t orig, + uint32_t node) { + return static_cast<uint64_t>(node) | (((orig >> 20ul) + 1ul) << 20ul); + } + + // Attempts to pop data from the given stack by making it point to the next + // node. We repeatedly attempt to write to the head using compare-and-swap, + // expecting that it has not been changed by any other thread. + LIBC_INLINE uint32_t pop_impl(cpp::AtomicRef<uint64_t> head) { + uint64_t orig = head.load(cpp::MemoryOrder::RELAXED); + + for (;;) { + if (get_node(orig) == CAPACITY) + return CAPACITY; + + uint32_t node = + cpp::AtomicRef(next[get_node(orig)]).load(cpp::MemoryOrder::RELAXED); + if (head.compare_exchange_strong(orig, make_head(orig, node), + cpp::MemoryOrder::ACQUIRE, + cpp::MemoryOrder::RELAXED)) + break; + } + return get_node(orig); + } + + // Attempts to push data to the given stack by making it point to the new + // node. We repeatedly attempt to write to the head using compare-and-swap, + // expecting that it has not been changed by any other thread. + LIBC_INLINE uint32_t push_impl(cpp::AtomicRef<uint64_t> head, uint32_t node) { + uint64_t orig = head.load(cpp::MemoryOrder::RELAXED); + for (;;) { + next[node] = get_node(orig); + if (head.compare_exchange_strong(orig, make_head(orig, node), + cpp::MemoryOrder::RELEASE, + cpp::MemoryOrder::RELAXED)) + break; + } + return get_node(head.load(cpp::MemoryOrder::RELAXED)); + } + +public: + // Initialize the free stack to be full and the used stack to be empty. We use + // the capacity of the stack as a sentinel value. + LIBC_INLINE constexpr FixedStack() : free(0), used(CAPACITY), data{} { + for (uint32_t i = 0; i < CAPACITY; ++i) + next[i] = i + 1; + } + + LIBC_INLINE bool push(const T &val) { + uint32_t node = pop_impl(cpp::AtomicRef(free)); + if (node == CAPACITY) + return false; + + data[node] = val; + push_impl(cpp::AtomicRef(used), node); + return true; + } + + LIBC_INLINE bool pop(T &val) { + uint32_t node = pop_impl(cpp::AtomicRef(used)); + if (node == CAPACITY) + return false; + + val = data[node]; + push_impl(cpp::AtomicRef(free), node); + return true; + } +}; + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index e1076ed..fe928a8 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -347,6 +347,17 @@ add_header_library( ) add_header_library( + cbrtf + HDRS + cbrtf.h + DEPENDS + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization +) + +add_header_library( erff HDRS erff.h diff --git a/libc/src/__support/math/cbrtf.h b/libc/src/__support/math/cbrtf.h new file mode 100644 index 0000000..f82892b --- /dev/null +++ b/libc/src/__support/math/cbrtf.h @@ -0,0 +1,161 @@ +//===-- Implementation header for cbrtf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LIBC_SRC___SUPPORT_MATH_CBRTF_H +#define LIBC_SRC___SUPPORT_MATH_CBRTF_H + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float cbrtf(float x) { + // Look up table for 2^(i/3) for i = 0, 1, 2. + constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}; + + // Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1 + // generated by Sollya with: + // > for i from 0 to 15 do { + // P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]); + // print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",", + // coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", + // coeff(P, 6), "},"); + // }; + // Then (1 + x)^(1/3) ~ 1 + x * P(x). + constexpr double COEFFS[16][7] = { + {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5, + -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6, + 0x1.047f75e0aff14p-6}, + {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5, + -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6, + 0x1.716eca1d6e3bcp-7}, + {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5, + -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6, + 0x1.0aeca34893785p-7}, + {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5, + -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6, + 0x1.88410674c6a5dp-8}, + {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5, + -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7, + 0x1.249bb51a1c498p-8}, + {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5, + -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7, + 0x1.ba9230918fa2ep-9}, + {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5, + -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7, + 0x1.52e3e17c71069p-9}, + {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5, + -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7, + 0x1.0677d9af6aad4p-9}, + {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5, + -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7, + 0x1.9ad325dc7adcbp-10}, + {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5, + -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8, + 0x1.449f5ee175c69p-10}, + {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5, + -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8, + 0x1.02cb36389bd79p-10}, + {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5, + -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8, + 0x1.a008ae91f5936p-11}, + {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5, + -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8, + 0x1.50f8174cdb6e9p-11}, + {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5, + -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9, + 0x1.12e0e94e8586dp-11}, + {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5, + -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9, + 0x1.c3726535f1fc6p-12}, + {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5, + -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9, + 0x1.75077abf18d84p-12}, + }; + + using FloatBits = typename fputil::FPBits<float>; + using DoubleBits = typename fputil::FPBits<double>; + + FloatBits x_bits(x); + + uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff; + uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN; + + if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) { + // x is 0, Inf, or NaN. + // Make sure it works for FTZ/DAZ modes. + return x + x; + } + + double xd = static_cast<double>(x); + DoubleBits xd_bits(xd); + + // When using biased exponent of x in double precision, + // x_e = real_exponent_of_x + 1023 + // Then: + // x_e / 3 = real_exponent_of_x / 3 + 1023/3 + // = real_exponent_of_x / 3 + 341 + // So to make it the correct biased exponent of x^(1/3), we add + // 1023 - 341 = 682 + // to the quotient x_e / 3. + unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent()); + unsigned out_e = (x_e / 3 + 682) | sign_bit; + unsigned shift_e = x_e % 3; + + // Set x_m = 2^(x_e % 3) * (1.mantissa) + uint64_t x_m = xd_bits.get_mantissa(); + // Use the leading 4 bits for look up table + unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4)); + + x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS) + << DoubleBits::FRACTION_LEN; + + double x_reduced = DoubleBits(x_m).get_val(); + double dx = x_reduced - 1.0; + + double dx_sq = dx * dx; + double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0); + double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]); + double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]); + double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]); + + double dx_4 = dx_sq * dx_sq; + double p0 = fputil::multiply_add(dx_sq, c1, c0); + double p1 = fputil::multiply_add(dx_sq, c3, c2); + + double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e]; + + uint64_t r_m = DoubleBits(r).get_mantissa(); + // Check if the output is exact. To be exact, the smallest 1-bit of the + // output has to be at least 2^-7 or higher. So we check the lowest 44 bits + // to see if they are within 2^(-52 + 3) errors from all zeros, then the + // result cube root is exact. + if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) { + if ((r_m & 0xfffffffffff) <= 8) + r_m &= 0xffff'ffff'ffff'ffe0; + else + r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20; + fputil::clear_except_if_required(FE_INEXACT); + } + // Adjust exponent and sign. + uint64_t r_bits = + r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN); + + return static_cast<float>(DoubleBits(r_bits).get_val()); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_SRC___SUPPORT_MATH_CBRTF_H diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index c3840d3..660c3681 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -563,3 +563,13 @@ add_math_entrypoint_object(ufromfpxf) add_math_entrypoint_object(ufromfpxl) add_math_entrypoint_object(ufromfpxf16) add_math_entrypoint_object(ufromfpxf128) + +add_math_entrypoint_object(bf16add) +add_math_entrypoint_object(bf16addf) +add_math_entrypoint_object(bf16addl) +add_math_entrypoint_object(bf16addf128) + +add_math_entrypoint_object(bf16sub) +add_math_entrypoint_object(bf16subf) +add_math_entrypoint_object(bf16subl) +add_math_entrypoint_object(bf16subf128) diff --git a/libc/src/math/bf16add.h b/libc/src/math/bf16add.h new file mode 100644 index 0000000..a29970e --- /dev/null +++ b/libc/src/math/bf16add.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16add -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADD_H +#define LLVM_LIBC_SRC_MATH_BF16ADD_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16add(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADD_H diff --git a/libc/src/math/bf16addf.h b/libc/src/math/bf16addf.h new file mode 100644 index 0000000..80a5e2a --- /dev/null +++ b/libc/src/math/bf16addf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF_H +#define LLVM_LIBC_SRC_MATH_BF16ADDF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDF_H diff --git a/libc/src/math/bf16addf128.h b/libc/src/math/bf16addf128.h new file mode 100644 index 0000000..3c2f3a1 --- /dev/null +++ b/libc/src/math/bf16addf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF128_H +#define LLVM_LIBC_SRC_MATH_BF16ADDF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDF128_H diff --git a/libc/src/math/bf16addl.h b/libc/src/math/bf16addl.h new file mode 100644 index 0000000..a9e7d68 --- /dev/null +++ b/libc/src/math/bf16addl.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16addl ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16ADDL_H +#define LLVM_LIBC_SRC_MATH_BF16ADDL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16addl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16ADDL_H diff --git a/libc/src/math/bf16sub.h b/libc/src/math/bf16sub.h new file mode 100644 index 0000000..8108e914 --- /dev/null +++ b/libc/src/math/bf16sub.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16sub -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUB_H +#define LLVM_LIBC_SRC_MATH_BF16SUB_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16sub(double x, double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUB_H diff --git a/libc/src/math/bf16subf.h b/libc/src/math/bf16subf.h new file mode 100644 index 0000000..1bd79bf --- /dev/null +++ b/libc/src/math/bf16subf.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF_H +#define LLVM_LIBC_SRC_MATH_BF16SUBF_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subf(float x, float y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBF_H diff --git a/libc/src/math/bf16subf128.h b/libc/src/math/bf16subf128.h new file mode 100644 index 0000000..19590e8 --- /dev/null +++ b/libc/src/math/bf16subf128.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subf128 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF128_H +#define LLVM_LIBC_SRC_MATH_BF16SUBF128_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subf128(float128 x, float128 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBF128_H diff --git a/libc/src/math/bf16subl.h b/libc/src/math/bf16subl.h new file mode 100644 index 0000000..13b2093 --- /dev/null +++ b/libc/src/math/bf16subl.h @@ -0,0 +1,21 @@ +//===-- Implementation header for bf16subl ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_BF16SUBL_H +#define LLVM_LIBC_SRC_MATH_BF16SUBL_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +bfloat16 bf16subl(long double x, long double y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_BF16SUBL_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 0bec7dd..5aeacc8 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4819,11 +4819,7 @@ add_entrypoint_object( HDRS ../cbrtf.h DEPENDS - libc.hdr.fenv_macros - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.macros.optimization + libc.src.__support.math.cbrtf ) add_entrypoint_object( @@ -4915,3 +4911,116 @@ add_header_library( libc.src.__support.math.expf16_utils libc.src.__support.math.exp10_float16_constants ) + +add_entrypoint_object( + bf16add + SRCS + bf16add.cpp + HDRS + ../bf16add.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addf + SRCS + bf16addf.cpp + HDRS + ../bf16addf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addl + SRCS + bf16addl.cpp + HDRS + ../bf16addl.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16addf128 + SRCS + bf16addf128.cpp + HDRS + ../bf16addf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + + +add_entrypoint_object( + bf16sub + SRCS + bf16sub.cpp + HDRS + ../bf16sub.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subf + SRCS + bf16subf.cpp + HDRS + ../bf16subf.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subl + SRCS + bf16subl.cpp + HDRS + ../bf16subl.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) + +add_entrypoint_object( + bf16subf128 + SRCS + bf16subf128.cpp + HDRS + ../bf16subf128.h + DEPENDS + libc.src.__support.common + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.FPUtil.generic.add_sub + libc.src.__support.macros.config + libc.src.__support.macros.properties.types +) diff --git a/libc/src/math/generic/bf16add.cpp b/libc/src/math/generic/bf16add.cpp new file mode 100644 index 0000000..257596a --- /dev/null +++ b/libc/src/math/generic/bf16add.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16add function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16add.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16add, (double x, double y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addf.cpp b/libc/src/math/generic/bf16addf.cpp new file mode 100644 index 0000000..65e6cbf --- /dev/null +++ b/libc/src/math/generic/bf16addf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addf, (float x, float y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addf128.cpp b/libc/src/math/generic/bf16addf128.cpp new file mode 100644 index 0000000..03f70af --- /dev/null +++ b/libc/src/math/generic/bf16addf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addf128, (float128 x, float128 y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16addl.cpp b/libc/src/math/generic/bf16addl.cpp new file mode 100644 index 0000000..c212195 --- /dev/null +++ b/libc/src/math/generic/bf16addl.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16addl function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16addl.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16addl, (long double x, long double y)) { + return fputil::generic::add<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16sub.cpp b/libc/src/math/generic/bf16sub.cpp new file mode 100644 index 0000000..65eb209 --- /dev/null +++ b/libc/src/math/generic/bf16sub.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16sub function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16sub.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16sub, (double x, double y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subf.cpp b/libc/src/math/generic/bf16subf.cpp new file mode 100644 index 0000000..6bba4be --- /dev/null +++ b/libc/src/math/generic/bf16subf.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subf function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subf.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subf, (float x, float y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subf128.cpp b/libc/src/math/generic/bf16subf128.cpp new file mode 100644 index 0000000..e5fe107 --- /dev/null +++ b/libc/src/math/generic/bf16subf128.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subf128 function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subf128.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subf128, (float128 x, float128 y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/bf16subl.cpp b/libc/src/math/generic/bf16subl.cpp new file mode 100644 index 0000000..d3a970c --- /dev/null +++ b/libc/src/math/generic/bf16subl.cpp @@ -0,0 +1,21 @@ +//===-- Implementation of bf16subl function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/bf16subl.h" +#include "src/__support/FPUtil/bfloat16.h" +#include "src/__support/FPUtil/generic/add_sub.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(bfloat16, bf16subl, (long double x, long double y)) { + return fputil::generic::sub<bfloat16>(x, y); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp index 71b23c4..0bd8f71 100644 --- a/libc/src/math/generic/cbrtf.cpp +++ b/libc/src/math/generic/cbrtf.cpp @@ -7,153 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/cbrtf.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/cbrtf.h" namespace LIBC_NAMESPACE_DECL { -namespace { - -// Look up table for 2^(i/3) for i = 0, 1, 2. -constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}; - -// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1 -// generated by Sollya with: -// > for i from 0 to 15 do { -// P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]); -// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",", -// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", -// coeff(P, 6), "},"); -// }; -// Then (1 + x)^(1/3) ~ 1 + x * P(x). -constexpr double COEFFS[16][7] = { - {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5, - -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6, - 0x1.047f75e0aff14p-6}, - {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5, - -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6, - 0x1.716eca1d6e3bcp-7}, - {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5, - -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6, - 0x1.0aeca34893785p-7}, - {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5, - -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6, - 0x1.88410674c6a5dp-8}, - {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5, - -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7, - 0x1.249bb51a1c498p-8}, - {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5, - -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7, - 0x1.ba9230918fa2ep-9}, - {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5, - -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7, - 0x1.52e3e17c71069p-9}, - {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5, - -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7, - 0x1.0677d9af6aad4p-9}, - {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5, - -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7, - 0x1.9ad325dc7adcbp-10}, - {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5, - -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8, - 0x1.449f5ee175c69p-10}, - {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5, - -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8, - 0x1.02cb36389bd79p-10}, - {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5, - -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8, - 0x1.a008ae91f5936p-11}, - {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5, - -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8, - 0x1.50f8174cdb6e9p-11}, - {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5, - -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9, - 0x1.12e0e94e8586dp-11}, - {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5, - -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9, - 0x1.c3726535f1fc6p-12}, - {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5, - -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9, - 0x1.75077abf18d84p-12}, -}; - -} // anonymous namespace - -LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { - using FloatBits = typename fputil::FPBits<float>; - using DoubleBits = typename fputil::FPBits<double>; - - FloatBits x_bits(x); - - uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff; - uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN; - - if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) { - // x is 0, Inf, or NaN. - // Make sure it works for FTZ/DAZ modes. - return x + x; - } - - double xd = static_cast<double>(x); - DoubleBits xd_bits(xd); - - // When using biased exponent of x in double precision, - // x_e = real_exponent_of_x + 1023 - // Then: - // x_e / 3 = real_exponent_of_x / 3 + 1023/3 - // = real_exponent_of_x / 3 + 341 - // So to make it the correct biased exponent of x^(1/3), we add - // 1023 - 341 = 682 - // to the quotient x_e / 3. - unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent()); - unsigned out_e = (x_e / 3 + 682) | sign_bit; - unsigned shift_e = x_e % 3; - - // Set x_m = 2^(x_e % 3) * (1.mantissa) - uint64_t x_m = xd_bits.get_mantissa(); - // Use the leading 4 bits for look up table - unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4)); - - x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS) - << DoubleBits::FRACTION_LEN; - - double x_reduced = DoubleBits(x_m).get_val(); - double dx = x_reduced - 1.0; - - double dx_sq = dx * dx; - double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0); - double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]); - double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]); - double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]); - - double dx_4 = dx_sq * dx_sq; - double p0 = fputil::multiply_add(dx_sq, c1, c0); - double p1 = fputil::multiply_add(dx_sq, c3, c2); - - double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e]; - - uint64_t r_m = DoubleBits(r).get_mantissa(); - // Check if the output is exact. To be exact, the smallest 1-bit of the - // output has to be at least 2^-7 or higher. So we check the lowest 44 bits - // to see if they are within 2^(-52 + 3) errors from all zeros, then the - // result cube root is exact. - if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) { - if ((r_m & 0xfffffffffff) <= 8) - r_m &= 0xffff'ffff'ffff'ffe0; - else - r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20; - fputil::clear_except_if_required(FE_INEXACT); - } - // Adjust exponent and sign. - uint64_t r_bits = - r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN); - - return static_cast<float>(DoubleBits(r_bits).get_val()); -} +LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { return math::cbrtf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/integration/src/__support/GPU/CMakeLists.txt b/libc/test/integration/src/__support/GPU/CMakeLists.txt index e066830..1fb175b 100644 --- a/libc/test/integration/src/__support/GPU/CMakeLists.txt +++ b/libc/test/integration/src/__support/GPU/CMakeLists.txt @@ -27,3 +27,16 @@ add_integration_test( LOADER_ARGS --threads 64 ) + +add_libc_test( + fixedstack_test + SUITE + libc-support-gpu-tests + SRCS + fixedstack_test.cpp + DEPENDS + libc.src.__support.GPU.fixedstack + LOADER_ARGS + --threads 32 + --blocks 16 +) diff --git a/libc/test/integration/src/__support/GPU/fixedstack_test.cpp b/libc/test/integration/src/__support/GPU/fixedstack_test.cpp new file mode 100644 index 0000000..fde51df --- /dev/null +++ b/libc/test/integration/src/__support/GPU/fixedstack_test.cpp @@ -0,0 +1,44 @@ +//===-- Integration test for the lock-free stack --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/GPU/fixedstack.h" +#include "src/__support/GPU/utils.h" +#include "test/IntegrationTest/test.h" + +using namespace LIBC_NAMESPACE; + +static FixedStack<uint32_t, 2048> global_stack; + +void run() { + // We need enough space in the stack as threads in flight can temporarily + // consume memory before they finish comitting it back to the stack. + ASSERT_EQ(gpu::get_num_blocks() * gpu::get_num_threads(), 512); + + uint32_t val; + uint32_t num_threads = static_cast<uint32_t>(gpu::get_num_threads()); + for (int i = 0; i < 256; ++i) { + EXPECT_TRUE(global_stack.push(UINT32_MAX)) + EXPECT_TRUE(global_stack.pop(val)) + ASSERT_TRUE(val < num_threads || val == UINT32_MAX); + } + + EXPECT_TRUE(global_stack.push(static_cast<uint32_t>(gpu::get_thread_id()))); + EXPECT_TRUE(global_stack.push(static_cast<uint32_t>(gpu::get_thread_id()))); + EXPECT_TRUE(global_stack.pop(val)); + ASSERT_TRUE(val < num_threads || val == UINT32_MAX); + + // Fill the rest of the stack with the default value. + while (!global_stack.push(UINT32_MAX)) + ; +} + +TEST_MAIN(int argc, char **argv, char **envp) { + run(); + + return 0; +} diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index f5ea510..9685aea 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -27,6 +27,7 @@ add_fp_unittest( libc.src.__support.math.atanhf libc.src.__support.math.atanhf16 libc.src.__support.math.cbrt + libc.src.__support.math.cbrtf libc.src.__support.math.erff libc.src.__support.math.exp libc.src.__support.math.exp10 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 3d64e5e..5e57c49e 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -49,6 +49,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat) { EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atan2f(0.0f, 0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanf(0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanhf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::cbrtf(0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f)); diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 43cde0d..a74f9fe 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2972,6 +2972,118 @@ add_fp_unittest( libc.src.__support.macros.properties.types ) +add_fp_unittest( + bf16add_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16add_test.cpp + HDRS + AddTest.h + DEPENDS + libc.src.math.bf16add + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16addf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16addf_test.cpp + HDRS + AddTest.h + DEPENDS + libc.src.math.bf16addf + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16addl_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16addl_test.cpp + HDRS + AddTest.h + DEPENDS + libc.src.math.bf16addl + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16addf128_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16addf128_test.cpp + HDRS + AddTest.h + DEPENDS + libc.src.math.bf16addf128 + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16sub_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16sub_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.math.bf16sub + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16subf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16subf_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.math.bf16subf + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16subl_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16subl_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.math.bf16subl + libc.src.__support.FPUtil.bfloat16 +) + +add_fp_unittest( + bf16subf128_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + bf16subf128_test.cpp + HDRS + SubTest.h + DEPENDS + libc.src.math.bf16subf128 + libc.src.__support.FPUtil.bfloat16 +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/bf16add_test.cpp b/libc/test/src/math/bf16add_test.cpp new file mode 100644 index 0000000..9e9c594 --- /dev/null +++ b/libc/test/src/math/bf16add_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16add ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16add.h" + +LIST_ADD_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16add) diff --git a/libc/test/src/math/bf16addf128_test.cpp b/libc/test/src/math/bf16addf128_test.cpp new file mode 100644 index 0000000..46f7ad3 --- /dev/null +++ b/libc/test/src/math/bf16addf128_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addf128 -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addf128.h" + +LIST_ADD_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16addf128) diff --git a/libc/test/src/math/bf16addf_test.cpp b/libc/test/src/math/bf16addf_test.cpp new file mode 100644 index 0000000..06d56cf --- /dev/null +++ b/libc/test/src/math/bf16addf_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addf --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addf.h" + +LIST_ADD_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16addf) diff --git a/libc/test/src/math/bf16addl_test.cpp b/libc/test/src/math/bf16addl_test.cpp new file mode 100644 index 0000000..bf54827 --- /dev/null +++ b/libc/test/src/math/bf16addl_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addl --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addl.h" + +LIST_ADD_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16addl) diff --git a/libc/test/src/math/bf16sub_test.cpp b/libc/test/src/math/bf16sub_test.cpp new file mode 100644 index 0000000..4a793dc --- /dev/null +++ b/libc/test/src/math/bf16sub_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16sub ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16sub.h" + +LIST_SUB_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16sub) diff --git a/libc/test/src/math/bf16subf128_test.cpp b/libc/test/src/math/bf16subf128_test.cpp new file mode 100644 index 0000000..25d6711 --- /dev/null +++ b/libc/test/src/math/bf16subf128_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subf128 -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subf128.h" + +LIST_SUB_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16subf128) diff --git a/libc/test/src/math/bf16subf_test.cpp b/libc/test/src/math/bf16subf_test.cpp new file mode 100644 index 0000000..e8c7440 --- /dev/null +++ b/libc/test/src/math/bf16subf_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subf --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subf.h" + +LIST_SUB_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16subf) diff --git a/libc/test/src/math/bf16subl_test.cpp b/libc/test/src/math/bf16subl_test.cpp new file mode 100644 index 0000000..2997369 --- /dev/null +++ b/libc/test/src/math/bf16subl_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subl --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subl.h" + +LIST_SUB_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16subl) diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 5f497c6..dc1850a 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -5465,3 +5465,131 @@ add_fp_unittest( libc.src.__support.macros.properties.os libc.src.__support.macros.properties.types ) + +add_fp_unittest( + bf16add_test + SUITE + libc-math-smoke-tests + SRCS + bf16add_test.cpp + HDRS + AddTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16add + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16addf_test + SUITE + libc-math-smoke-tests + SRCS + bf16addf_test.cpp + HDRS + AddTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16addf + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16addl_test + SUITE + libc-math-smoke-tests + SRCS + bf16addl_test.cpp + HDRS + AddTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16addl + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16addf128_test + SUITE + libc-math-smoke-tests + SRCS + bf16addf128_test.cpp + HDRS + AddTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16addf128 + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16sub_test + SUITE + libc-math-smoke-tests + SRCS + bf16sub_test.cpp + HDRS + SubTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16sub + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16subf_test + SUITE + libc-math-smoke-tests + SRCS + bf16subf_test.cpp + HDRS + SubTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16subf + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16subl_test + SUITE + libc-math-smoke-tests + SRCS + bf16subl_test.cpp + HDRS + SubTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16subl + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) + +add_fp_unittest( + bf16subf128_test + SUITE + libc-math-smoke-tests + SRCS + bf16subf128_test.cpp + HDRS + SubTest.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.math.bf16subf128 + libc.src.__support.FPUtil.bfloat16 + libc.src.__support.macros.properties.os +) diff --git a/libc/test/src/math/smoke/bf16add_test.cpp b/libc/test/src/math/smoke/bf16add_test.cpp new file mode 100644 index 0000000..9e9c594 --- /dev/null +++ b/libc/test/src/math/smoke/bf16add_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16add ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16add.h" + +LIST_ADD_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16add) diff --git a/libc/test/src/math/smoke/bf16addf128_test.cpp b/libc/test/src/math/smoke/bf16addf128_test.cpp new file mode 100644 index 0000000..46f7ad3 --- /dev/null +++ b/libc/test/src/math/smoke/bf16addf128_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addf128 -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addf128.h" + +LIST_ADD_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16addf128) diff --git a/libc/test/src/math/smoke/bf16addf_test.cpp b/libc/test/src/math/smoke/bf16addf_test.cpp new file mode 100644 index 0000000..06d56cf --- /dev/null +++ b/libc/test/src/math/smoke/bf16addf_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addf --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addf.h" + +LIST_ADD_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16addf) diff --git a/libc/test/src/math/smoke/bf16addl_test.cpp b/libc/test/src/math/smoke/bf16addl_test.cpp new file mode 100644 index 0000000..bf54827 --- /dev/null +++ b/libc/test/src/math/smoke/bf16addl_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16addl --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16addl.h" + +LIST_ADD_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16addl) diff --git a/libc/test/src/math/smoke/bf16sub_test.cpp b/libc/test/src/math/smoke/bf16sub_test.cpp new file mode 100644 index 0000000..4a793dc --- /dev/null +++ b/libc/test/src/math/smoke/bf16sub_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16sub ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16sub.h" + +LIST_SUB_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16sub) diff --git a/libc/test/src/math/smoke/bf16subf128_test.cpp b/libc/test/src/math/smoke/bf16subf128_test.cpp new file mode 100644 index 0000000..25d6711 --- /dev/null +++ b/libc/test/src/math/smoke/bf16subf128_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subf128 -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subf128.h" + +LIST_SUB_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16subf128) diff --git a/libc/test/src/math/smoke/bf16subf_test.cpp b/libc/test/src/math/smoke/bf16subf_test.cpp new file mode 100644 index 0000000..e8c7440 --- /dev/null +++ b/libc/test/src/math/smoke/bf16subf_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subf --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subf.h" + +LIST_SUB_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16subf) diff --git a/libc/test/src/math/smoke/bf16subl_test.cpp b/libc/test/src/math/smoke/bf16subl_test.cpp new file mode 100644 index 0000000..2997369 --- /dev/null +++ b/libc/test/src/math/smoke/bf16subl_test.cpp @@ -0,0 +1,14 @@ +//===-- Unittests for bf16subl --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubTest.h" + +#include "src/__support/FPUtil/bfloat16.h" +#include "src/math/bf16subl.h" + +LIST_SUB_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16subl) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index ae12a83..57e818c 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -411,6 +411,21 @@ template void explain_binary_operation_one_output_error( #endif template void explain_binary_operation_one_output_error( Operation, const BinaryInput<bfloat16> &, bfloat16, double, RoundingMode); +template void +explain_binary_operation_one_output_error(Operation, const BinaryInput<float> &, + bfloat16, double, RoundingMode); +template void explain_binary_operation_one_output_error( + Operation, const BinaryInput<double> &, bfloat16, double, RoundingMode); +template void +explain_binary_operation_one_output_error(Operation, + const BinaryInput<long double> &, + bfloat16, double, RoundingMode); +#if defined(LIBC_TYPES_HAS_FLOAT128) && \ + defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE) +template void explain_binary_operation_one_output_error( + Operation, const BinaryInput<float128> &, bfloat16, double, RoundingMode); +#endif // defined(LIBC_TYPES_HAS_FLOAT128) && + // defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE) template <typename InputType, typename OutputType> void explain_ternary_operation_one_output_error( @@ -648,6 +663,26 @@ template bool compare_binary_operation_one_output(Operation, const BinaryInput<bfloat16> &, bfloat16, double, RoundingMode); + +template bool compare_binary_operation_one_output(Operation, + const BinaryInput<float> &, + bfloat16, double, + RoundingMode); +template bool compare_binary_operation_one_output(Operation, + const BinaryInput<double> &, + bfloat16, double, + RoundingMode); +template bool +compare_binary_operation_one_output(Operation, const BinaryInput<long double> &, + bfloat16, double, RoundingMode); +#if defined(LIBC_TYPES_HAS_FLOAT128) && \ + defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE) +template bool compare_binary_operation_one_output(Operation, + const BinaryInput<float128> &, + bfloat16, double, + RoundingMode); +#endif // defined(LIBC_TYPES_HAS_FLOAT128) && + // defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE) template <typename InputType, typename OutputType> bool compare_ternary_operation_one_output(Operation op, const TernaryInput<InputType> &input, diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py index b054ed4..2118db6 100644 --- a/libc/utils/hdrgen/hdrgen/header.py +++ b/libc/utils/hdrgen/hdrgen/header.py @@ -204,7 +204,7 @@ class HeaderFile: current_guard = None for function in self.functions: - if function.guard == None: + if function.guard == None and current_guard == None: content.append(str(function) + " __NOEXCEPT;") content.append("") else: @@ -221,7 +221,8 @@ class HeaderFile: content.append(f"#endif // {current_guard}") content.append("") current_guard = function.guard - content.append(f"#ifdef {current_guard}") + if current_guard is not None: + content.append(f"#ifdef {current_guard}") content.append(str(function) + " __NOEXCEPT;") content.append("") if current_guard != None: |