aboutsummaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r--libc/config/baremetal/aarch64/entrypoints.txt14
-rw-r--r--libc/config/baremetal/arm/entrypoints.txt14
-rw-r--r--libc/config/baremetal/riscv/entrypoints.txt14
-rw-r--r--libc/config/darwin/aarch64/entrypoints.txt14
-rw-r--r--libc/config/darwin/x86_64/entrypoints.txt6
-rw-r--r--libc/config/gpu/amdgpu/entrypoints.txt6
-rw-r--r--libc/config/gpu/nvptx/entrypoints.txt6
-rw-r--r--libc/config/linux/aarch64/entrypoints.txt14
-rw-r--r--libc/config/linux/arm/entrypoints.txt6
-rw-r--r--libc/config/linux/riscv/entrypoints.txt14
-rw-r--r--libc/config/linux/x86_64/entrypoints.txt14
-rw-r--r--libc/config/windows/entrypoints.txt6
-rw-r--r--libc/shared/math.h1
-rw-r--r--libc/shared/math/cbrtf.h23
-rw-r--r--libc/src/__support/GPU/CMakeLists.txt7
-rw-r--r--libc/src/__support/GPU/allocator.cpp38
-rw-r--r--libc/src/__support/GPU/fixedstack.h111
-rw-r--r--libc/src/__support/math/CMakeLists.txt11
-rw-r--r--libc/src/__support/math/cbrtf.h161
-rw-r--r--libc/src/math/CMakeLists.txt10
-rw-r--r--libc/src/math/bf16add.h21
-rw-r--r--libc/src/math/bf16addf.h21
-rw-r--r--libc/src/math/bf16addf128.h21
-rw-r--r--libc/src/math/bf16addl.h21
-rw-r--r--libc/src/math/bf16sub.h21
-rw-r--r--libc/src/math/bf16subf.h21
-rw-r--r--libc/src/math/bf16subf128.h21
-rw-r--r--libc/src/math/bf16subl.h21
-rw-r--r--libc/src/math/generic/CMakeLists.txt119
-rw-r--r--libc/src/math/generic/bf16add.cpp21
-rw-r--r--libc/src/math/generic/bf16addf.cpp21
-rw-r--r--libc/src/math/generic/bf16addf128.cpp21
-rw-r--r--libc/src/math/generic/bf16addl.cpp21
-rw-r--r--libc/src/math/generic/bf16sub.cpp21
-rw-r--r--libc/src/math/generic/bf16subf.cpp21
-rw-r--r--libc/src/math/generic/bf16subf128.cpp21
-rw-r--r--libc/src/math/generic/bf16subl.cpp21
-rw-r--r--libc/src/math/generic/cbrtf.cpp147
-rw-r--r--libc/test/integration/src/__support/GPU/CMakeLists.txt13
-rw-r--r--libc/test/integration/src/__support/GPU/fixedstack_test.cpp44
-rw-r--r--libc/test/shared/CMakeLists.txt1
-rw-r--r--libc/test/shared/shared_math_test.cpp1
-rw-r--r--libc/test/src/math/CMakeLists.txt112
-rw-r--r--libc/test/src/math/bf16add_test.cpp14
-rw-r--r--libc/test/src/math/bf16addf128_test.cpp14
-rw-r--r--libc/test/src/math/bf16addf_test.cpp14
-rw-r--r--libc/test/src/math/bf16addl_test.cpp14
-rw-r--r--libc/test/src/math/bf16sub_test.cpp14
-rw-r--r--libc/test/src/math/bf16subf128_test.cpp14
-rw-r--r--libc/test/src/math/bf16subf_test.cpp14
-rw-r--r--libc/test/src/math/bf16subl_test.cpp14
-rw-r--r--libc/test/src/math/smoke/CMakeLists.txt128
-rw-r--r--libc/test/src/math/smoke/bf16add_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16addf128_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16addf_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16addl_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16sub_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16subf128_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16subf_test.cpp14
-rw-r--r--libc/test/src/math/smoke/bf16subl_test.cpp14
-rw-r--r--libc/utils/MPFRWrapper/MPFRUtils.cpp35
-rw-r--r--libc/utils/hdrgen/hdrgen/header.py5
62 files changed, 1501 insertions, 154 deletions
diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index 683c746..af9f687 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -757,6 +757,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -765,6 +771,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index f8ecc2e..ce8d7c0 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -760,6 +760,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -768,6 +774,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index 679bfb3..7eeec24 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -760,6 +760,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -768,6 +774,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/darwin/aarch64/entrypoints.txt b/libc/config/darwin/aarch64/entrypoints.txt
index 72b0265..de4b4df 100644
--- a/libc/config/darwin/aarch64/entrypoints.txt
+++ b/libc/config/darwin/aarch64/entrypoints.txt
@@ -590,6 +590,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -598,6 +604,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt
index b5ab1ee..f668e8a 100644
--- a/libc/config/darwin/x86_64/entrypoints.txt
+++ b/libc/config/darwin/x86_64/entrypoints.txt
@@ -233,6 +233,12 @@ set(TARGET_LIBM_ENTRYPOINTS
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt
index 77a13a6..f7b8b2f 100644
--- a/libc/config/gpu/amdgpu/entrypoints.txt
+++ b/libc/config/gpu/amdgpu/entrypoints.txt
@@ -616,6 +616,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt
index 61c9c71..23afb40 100644
--- a/libc/config/gpu/nvptx/entrypoints.txt
+++ b/libc/config/gpu/nvptx/entrypoints.txt
@@ -617,6 +617,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index fbdf8fb..62a3ae9 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -844,6 +844,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -852,6 +858,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LLVM_LIBC_FULL_BUILD)
list(APPEND TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index e3f5fee..eedf184 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -460,6 +460,12 @@ set(TARGET_LIBM_ENTRYPOINTS
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index ba67ddd..cd56979 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -863,6 +863,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -871,6 +877,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 066dc21..2b0cf33 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -895,6 +895,12 @@ endif()
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
@@ -903,6 +909,14 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncbf16
)
+if(LIBC_TYPES_HAS_FLOAT128)
+ list(APPEND TARGET_LIBM_ENTRYPOINTS
+ # math.h C++23 mixed bfloat16 and _Float128 entrypoints
+ libc.src.math.bf16addf128
+ libc.src.math.bf16subf128
+ )
+endif()
+
if(LIBC_COMPILER_HAS_FIXED_POINT)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 994078c..1b1db5e 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -306,6 +306,12 @@ set(TARGET_LIBM_ENTRYPOINTS
list(APPEND TARGET_LIBM_ENTRYPOINTS
# bfloat16 entrypoints
+ libc.src.math.bf16add
+ libc.src.math.bf16addf
+ libc.src.math.bf16addl
+ libc.src.math.bf16sub
+ libc.src.math.bf16subf
+ libc.src.math.bf16subl
libc.src.math.ceilbf16
libc.src.math.fabsbf16
libc.src.math.floorbf16
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 3714f38..ea645f0 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -31,6 +31,7 @@
#include "math/atanhf.h"
#include "math/atanhf16.h"
#include "math/cbrt.h"
+#include "math/cbrtf.h"
#include "math/erff.h"
#include "math/exp.h"
#include "math/exp10.h"
diff --git a/libc/shared/math/cbrtf.h b/libc/shared/math/cbrtf.h
new file mode 100644
index 0000000..09b86be
--- /dev/null
+++ b/libc/shared/math/cbrtf.h
@@ -0,0 +1,23 @@
+//===-- Shared cbrtf function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SHARED_MATH_CBRTF_H
+#define LIBC_SHARED_MATH_CBRTF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/cbrtf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::cbrtf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_SHARED_MATH_CBRTF_H
diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt
index f8fdfeb..72a7879 100644
--- a/libc/src/__support/GPU/CMakeLists.txt
+++ b/libc/src/__support/GPU/CMakeLists.txt
@@ -9,6 +9,12 @@ add_header_library(
utils.h
)
+add_header_library(
+ fixedstack
+ HDRS
+ fixedstack.h
+)
+
add_object_library(
allocator
SRCS
@@ -23,4 +29,5 @@ add_object_library(
libc.src.__support.CPP.bit
libc.src.__support.CPP.new
.utils
+ .fixedstack
)
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 250bebd..534a309 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -20,6 +20,7 @@
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/new.h"
+#include "src/__support/GPU/fixedstack.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/threads/sleep.h"
@@ -39,6 +40,9 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
// The number of times to attempt claiming an in-progress slab allocation.
constexpr static uint32_t MAX_TRIES = 1024;
+// The number of previously allocated slabs we will keep in memory.
+constexpr static uint32_t CACHED_SLABS = 8;
+
static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
namespace impl {
@@ -185,20 +189,35 @@ struct Slab {
struct alignas(MIN_SIZE) Header {
uint32_t chunk_size;
uint32_t global_index;
+ uint32_t cached_chunk_size;
};
// Initialize the slab with its chunk size and index in the global table for
// use when freeing.
Slab(uint32_t chunk_size, uint32_t global_index) {
Header *header = reinterpret_cast<Header *>(memory);
+ header->cached_chunk_size = cpp::numeric_limits<uint32_t>::max();
header->chunk_size = chunk_size;
header->global_index = global_index;
}
+ // Reset the memory with a new index and chunk size, not thread safe.
+ Slab *reset(uint32_t chunk_size, uint32_t global_index) {
+ Header *header = reinterpret_cast<Header *>(memory);
+ header->cached_chunk_size = header->chunk_size; // Remember the old size.
+ header->chunk_size = chunk_size;
+ header->global_index = global_index;
+ return this; // Lets the caller return the recycled slab in one expression.
+ }
+
// Set the necessary bitfield bytes to zero in parallel using many lanes. This
// must be called before the bitfield can be accessed safely, memory is not
// guaranteed to be zero initialized in the current implementation.
void initialize(uint64_t uniform) {
+ // If this is a re-used slab the memory is already set to zero.
+ if (get_cached_chunk_size() <= get_chunk_size())
+ return;
+
uint32_t size = (bitfield_bytes(get_chunk_size()) + sizeof(uint32_t) - 1) /
sizeof(uint32_t);
impl::uniform_memset(get_bitfield(), 0, size, uniform);
@@ -236,6 +255,11 @@ struct Slab {
return reinterpret_cast<const Header *>(memory)->chunk_size;
}
+ // Get the chunk size used before the last reset(); uint32_t max if never reset.
+ uint32_t get_cached_chunk_size() const {
+ return reinterpret_cast<const Header *>(memory)->cached_chunk_size;
+ }
+
// Get the location in the memory where we will store the global index.
uint32_t get_global_index() const {
return reinterpret_cast<const Header *>(memory)->global_index;
@@ -337,6 +361,9 @@ struct Slab {
uint8_t memory[SLAB_SIZE];
};
+// A global cache of previously allocated slabs for efficient reuse.
+static FixedStack<Slab *, CACHED_SLABS> slab_cache;
+
/// A wait-free guard around a pointer resource to be created dynamically if
/// space is available and freed once there are no more users.
struct GuardPtr {
@@ -408,6 +435,11 @@ private:
reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
count = cpp::numeric_limits<uint32_t>::max();
+
+ Slab *cached = nullptr;
+ if (slab_cache.pop(cached))
+ return cached->reset(cpp::forward<Args>(args)...);
+
void *raw = impl::rpc_allocate(sizeof(Slab));
if (!raw)
return nullptr;
@@ -475,8 +507,10 @@ public:
if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(mask)) &&
ref.release(cpp::popcount(mask))) {
Slab *p = ptr.load(cpp::MemoryOrder::RELAXED);
- p->~Slab();
- impl::rpc_free(p);
+ if (!slab_cache.push(p)) {
+ p->~Slab();
+ impl::rpc_free(p);
+ }
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
ptr.store(nullptr, cpp::MemoryOrder::RELAXED);
}
diff --git a/libc/src/__support/GPU/fixedstack.h b/libc/src/__support/GPU/fixedstack.h
new file mode 100644
index 0000000..6ceaa2f
--- /dev/null
+++ b/libc/src/__support/GPU/fixedstack.h
@@ -0,0 +1,111 @@
+//===-- A lock-free data structure for a fixed capacity stack ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H
+#define LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H
+
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/threads/sleep.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// A lock-free fixed size stack backed by an underlying array of data. Nodes are
+// moved between a "free" list and a "used" list using compare-and-swap only.
+template <typename T, uint32_t CAPACITY> struct alignas(16) FixedStack {
+ // Indices occupy the low 20 bits of a head and CAPACITY itself is the sentinel.
+ static_assert(CAPACITY < 1024 * 1024, "Invalid buffer size");
+
+ // The heads of the free and used stacks. Each is represented as a 20-bit index
+ // combined with a 44-bit ABA tag, so both are updated by one atomic operation.
+ uint64_t free;
+ uint64_t used;
+
+ // Each stack is a linked list: next[i] is the index of the node below node i.
+ uint32_t next[CAPACITY];
+ T data[CAPACITY];
+
+ // Extract the 20-bit node index stored in the low bits of a head value.
+ LIBC_INLINE static constexpr uint32_t get_node(uint64_t head) {
+ return static_cast<uint32_t>(head & 0xfffff);
+ }
+
+ // Build a new head: the tag of 'orig' (bits 20 and up) plus one, over 'node'.
+ LIBC_INLINE static constexpr uint64_t make_head(uint64_t orig,
+ uint32_t node) {
+ return static_cast<uint64_t>(node) | (((orig >> 20ul) + 1ul) << 20ul);
+ }
+
+ // Attempts to pop a node from the given stack by making its head point to the
+ // next node, retrying the compare-and-swap until it succeeds. Returns the
+ // index that was popped, or CAPACITY if the head holds the sentinel (empty).
+ LIBC_INLINE uint32_t pop_impl(cpp::AtomicRef<uint64_t> head) {
+ uint64_t orig = head.load(cpp::MemoryOrder::RELAXED);
+
+ for (;;) {
+ if (get_node(orig) == CAPACITY)
+ return CAPACITY;
+
+ uint32_t node =
+ cpp::AtomicRef(next[get_node(orig)]).load(cpp::MemoryOrder::RELAXED);
+ if (head.compare_exchange_strong(orig, make_head(orig, node),
+ cpp::MemoryOrder::ACQUIRE,
+ cpp::MemoryOrder::RELAXED))
+ break;
+ }
+ return get_node(orig);
+ }
+
+ // Attempts to push 'node' onto the given stack by linking it to the current
+ // head and then installing it as the head with compare-and-swap, retrying on
+ // failure. Returns the index found at the head when it is re-read afterwards.
+ LIBC_INLINE uint32_t push_impl(cpp::AtomicRef<uint64_t> head, uint32_t node) {
+ uint64_t orig = head.load(cpp::MemoryOrder::RELAXED);
+ for (;;) {
+ next[node] = get_node(orig);
+ if (head.compare_exchange_strong(orig, make_head(orig, node),
+ cpp::MemoryOrder::RELEASE,
+ cpp::MemoryOrder::RELAXED))
+ break;
+ }
+ return get_node(head.load(cpp::MemoryOrder::RELAXED));
+ }
+
+public:
+ // Start with every node chained on the free stack (0 -> 1 -> ... -> CAPACITY)
+ // and an empty used stack. CAPACITY is the sentinel marking the end of a list.
+ LIBC_INLINE constexpr FixedStack() : free(0), used(CAPACITY), data{} {
+ for (uint32_t i = 0; i < CAPACITY; ++i)
+ next[i] = i + 1;
+ }
+
+ LIBC_INLINE bool push(const T &val) {
+ uint32_t node = pop_impl(cpp::AtomicRef(free)); // Claim an unused node.
+ if (node == CAPACITY)
+ return false; // No free node left: the stack is full.
+
+ data[node] = val;
+ push_impl(cpp::AtomicRef(used), node); // Publish the filled node.
+ return true;
+ }
+
+ LIBC_INLINE bool pop(T &val) {
+ uint32_t node = pop_impl(cpp::AtomicRef(used)); // Take the newest node.
+ if (node == CAPACITY)
+ return false; // Nothing on the used stack: the stack is empty.
+
+ val = data[node];
+ push_impl(cpp::AtomicRef(free), node); // Return the node for reuse.
+ return true;
+ }
+};
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_GPU_FIXEDSTACK_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index e1076ed..fe928a8 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -347,6 +347,17 @@ add_header_library(
)
add_header_library(
+ cbrtf
+ HDRS
+ cbrtf.h
+ DEPENDS
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
erff
HDRS
erff.h
diff --git a/libc/src/__support/math/cbrtf.h b/libc/src/__support/math/cbrtf.h
new file mode 100644
index 0000000..f82892b
--- /dev/null
+++ b/libc/src/__support/math/cbrtf.h
@@ -0,0 +1,161 @@
+//===-- Implementation header for cbrtf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC___SUPPORT_MATH_CBRTF_H
+#define LIBC_SRC___SUPPORT_MATH_CBRTF_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float cbrtf(float x) {
+ // Look up table for 2^(i/3) for i = 0, 1, 2.
+ constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0};
+
+ // Degree-6 polynomial approximations of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1,
+ // one per sub-interval [i/16, (i + 1)/16], generated by Sollya with:
+ // > for i from 0 to 15 do {
+ // P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]);
+ // print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
+ // coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
+ // coeff(P, 6), "},");
+ // };
+ // Then (1 + x)^(1/3) ~ 1 + x * P(x).
+ constexpr double COEFFS[16][7] = {
+ {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5,
+ -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6,
+ 0x1.047f75e0aff14p-6},
+ {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5,
+ -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6,
+ 0x1.716eca1d6e3bcp-7},
+ {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5,
+ -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6,
+ 0x1.0aeca34893785p-7},
+ {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5,
+ -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6,
+ 0x1.88410674c6a5dp-8},
+ {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5,
+ -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7,
+ 0x1.249bb51a1c498p-8},
+ {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5,
+ -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7,
+ 0x1.ba9230918fa2ep-9},
+ {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5,
+ -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7,
+ 0x1.52e3e17c71069p-9},
+ {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5,
+ -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7,
+ 0x1.0677d9af6aad4p-9},
+ {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5,
+ -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7,
+ 0x1.9ad325dc7adcbp-10},
+ {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5,
+ -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8,
+ 0x1.449f5ee175c69p-10},
+ {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5,
+ -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8,
+ 0x1.02cb36389bd79p-10},
+ {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5,
+ -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8,
+ 0x1.a008ae91f5936p-11},
+ {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5,
+ -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8,
+ 0x1.50f8174cdb6e9p-11},
+ {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5,
+ -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9,
+ 0x1.12e0e94e8586dp-11},
+ {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5,
+ -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9,
+ 0x1.c3726535f1fc6p-12},
+ {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5,
+ -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9,
+ 0x1.75077abf18d84p-12},
+ };
+
+ using FloatBits = typename fputil::FPBits<float>;
+ using DoubleBits = typename fputil::FPBits<double>;
+
+ FloatBits x_bits(x);
+
+ uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff;
+ uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN;
+
+ if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) {
+ // x is 0, Inf, or NaN.
+ // Make sure it works for FTZ/DAZ modes.
+ return x + x;
+ }
+
+ double xd = static_cast<double>(x);
+ DoubleBits xd_bits(xd);
+
+ // When using biased exponent of x in double precision,
+ // x_e = real_exponent_of_x + 1023
+ // Then:
+ // x_e / 3 = real_exponent_of_x / 3 + 1023/3
+ // = real_exponent_of_x / 3 + 341
+ // So to make it the correct biased exponent of x^(1/3), we add
+ // 1023 - 341 = 682
+ // to the quotient x_e / 3.
+ unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
+ unsigned out_e = (x_e / 3 + 682) | sign_bit;
+ unsigned shift_e = x_e % 3;
+
+ // Set x_m to the bits of 1.mantissa; 2^((x_e % 3)/3) is multiplied in via CBRT2.
+ uint64_t x_m = xd_bits.get_mantissa();
+ // Use the leading 4 mantissa bits to select a row of COEFFS.
+ unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4));
+
+ x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
+ << DoubleBits::FRACTION_LEN;
+
+ double x_reduced = DoubleBits(x_m).get_val();
+ double dx = x_reduced - 1.0;
+
+ double dx_sq = dx * dx;
+ double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0);
+ double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]);
+ double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]);
+ double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]);
+
+ double dx_4 = dx_sq * dx_sq;
+ double p0 = fputil::multiply_add(dx_sq, c1, c0);
+ double p1 = fputil::multiply_add(dx_sq, c3, c2);
+
+ double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e];
+
+ uint64_t r_m = DoubleBits(r).get_mantissa();
+ // Check if the output is exact. To be exact, the smallest 1-bit of the
+ // output has to be at least 2^-7 or higher. So we check whether the lowest 44
+ // bits are within 2^(-52 + 3) of all zeros; if they are, the cube root is
+ // exact, so snap r_m to that value and clear FE_INEXACT if required.
+ if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) {
+ if ((r_m & 0xfffffffffff) <= 8)
+ r_m &= 0xffff'ffff'ffff'ffe0;
+ else
+ r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20;
+ fputil::clear_except_if_required(FE_INEXACT);
+ }
+ // Adjust exponent and sign.
+ uint64_t r_bits =
+ r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);
+
+ return static_cast<float>(DoubleBits(r_bits).get_val());
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_SRC___SUPPORT_MATH_CBRTF_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index c3840d3..660c3681 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -563,3 +563,13 @@ add_math_entrypoint_object(ufromfpxf)
add_math_entrypoint_object(ufromfpxl)
add_math_entrypoint_object(ufromfpxf16)
add_math_entrypoint_object(ufromfpxf128)
+
+add_math_entrypoint_object(bf16add)
+add_math_entrypoint_object(bf16addf)
+add_math_entrypoint_object(bf16addl)
+add_math_entrypoint_object(bf16addf128)
+
+add_math_entrypoint_object(bf16sub)
+add_math_entrypoint_object(bf16subf)
+add_math_entrypoint_object(bf16subl)
+add_math_entrypoint_object(bf16subf128)
diff --git a/libc/src/math/bf16add.h b/libc/src/math/bf16add.h
new file mode 100644
index 0000000..a29970e
--- /dev/null
+++ b/libc/src/math/bf16add.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16add -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16ADD_H
+#define LLVM_LIBC_SRC_MATH_BF16ADD_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16add(double x, double y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16ADD_H
diff --git a/libc/src/math/bf16addf.h b/libc/src/math/bf16addf.h
new file mode 100644
index 0000000..80a5e2a
--- /dev/null
+++ b/libc/src/math/bf16addf.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16addf ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF_H
+#define LLVM_LIBC_SRC_MATH_BF16ADDF_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16addf(float x, float y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16ADDF_H
diff --git a/libc/src/math/bf16addf128.h b/libc/src/math/bf16addf128.h
new file mode 100644
index 0000000..3c2f3a1
--- /dev/null
+++ b/libc/src/math/bf16addf128.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16addf128 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16ADDF128_H
+#define LLVM_LIBC_SRC_MATH_BF16ADDF128_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16addf128(float128 x, float128 y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16ADDF128_H
diff --git a/libc/src/math/bf16addl.h b/libc/src/math/bf16addl.h
new file mode 100644
index 0000000..a9e7d68
--- /dev/null
+++ b/libc/src/math/bf16addl.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16addl ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16ADDL_H
+#define LLVM_LIBC_SRC_MATH_BF16ADDL_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16addl(long double x, long double y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16ADDL_H
diff --git a/libc/src/math/bf16sub.h b/libc/src/math/bf16sub.h
new file mode 100644
index 0000000..8108e914
--- /dev/null
+++ b/libc/src/math/bf16sub.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16sub -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16SUB_H
+#define LLVM_LIBC_SRC_MATH_BF16SUB_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16sub(double x, double y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16SUB_H
diff --git a/libc/src/math/bf16subf.h b/libc/src/math/bf16subf.h
new file mode 100644
index 0000000..1bd79bf
--- /dev/null
+++ b/libc/src/math/bf16subf.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16subf ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF_H
+#define LLVM_LIBC_SRC_MATH_BF16SUBF_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16subf(float x, float y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16SUBF_H
diff --git a/libc/src/math/bf16subf128.h b/libc/src/math/bf16subf128.h
new file mode 100644
index 0000000..19590e8
--- /dev/null
+++ b/libc/src/math/bf16subf128.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16subf128 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16SUBF128_H
+#define LLVM_LIBC_SRC_MATH_BF16SUBF128_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16subf128(float128 x, float128 y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16SUBF128_H
diff --git a/libc/src/math/bf16subl.h b/libc/src/math/bf16subl.h
new file mode 100644
index 0000000..13b2093
--- /dev/null
+++ b/libc/src/math/bf16subl.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for bf16subl ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_BF16SUBL_H
+#define LLVM_LIBC_SRC_MATH_BF16SUBL_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 bf16subl(long double x, long double y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_BF16SUBL_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 0bec7dd..5aeacc8 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4819,11 +4819,7 @@ add_entrypoint_object(
HDRS
../cbrtf.h
DEPENDS
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.macros.optimization
+ libc.src.__support.math.cbrtf
)
add_entrypoint_object(
@@ -4915,3 +4911,116 @@ add_header_library(
libc.src.__support.math.expf16_utils
libc.src.__support.math.exp10_float16_constants
)
+
+add_entrypoint_object(
+ bf16add
+ SRCS
+ bf16add.cpp
+ HDRS
+ ../bf16add.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16addf
+ SRCS
+ bf16addf.cpp
+ HDRS
+ ../bf16addf.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16addl
+ SRCS
+ bf16addl.cpp
+ HDRS
+ ../bf16addl.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16addf128
+ SRCS
+ bf16addf128.cpp
+ HDRS
+ ../bf16addf128.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+
+add_entrypoint_object(
+ bf16sub
+ SRCS
+ bf16sub.cpp
+ HDRS
+ ../bf16sub.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16subf
+ SRCS
+ bf16subf.cpp
+ HDRS
+ ../bf16subf.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16subl
+ SRCS
+ bf16subl.cpp
+ HDRS
+ ../bf16subl.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
+ bf16subf128
+ SRCS
+ bf16subf128.cpp
+ HDRS
+ ../bf16subf128.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.add_sub
+ libc.src.__support.macros.config
+ libc.src.__support.macros.properties.types
+)
diff --git a/libc/src/math/generic/bf16add.cpp b/libc/src/math/generic/bf16add.cpp
new file mode 100644
index 0000000..257596a
--- /dev/null
+++ b/libc/src/math/generic/bf16add.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16add function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16add.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16add, (double x, double y)) {
+ return fputil::generic::add<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16addf.cpp b/libc/src/math/generic/bf16addf.cpp
new file mode 100644
index 0000000..65e6cbf
--- /dev/null
+++ b/libc/src/math/generic/bf16addf.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16addf function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16addf.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16addf, (float x, float y)) {
+ return fputil::generic::add<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16addf128.cpp b/libc/src/math/generic/bf16addf128.cpp
new file mode 100644
index 0000000..03f70af
--- /dev/null
+++ b/libc/src/math/generic/bf16addf128.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16addf128 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16addf128.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16addf128, (float128 x, float128 y)) {
+ return fputil::generic::add<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16addl.cpp b/libc/src/math/generic/bf16addl.cpp
new file mode 100644
index 0000000..c212195
--- /dev/null
+++ b/libc/src/math/generic/bf16addl.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16addl function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16addl.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16addl, (long double x, long double y)) {
+ return fputil::generic::add<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16sub.cpp b/libc/src/math/generic/bf16sub.cpp
new file mode 100644
index 0000000..65eb209
--- /dev/null
+++ b/libc/src/math/generic/bf16sub.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16sub function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16sub.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16sub, (double x, double y)) {
+ return fputil::generic::sub<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16subf.cpp b/libc/src/math/generic/bf16subf.cpp
new file mode 100644
index 0000000..6bba4be
--- /dev/null
+++ b/libc/src/math/generic/bf16subf.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16subf function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16subf.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16subf, (float x, float y)) {
+ return fputil::generic::sub<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16subf128.cpp b/libc/src/math/generic/bf16subf128.cpp
new file mode 100644
index 0000000..e5fe107
--- /dev/null
+++ b/libc/src/math/generic/bf16subf128.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16subf128 function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16subf128.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16subf128, (float128 x, float128 y)) {
+ return fputil::generic::sub<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/bf16subl.cpp b/libc/src/math/generic/bf16subl.cpp
new file mode 100644
index 0000000..d3a970c
--- /dev/null
+++ b/libc/src/math/generic/bf16subl.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of bf16subl function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/bf16subl.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(bfloat16, bf16subl, (long double x, long double y)) {
+ return fputil::generic::sub<bfloat16>(x, y);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp
index 71b23c4..0bd8f71 100644
--- a/libc/src/math/generic/cbrtf.cpp
+++ b/libc/src/math/generic/cbrtf.cpp
@@ -7,153 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/cbrtf.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/cbrtf.h"
namespace LIBC_NAMESPACE_DECL {
-namespace {
-
-// Look up table for 2^(i/3) for i = 0, 1, 2.
-constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0};
-
-// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1
-// generated by Sollya with:
-// > for i from 0 to 15 do {
-// P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]);
-// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
-// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
-// coeff(P, 6), "},");
-// };
-// Then (1 + x)^(1/3) ~ 1 + x * P(x).
-constexpr double COEFFS[16][7] = {
- {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5,
- -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6,
- 0x1.047f75e0aff14p-6},
- {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5,
- -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6,
- 0x1.716eca1d6e3bcp-7},
- {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5,
- -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6,
- 0x1.0aeca34893785p-7},
- {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5,
- -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6,
- 0x1.88410674c6a5dp-8},
- {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5,
- -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7,
- 0x1.249bb51a1c498p-8},
- {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5,
- -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7,
- 0x1.ba9230918fa2ep-9},
- {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5,
- -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7,
- 0x1.52e3e17c71069p-9},
- {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5,
- -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7,
- 0x1.0677d9af6aad4p-9},
- {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5,
- -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7,
- 0x1.9ad325dc7adcbp-10},
- {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5,
- -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8,
- 0x1.449f5ee175c69p-10},
- {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5,
- -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8,
- 0x1.02cb36389bd79p-10},
- {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5,
- -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8,
- 0x1.a008ae91f5936p-11},
- {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5,
- -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8,
- 0x1.50f8174cdb6e9p-11},
- {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5,
- -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9,
- 0x1.12e0e94e8586dp-11},
- {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5,
- -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9,
- 0x1.c3726535f1fc6p-12},
- {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5,
- -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9,
- 0x1.75077abf18d84p-12},
-};
-
-} // anonymous namespace
-
-LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) {
- using FloatBits = typename fputil::FPBits<float>;
- using DoubleBits = typename fputil::FPBits<double>;
-
- FloatBits x_bits(x);
-
- uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff;
- uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN;
-
- if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) {
- // x is 0, Inf, or NaN.
- // Make sure it works for FTZ/DAZ modes.
- return x + x;
- }
-
- double xd = static_cast<double>(x);
- DoubleBits xd_bits(xd);
-
- // When using biased exponent of x in double precision,
- // x_e = real_exponent_of_x + 1023
- // Then:
- // x_e / 3 = real_exponent_of_x / 3 + 1023/3
- // = real_exponent_of_x / 3 + 341
- // So to make it the correct biased exponent of x^(1/3), we add
- // 1023 - 341 = 682
- // to the quotient x_e / 3.
- unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
- unsigned out_e = (x_e / 3 + 682) | sign_bit;
- unsigned shift_e = x_e % 3;
-
- // Set x_m = 2^(x_e % 3) * (1.mantissa)
- uint64_t x_m = xd_bits.get_mantissa();
- // Use the leading 4 bits for look up table
- unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4));
-
- x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
- << DoubleBits::FRACTION_LEN;
-
- double x_reduced = DoubleBits(x_m).get_val();
- double dx = x_reduced - 1.0;
-
- double dx_sq = dx * dx;
- double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0);
- double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]);
- double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]);
- double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]);
-
- double dx_4 = dx_sq * dx_sq;
- double p0 = fputil::multiply_add(dx_sq, c1, c0);
- double p1 = fputil::multiply_add(dx_sq, c3, c2);
-
- double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e];
-
- uint64_t r_m = DoubleBits(r).get_mantissa();
- // Check if the output is exact. To be exact, the smallest 1-bit of the
- // output has to be at least 2^-7 or higher. So we check the lowest 44 bits
- // to see if they are within 2^(-52 + 3) errors from all zeros, then the
- // result cube root is exact.
- if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) {
- if ((r_m & 0xfffffffffff) <= 8)
- r_m &= 0xffff'ffff'ffff'ffe0;
- else
- r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20;
- fputil::clear_except_if_required(FE_INEXACT);
- }
- // Adjust exponent and sign.
- uint64_t r_bits =
- r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);
-
- return static_cast<float>(DoubleBits(r_bits).get_val());
-}
+LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { return math::cbrtf(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/integration/src/__support/GPU/CMakeLists.txt b/libc/test/integration/src/__support/GPU/CMakeLists.txt
index e066830..1fb175b 100644
--- a/libc/test/integration/src/__support/GPU/CMakeLists.txt
+++ b/libc/test/integration/src/__support/GPU/CMakeLists.txt
@@ -27,3 +27,16 @@ add_integration_test(
LOADER_ARGS
--threads 64
)
+
+add_libc_test(
+ fixedstack_test
+ SUITE
+ libc-support-gpu-tests
+ SRCS
+ fixedstack_test.cpp
+ DEPENDS
+ libc.src.__support.GPU.fixedstack
+ LOADER_ARGS
+ --threads 32
+ --blocks 16
+)
diff --git a/libc/test/integration/src/__support/GPU/fixedstack_test.cpp b/libc/test/integration/src/__support/GPU/fixedstack_test.cpp
new file mode 100644
index 0000000..fde51df
--- /dev/null
+++ b/libc/test/integration/src/__support/GPU/fixedstack_test.cpp
@@ -0,0 +1,44 @@
+//===-- Integration test for the lock-free stack --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/GPU/fixedstack.h"
+#include "src/__support/GPU/utils.h"
+#include "test/IntegrationTest/test.h"
+
+using namespace LIBC_NAMESPACE;
+
+static FixedStack<uint32_t, 2048> global_stack;
+
+void run() {
+ // We need enough space in the stack as threads in flight can temporarily
+ // consume memory before they finish committing it back to the stack.
+ ASSERT_EQ(gpu::get_num_blocks() * gpu::get_num_threads(), 512);
+
+ uint32_t val;
+ uint32_t num_threads = static_cast<uint32_t>(gpu::get_num_threads());
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_TRUE(global_stack.push(UINT32_MAX));
+ EXPECT_TRUE(global_stack.pop(val));
+ ASSERT_TRUE(val < num_threads || val == UINT32_MAX);
+ }
+
+ EXPECT_TRUE(global_stack.push(static_cast<uint32_t>(gpu::get_thread_id())));
+ EXPECT_TRUE(global_stack.push(static_cast<uint32_t>(gpu::get_thread_id())));
+ EXPECT_TRUE(global_stack.pop(val));
+ ASSERT_TRUE(val < num_threads || val == UINT32_MAX);
+
+ // Retry until one push of the default value succeeds (NOTE: not a fill).
+ while (!global_stack.push(UINT32_MAX))
+ ;
+}
+
+TEST_MAIN(int argc, char **argv, char **envp) {
+ run();
+
+ return 0;
+}
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index f5ea510..9685aea 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -27,6 +27,7 @@ add_fp_unittest(
libc.src.__support.math.atanhf
libc.src.__support.math.atanhf16
libc.src.__support.math.cbrt
+ libc.src.__support.math.cbrtf
libc.src.__support.math.erff
libc.src.__support.math.exp
libc.src.__support.math.exp10
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 3d64e5e..5e57c49e 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -49,6 +49,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat) {
EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atan2f(0.0f, 0.0f));
EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanf(0.0f));
EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanhf(0.0f));
+ EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::cbrtf(0.0f));
EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f));
EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f));
EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f));
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 43cde0d..a74f9fe 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -2972,6 +2972,118 @@ add_fp_unittest(
libc.src.__support.macros.properties.types
)
+add_fp_unittest(
+ bf16add_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16add_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.src.math.bf16add
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16addf_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16addf_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.src.math.bf16addf
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16addl_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16addl_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.src.math.bf16addl
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16addf128_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16addf128_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.src.math.bf16addf128
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16sub_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16sub_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.src.math.bf16sub
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16subf_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16subf_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.src.math.bf16subf
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16subl_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16subl_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.src.math.bf16subl
+ libc.src.__support.FPUtil.bfloat16
+)
+
+add_fp_unittest(
+ bf16subf128_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ bf16subf128_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.src.math.bf16subf128
+ libc.src.__support.FPUtil.bfloat16
+)
+
add_subdirectory(generic)
add_subdirectory(smoke)
diff --git a/libc/test/src/math/bf16add_test.cpp b/libc/test/src/math/bf16add_test.cpp
new file mode 100644
index 0000000..9e9c594
--- /dev/null
+++ b/libc/test/src/math/bf16add_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16add ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16add.h"
+
+LIST_ADD_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16add)
diff --git a/libc/test/src/math/bf16addf128_test.cpp b/libc/test/src/math/bf16addf128_test.cpp
new file mode 100644
index 0000000..46f7ad3
--- /dev/null
+++ b/libc/test/src/math/bf16addf128_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addf128 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addf128.h"
+
+LIST_ADD_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16addf128)
diff --git a/libc/test/src/math/bf16addf_test.cpp b/libc/test/src/math/bf16addf_test.cpp
new file mode 100644
index 0000000..06d56cf
--- /dev/null
+++ b/libc/test/src/math/bf16addf_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addf.h"
+
+LIST_ADD_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16addf)
diff --git a/libc/test/src/math/bf16addl_test.cpp b/libc/test/src/math/bf16addl_test.cpp
new file mode 100644
index 0000000..bf54827
--- /dev/null
+++ b/libc/test/src/math/bf16addl_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addl --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addl.h"
+
+LIST_ADD_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16addl)
diff --git a/libc/test/src/math/bf16sub_test.cpp b/libc/test/src/math/bf16sub_test.cpp
new file mode 100644
index 0000000..4a793dc
--- /dev/null
+++ b/libc/test/src/math/bf16sub_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16sub ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16sub.h"
+
+LIST_SUB_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16sub)
diff --git a/libc/test/src/math/bf16subf128_test.cpp b/libc/test/src/math/bf16subf128_test.cpp
new file mode 100644
index 0000000..25d6711
--- /dev/null
+++ b/libc/test/src/math/bf16subf128_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subf128 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subf128.h"
+
+LIST_SUB_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16subf128)
diff --git a/libc/test/src/math/bf16subf_test.cpp b/libc/test/src/math/bf16subf_test.cpp
new file mode 100644
index 0000000..e8c7440
--- /dev/null
+++ b/libc/test/src/math/bf16subf_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subf.h"
+
+LIST_SUB_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16subf)
diff --git a/libc/test/src/math/bf16subl_test.cpp b/libc/test/src/math/bf16subl_test.cpp
new file mode 100644
index 0000000..2997369
--- /dev/null
+++ b/libc/test/src/math/bf16subl_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subl --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subl.h"
+
+LIST_SUB_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16subl)
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 5f497c6..dc1850a 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -5465,3 +5465,131 @@ add_fp_unittest(
libc.src.__support.macros.properties.os
libc.src.__support.macros.properties.types
)
+
+add_fp_unittest(
+ bf16add_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16add_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16add
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16addf_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16addf_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16addf
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16addl_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16addl_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16addl
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16addf128_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16addf128_test.cpp
+ HDRS
+ AddTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16addf128
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16sub_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16sub_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16sub
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16subf_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16subf_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16subf
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16subl_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16subl_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16subl
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
+
+add_fp_unittest(
+ bf16subf128_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ bf16subf128_test.cpp
+ HDRS
+ SubTest.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.math.bf16subf128
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.macros.properties.os
+)
diff --git a/libc/test/src/math/smoke/bf16add_test.cpp b/libc/test/src/math/smoke/bf16add_test.cpp
new file mode 100644
index 0000000..9e9c594
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16add_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16add ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16add.h"
+
+LIST_ADD_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16add)
diff --git a/libc/test/src/math/smoke/bf16addf128_test.cpp b/libc/test/src/math/smoke/bf16addf128_test.cpp
new file mode 100644
index 0000000..46f7ad3
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16addf128_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addf128 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addf128.h"
+
+LIST_ADD_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16addf128)
diff --git a/libc/test/src/math/smoke/bf16addf_test.cpp b/libc/test/src/math/smoke/bf16addf_test.cpp
new file mode 100644
index 0000000..06d56cf
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16addf_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addf.h"
+
+LIST_ADD_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16addf)
diff --git a/libc/test/src/math/smoke/bf16addl_test.cpp b/libc/test/src/math/smoke/bf16addl_test.cpp
new file mode 100644
index 0000000..bf54827
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16addl_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16addl --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16addl.h"
+
+LIST_ADD_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16addl)
diff --git a/libc/test/src/math/smoke/bf16sub_test.cpp b/libc/test/src/math/smoke/bf16sub_test.cpp
new file mode 100644
index 0000000..4a793dc
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16sub_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16sub ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16sub.h"
+
+LIST_SUB_TESTS(bfloat16, double, LIBC_NAMESPACE::bf16sub)
diff --git a/libc/test/src/math/smoke/bf16subf128_test.cpp b/libc/test/src/math/smoke/bf16subf128_test.cpp
new file mode 100644
index 0000000..25d6711
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16subf128_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subf128 -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subf128.h"
+
+LIST_SUB_TESTS(bfloat16, float128, LIBC_NAMESPACE::bf16subf128)
diff --git a/libc/test/src/math/smoke/bf16subf_test.cpp b/libc/test/src/math/smoke/bf16subf_test.cpp
new file mode 100644
index 0000000..e8c7440
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16subf_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subf.h"
+
+LIST_SUB_TESTS(bfloat16, float, LIBC_NAMESPACE::bf16subf)
diff --git a/libc/test/src/math/smoke/bf16subl_test.cpp b/libc/test/src/math/smoke/bf16subl_test.cpp
new file mode 100644
index 0000000..2997369
--- /dev/null
+++ b/libc/test/src/math/smoke/bf16subl_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for bf16subl --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/bf16subl.h"
+
+LIST_SUB_TESTS(bfloat16, long double, LIBC_NAMESPACE::bf16subl)
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index ae12a83..57e818c 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -411,6 +411,21 @@ template void explain_binary_operation_one_output_error(
#endif
template void explain_binary_operation_one_output_error(
Operation, const BinaryInput<bfloat16> &, bfloat16, double, RoundingMode);
+template void
+explain_binary_operation_one_output_error(Operation, const BinaryInput<float> &,
+ bfloat16, double, RoundingMode);
+template void explain_binary_operation_one_output_error(
+ Operation, const BinaryInput<double> &, bfloat16, double, RoundingMode);
+template void
+explain_binary_operation_one_output_error(Operation,
+ const BinaryInput<long double> &,
+ bfloat16, double, RoundingMode);
+#if defined(LIBC_TYPES_HAS_FLOAT128) && \
+ defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE)
+template void explain_binary_operation_one_output_error(
+ Operation, const BinaryInput<float128> &, bfloat16, double, RoundingMode);
+#endif // defined(LIBC_TYPES_HAS_FLOAT128) &&
+ // defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE)
template <typename InputType, typename OutputType>
void explain_ternary_operation_one_output_error(
@@ -648,6 +663,26 @@ template bool compare_binary_operation_one_output(Operation,
const BinaryInput<bfloat16> &,
bfloat16, double,
RoundingMode);
+
+template bool compare_binary_operation_one_output(Operation,
+ const BinaryInput<float> &,
+ bfloat16, double,
+ RoundingMode);
+template bool compare_binary_operation_one_output(Operation,
+ const BinaryInput<double> &,
+ bfloat16, double,
+ RoundingMode);
+template bool
+compare_binary_operation_one_output(Operation, const BinaryInput<long double> &,
+ bfloat16, double, RoundingMode);
+#if defined(LIBC_TYPES_HAS_FLOAT128) && \
+ defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE)
+template bool compare_binary_operation_one_output(Operation,
+ const BinaryInput<float128> &,
+ bfloat16, double,
+ RoundingMode);
+#endif // defined(LIBC_TYPES_HAS_FLOAT128) &&
+ // defined(LIBC_TYPES_FLOAT128_IS_NOT_LONG_DOUBLE)
template <typename InputType, typename OutputType>
bool compare_ternary_operation_one_output(Operation op,
const TernaryInput<InputType> &input,
diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py
index b054ed4..2118db6 100644
--- a/libc/utils/hdrgen/hdrgen/header.py
+++ b/libc/utils/hdrgen/hdrgen/header.py
@@ -204,7 +204,7 @@ class HeaderFile:
current_guard = None
for function in self.functions:
- if function.guard == None:
+ if function.guard == None and current_guard == None:
content.append(str(function) + " __NOEXCEPT;")
content.append("")
else:
@@ -221,7 +221,8 @@ class HeaderFile:
content.append(f"#endif // {current_guard}")
content.append("")
current_guard = function.guard
- content.append(f"#ifdef {current_guard}")
+ if current_guard is not None:
+ content.append(f"#ifdef {current_guard}")
content.append(str(function) + " __NOEXCEPT;")
content.append("")
if current_guard != None: