112 files changed, 2893 insertions, 817 deletions
diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index c54d7d0..e766eb2 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -754,6 +754,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 80cd15e..336b1e6 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -757,6 +757,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index c9f8118..e92ec87 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -757,6 +757,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/darwin/aarch64/entrypoints.txt b/libc/config/darwin/aarch64/entrypoints.txt
index 3bfdcdb..03e00a3 100644
--- a/libc/config/darwin/aarch64/entrypoints.txt
+++ b/libc/config/darwin/aarch64/entrypoints.txt
@@ -588,6 +588,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt
index c55b6aa..00cedab 100644
--- a/libc/config/darwin/x86_64/entrypoints.txt
+++ b/libc/config/darwin/x86_64/entrypoints.txt
@@ -231,6 +231,11 @@ set(TARGET_LIBM_ENTRYPOINTS
     #libc.src.math.truncl
 )
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/gpu/amdgpu/entrypoints.txt b/libc/config/gpu/amdgpu/entrypoints.txt
index 463727b..e39819d 100644
--- a/libc/config/gpu/amdgpu/entrypoints.txt
+++ b/libc/config/gpu/amdgpu/entrypoints.txt
@@ -612,6 +612,11 @@ if(LIBC_TYPES_HAS_FLOAT16)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/gpu/nvptx/entrypoints.txt b/libc/config/gpu/nvptx/entrypoints.txt
index 13b77172..26e3b15 100644
--- a/libc/config/gpu/nvptx/entrypoints.txt
+++ b/libc/config/gpu/nvptx/entrypoints.txt
@@ -614,6 +614,11 @@ if(LIBC_TYPES_HAS_FLOAT16)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index b2abebe..d76cdc2 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -842,6 +842,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LLVM_LIBC_FULL_BUILD)
   list(APPEND TARGET_LIBC_ENTRYPOINTS
     # assert.h entrypoints
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 5865dc9..813c34d 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -458,6 +458,11 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ufromfpxl
 )
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 79077a5..190aef7 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -861,6 +861,11 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 5e8278e..3ec05a5 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -892,6 +892,12 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 if(LIBC_COMPILER_HAS_FIXED_POINT)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # stdfix.h _Fract and _Accum entrypoints
@@ -1050,6 +1056,9 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_join
     libc.src.pthread.pthread_key_create
     libc.src.pthread.pthread_key_delete
+    libc.src.pthread.pthread_barrier_init
+    libc.src.pthread.pthread_barrier_wait
+    libc.src.pthread.pthread_barrier_destroy
     libc.src.pthread.pthread_mutex_destroy
     libc.src.pthread.pthread_mutex_init
     libc.src.pthread.pthread_mutex_lock
@@ -1267,6 +1276,9 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.wchar.mbsinit
     libc.src.wchar.mbrtowc
     libc.src.wchar.mbtowc
+    libc.src.wchar.mbstowcs
+    libc.src.wchar.mbsrtowcs
+    libc.src.wchar.mbsnrtowcs
     libc.src.wchar.wcrtomb
     libc.src.wchar.wctomb
     libc.src.wchar.wcstombs
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 1802729..3160d57 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -304,6 +304,11 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.truncl
 )
 
+list(APPEND TARGET_LIBM_ENTRYPOINTS
+  # bfloat16 entrypoints
+  libc.src.math.fabsbf16
+)
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index 5fc25d0..f3f01c1 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -73,6 +73,15 @@ add_proxy_header_library(
 )
 
 add_proxy_header_library(
+  pthread_macros
+  HDRS
+    pthread_macros.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-macros.pthread_macros
+    libc.include.pthread
+)
+
+add_proxy_header_library(
   sched_macros
   HDRS
     sched_macros.h
diff --git a/libc/hdr/pthread_macros.h b/libc/hdr/pthread_macros.h
new file mode 100644
index 0000000..f913015
--- /dev/null
+++ b/libc/hdr/pthread_macros.h
@@ -0,0 +1,22 @@
+//===-- Definition of macros from pthread.h -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_PTHREAD_MACROS_H
+#define LLVM_LIBC_HDR_PTHREAD_MACROS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-macros/pthread-macros.h"
+
+#else // Overlay mode
+
+#include <pthread.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_PTHREAD_MACROS_H
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index c212363..1c1f242 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -242,6 +242,22 @@ add_proxy_header_library(
 )
 
 add_proxy_header_library(
+  pthread_barrier_t
+  HDRS
+    pthread_barrier_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.pthread_barrier_t
+)
+
+add_proxy_header_library(
+  pthread_barrierattr_t
+  HDRS
+    pthread_barrierattr_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.pthread_barrierattr_t
+)
+
+add_proxy_header_library(
   atexithandler_t
   HDRS
     atexithandler_t.h
diff --git a/libc/hdr/types/pthread_barrier_t.h b/libc/hdr/types/pthread_barrier_t.h
new file mode 100644
index 0000000..57bcdfc
--- /dev/null
+++ b/libc/hdr/types/pthread_barrier_t.h
@@ -0,0 +1,22 @@
+//===-- Proxy for pthread_barrier_t ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIER_T_H
+#define LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIER_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/pthread_barrier_t.h"
+
+#else // Overlay mode
+
+#error "Cannot overlay pthread_barrier_t"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIER_T_H
diff --git a/libc/hdr/types/pthread_barrierattr_t.h b/libc/hdr/types/pthread_barrierattr_t.h
new file mode 100644
index 0000000..d9d14c1
--- /dev/null
+++ b/libc/hdr/types/pthread_barrierattr_t.h
@@ -0,0 +1,22 @@
+//===-- Proxy for pthread_barrierattr_t -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIERATTR_T_H
+#define LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIERATTR_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/pthread_barrierattr_t.h"
+
+#else // Overlay mode
+
+#error "Cannot overlay pthread_barrierattr_t"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_PTHREAD_BARRIERATTR_T_H
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 120f385..74fcea0 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -392,6 +392,8 @@ add_header_macro(
     .llvm-libc-types.pthread_attr_t
     .llvm-libc-types.pthread_condattr_t
     .llvm-libc-types.pthread_key_t
+    .llvm-libc-types.pthread_barrier_t
+    .llvm-libc-types.pthread_barrierattr_t
     .llvm-libc-types.pthread_mutex_t
     .llvm-libc-types.pthread_mutexattr_t
     .llvm-libc-types.pthread_once_t
diff --git a/libc/include/llvm-libc-macros/pthread-macros.h b/libc/include/llvm-libc-macros/pthread-macros.h
index fcc6ef9..ce467b7 100644
--- a/libc/include/llvm-libc-macros/pthread-macros.h
+++ b/libc/include/llvm-libc-macros/pthread-macros.h
@@ -22,6 +22,8 @@
 #define PTHREAD_MUTEX_STALLED 0
 #define PTHREAD_MUTEX_ROBUST 1
 
+#define PTHREAD_BARRIER_SERIAL_THREAD -1
+
 #define PTHREAD_ONCE_INIT {0}
 
 #define PTHREAD_PROCESS_PRIVATE 0
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 4ccdde6..451beae 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -10,6 +10,7 @@ add_header(__exec_envp_t HDR __exec_envp_t.h)
 add_header(__futex_word HDR __futex_word.h)
 add_header(pid_t HDR pid_t.h)
 add_header(__mutex_type HDR __mutex_type.h DEPENDS .__futex_word .pid_t)
+add_header(__barrier_type HDR __barrier_type.h)
 add_header(__pthread_once_func_t HDR __pthread_once_func_t.h)
 add_header(__pthread_start_t HDR __pthread_start_t.h)
 add_header(__pthread_tss_dtor_t HDR __pthread_tss_dtor_t.h)
@@ -53,6 +54,8 @@ add_header(pthread_condattr_t HDR pthread_condattr_t.h DEPENDS .clockid_t)
 add_header(pthread_key_t HDR pthread_key_t.h)
 add_header(pthread_mutex_t HDR pthread_mutex_t.h DEPENDS .__futex_word .__mutex_type)
 add_header(pthread_mutexattr_t HDR pthread_mutexattr_t.h)
+add_header(pthread_barrier_t HDR pthread_barrier_t.h DEPENDS .__barrier_type)
+add_header(pthread_barrierattr_t HDR pthread_barrierattr_t.h)
 add_header(pthread_once_t HDR pthread_once_t.h DEPENDS .__futex_word)
 add_header(pthread_rwlock_t HDR pthread_rwlock_t.h DEPENDS .__futex_word .pid_t)
 add_header(pthread_rwlockattr_t HDR pthread_rwlockattr_t.h)
diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h
new file mode 100644
index 0000000..5971261
--- /dev/null
+++ b/libc/include/llvm-libc-types/__barrier_type.h
@@ -0,0 +1,25 @@
+//===-- Definition of __barrier_type type ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES__BARRIER_TYPE_H
+#define LLVM_LIBC_TYPES__BARRIER_TYPE_H
+
+#include <stdbool.h> // Provides `bool` for C translation units before C23.
+
+// C-visible storage for a barrier. NOTE(review): the inline size/alignment
+// annotations suggest this must mirror the internal Barrier class layout.
+typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) {
+  unsigned expected;
+  unsigned waiting;
+  bool blocking;
+  char entering[24 /* sizeof (CndVar) */];
+  char exiting[24 /* sizeof (CndVar) */];
+  char mutex[24 /* sizeof (Mutex) */];
+} __barrier_type;
+
+#endif // LLVM_LIBC_TYPES__BARRIER_TYPE_H
diff --git a/libc/include/llvm-libc-types/pthread_barrier_t.h b/libc/include/llvm-libc-types/pthread_barrier_t.h
new file mode 100644
index 0000000..86fbf7c
--- /dev/null
+++ b/libc/include/llvm-libc-types/pthread_barrier_t.h
@@ -0,0 +1,15 @@
+//===-- Definition of pthread_barrier_t type ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_PTHREAD_BARRIER_T_H
+#define LLVM_LIBC_TYPES_PTHREAD_BARRIER_T_H
+
+#include "__barrier_type.h"
+typedef __barrier_type pthread_barrier_t;
+
+#endif // LLVM_LIBC_TYPES_PTHREAD_BARRIER_T_H
diff --git a/libc/include/llvm-libc-types/pthread_barrierattr_t.h b/libc/include/llvm-libc-types/pthread_barrierattr_t.h
new file mode 100644
index 0000000..064be5b
--- /dev/null
+++ b/libc/include/llvm-libc-types/pthread_barrierattr_t.h
@@ -0,0 +1,18 @@
+//===-- Definition of pthread_barrierattr_t type --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H
+#define LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H
+
+#include <stdbool.h> // Provides `bool` for C translation units before C23.
+
+typedef struct {
+  bool pshared;
+} pthread_barrierattr_t;
+
+#endif // LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H
diff --git a/libc/include/pthread.yaml b/libc/include/pthread.yaml
index 5b27e68..8afce20 100644
--- a/libc/include/pthread.yaml
+++ b/libc/include/pthread.yaml
@@ -6,6 +6,8 @@ types:
   - type_name: pthread_once_t
   - type_name: pthread_mutex_t
   - type_name: pthread_mutexattr_t
+  - type_name: pthread_barrier_t
+  - type_name: pthread_barrierattr_t
   - type_name: pthread_key_t
   - type_name: pthread_condattr_t
   - type_name: __pthread_tss_dtor_t
@@ -277,6 +279,26 @@ functions:
     arguments:
       - type: pthread_mutexattr_t *__restrict
       - type: int
+  - name: pthread_barrier_init
+    standards:
+      - POSIX
+    return_type: int
+    arguments:
+      - type: pthread_barrier_t *__restrict
+      - type: const pthread_barrierattr_t *__restrict
+      - type: int
+  - name: pthread_barrier_wait
+    standards:
+      - POSIX
+    return_type: int
+    arguments:
+      - type: pthread_barrier_t *
+  - name: pthread_barrier_destroy
+    standards:
+      - POSIX
+    return_type: int
+    arguments:
+      - type: pthread_barrier_t *
   - name: pthread_once
     standards:
       - POSIX
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 6e1f595..8178091 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -53,6 +53,33 @@ functions:
       - type: wchar_t *__restrict
       - type: const char *__restrict
       - type: size_t
+  - name: mbsnrtowcs
+    standards:
+      - POSIX # mbsnrtowcs is specified by POSIX.1-2008, not by ISO C.
+    return_type: size_t
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const char **__restrict
+      - type: size_t
+      - type: size_t
+      - type: mbstate_t *__restrict
+  - name: mbsrtowcs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const char **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict
+  - name: mbstowcs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const char *__restrict
+      - type: size_t
   - name: mbsinit
     standards:
       - stdc
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 042daf6..2153664 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -19,6 +19,11 @@
 #include "math/acospif16.h"
 #include "math/asin.h"
 #include "math/asinf.h"
+#include "math/asinf16.h"
+#include "math/asinhf.h"
+#include "math/asinhf16.h"
+#include "math/atan.h"
+#include "math/atanf.h"
 #include "math/erff.h"
 #include "math/exp.h"
 #include "math/exp10.h"
diff --git a/libc/shared/math/asinf16.h b/libc/shared/math/asinf16.h
new file mode 100644
index 0000000..af5b2ec
--- /dev/null
+++ b/libc/shared/math/asinf16.h
@@ -0,0 +1,28 @@
+//===-- Shared asinf16 function ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINF16_H
+#define LLVM_LIBC_SHARED_MATH_ASINF16_H
+
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/asinf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINF16_H
diff --git a/libc/shared/math/asinhf.h b/libc/shared/math/asinhf.h
new file mode 100644
index 0000000..c4a5509
--- /dev/null
+++ b/libc/shared/math/asinhf.h
@@ -0,0 +1,23 @@
+//===-- Shared asinhf function ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINHF_H
+#define LLVM_LIBC_SHARED_MATH_ASINHF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/asinhf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinhf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINHF_H
diff --git a/libc/shared/math/asinhf16.h b/libc/shared/math/asinhf16.h
new file mode 100644
index 0000000..b8b007f
--- /dev/null
+++ b/libc/shared/math/asinhf16.h
@@ -0,0 +1,28 @@
+//===-- Shared asinhf16 function --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ASINHF16_H
+#define LLVM_LIBC_SHARED_MATH_ASINHF16_H
+
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/asinhf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::asinhf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_ASINHF16_H
diff --git a/libc/shared/math/atan.h b/libc/shared/math/atan.h
new file mode 100644
index 0000000..b9ba89b
--- /dev/null
+++ b/libc/shared/math/atan.h
@@ -0,0 +1,23 @@
+//===-- Shared atan function ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN_H
+#define LLVM_LIBC_SHARED_MATH_ATAN_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN_H
diff --git a/libc/shared/math/atanf.h b/libc/shared/math/atanf.h
new file mode 100644
index 0000000..858d727
--- /dev/null
+++ b/libc/shared/math/atanf.h
@@ -0,0 +1,23 @@
+//===-- Shared atanf function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATANF_H
+#define LLVM_LIBC_SHARED_MATH_ATANF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atanf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atanf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATANF_H
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h
index e6fad1b..e999ece 100644
--- a/libc/src/__support/FPUtil/cast.h
+++ b/libc/src/__support/FPUtil/cast.h
@@ -66,9 +66,9 @@ cast(InType x) {
         cpp::max(OutFPBits::FRACTION_LEN, InFPBits::FRACTION_LEN);
     DyadicFloat<cpp::bit_ceil(MAX_FRACTION_LEN)> xd(x);
     return xd.template as<OutType, /*ShouldSignalExceptions=*/true>();
+  } else {
+    return static_cast<OutType>(x);
   }
-
-  return static_cast<OutType>(x);
 }
 
 } // namespace LIBC_NAMESPACE::fputil
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 866aea7..8fff4cc 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -16,6 +16,7 @@
 
 #include "allocator.h"
 
+#include "src/__support/CPP/algorithm.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/new.h"
@@ -31,14 +32,12 @@ constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
 constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE;
 constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1;
 constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;
+constexpr static uint32_t BITS_IN_DWORD = sizeof(uint64_t) * 8;
 constexpr static uint32_t MIN_SIZE = 16;
 constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
 
 // The number of times to attempt claiming an in-progress slab allocation.
-constexpr static uint32_t MAX_TRIES = 128;
-
-// A sentinel used to indicate an invalid but non-null pointer value.
-constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
+constexpr static uint32_t MAX_TRIES = 1024;
 
 static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
 
@@ -70,8 +69,8 @@ static void rpc_free(void *ptr) {
 
 // Convert a potentially disjoint bitmask into an increasing integer per-lane
 // for use with indexing between gpu lanes.
-static inline uint32_t lane_count(uint64_t lane_mask) {
-  return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1));
+static inline uint32_t lane_count(uint64_t lane_mask, uint32_t id) {
+  return cpp::popcount(lane_mask & ((uint64_t(1) << id) - 1));
 }
 
 // Obtain an initial value to seed a random number generator. We use the rounded
@@ -133,7 +132,8 @@ static inline constexpr T round_up(const T x) {
 void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
   uint64_t mask = gpu::get_lane_mask();
   uint32_t workers = cpp::popcount(uniform);
-  for (uint32_t i = impl::lane_count(mask & uniform); i < n; i += workers)
+  for (uint32_t i = impl::lane_count(mask & uniform, gpu::get_lane_id()); i < n;
+       i += workers)
     s[i] = c;
 }
 
@@ -142,10 +142,27 @@ static inline constexpr bool is_pow2(uint64_t x) {
   return x && (x & (x - 1)) == 0;
 }
 
-// Where this chunk size should start looking in the global array.
-static inline constexpr uint32_t start_index(uint32_t chunk_index) {
-  return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) /
-         impl::get_chunk_id(SLAB_SIZE / 2);
+// Where this chunk size should start looking in the global array. Small
+// allocations are much more likely than large ones, so we give them the most
+// space. We use a cubic easing function normalized on the possible chunks.
+static inline constexpr uint32_t get_start_index(uint32_t chunk_size) {
+  constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2);
+  uint64_t norm =
+      (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk;
+  uint64_t bias = (norm * norm * norm) >> 32;
+  uint64_t inv = (1 << 16) - bias;
+  return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
+}
+
+// Returns the id of the highest lane set in the ballot at or below this one.
+static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
+  uint64_t mask = id < BITS_IN_DWORD - 1 ? ~0ull << (id + 1) : 0; // no << 64
+  return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1;
+}
+
+// We use a sentinel value to indicate a failed or in-progress allocation.
+template <typename T> bool is_sentinel(const T &x) {
+  return x == cpp::numeric_limits<T>::max();
 }
 
 } // namespace impl
@@ -264,28 +281,33 @@ struct Slab {
         continue;
 
       // We try using any known empty bits from the previous attempt first.
-      uint32_t start = gpu::shuffle(mask, cpp::countr_zero(uniform & mask),
-                                    ~after ? (old_index & ~(BITS_IN_WORD - 1)) +
-                                                 cpp::countr_zero(~after)
-                                           : impl::xorshift32(state));
+      uint32_t start = gpu::shuffle(
+          mask, cpp::countr_zero(uniform & mask),
+          ~after ? (old_index & ~(BITS_IN_WORD - 1)) + cpp::countr_zero(~after)
+                 : __builtin_align_down(impl::xorshift32(state), BITS_IN_WORD));
 
-      uint32_t id = impl::lane_count(uniform & mask);
+      // Each lane tries to claim one bit in a single contiguous mask.
+      uint32_t id = impl::lane_count(uniform & mask, gpu::get_lane_id());
       uint32_t index = (start + id) % usable_bits(chunk_size);
       uint32_t slot = index / BITS_IN_WORD;
       uint32_t bit = index % BITS_IN_WORD;
 
       // Get the mask of bits destined for the same slot and coalesce it.
-      uint64_t match = uniform & gpu::match_any(mask, slot);
-      uint32_t length = cpp::popcount(match);
-      uint32_t bitmask = gpu::shuffle(
-          mask, cpp::countr_zero(match),
-          static_cast<uint32_t>((uint64_t(1) << length) - 1) << bit);
+      uint32_t leader = impl::get_leader_id(
+          uniform & gpu::ballot(mask, !id || index % BITS_IN_WORD == 0),
+          gpu::get_lane_id());
+      uint32_t length = cpp::popcount(uniform & mask) -
+                        impl::lane_count(uniform & mask, leader);
+      uint32_t bitmask =
+          static_cast<uint32_t>(
+              (uint64_t(1) << cpp::min(length, BITS_IN_WORD)) - 1)
+          << bit;
 
       uint32_t before = 0;
-      if (gpu::get_lane_id() == static_cast<uint32_t>(cpp::countr_zero(match)))
+      if (gpu::get_lane_id() == leader)
         before = cpp::AtomicRef(get_bitfield()[slot])
                      .fetch_or(bitmask, cpp::MemoryOrder::RELAXED);
-      before = gpu::shuffle(mask, cpp::countr_zero(match), before);
+      before = gpu::shuffle(mask, leader, before);
       if (~before & (1 << bit))
         result = ptr_from_index(index, chunk_size);
       else
@@ -323,20 +345,20 @@ struct GuardPtr {
 private:
   struct RefCounter {
     // Indicates that the object is in its deallocation phase and thus invalid.
-    static constexpr uint64_t INVALID = uint64_t(1) << 63;
+    static constexpr uint32_t INVALID = uint32_t(1) << 31;
 
     // If a read preempts an unlock call we indicate this so the following
     // unlock call can swap out the helped bit and maintain exclusive ownership.
-    static constexpr uint64_t HELPED = uint64_t(1) << 62;
+    static constexpr uint32_t HELPED = uint32_t(1) << 30;
 
     // Resets the reference counter, cannot be reset to zero safely.
-    void reset(uint32_t n, uint64_t &count) {
+    void reset(uint32_t n, uint32_t &count) {
       counter.store(n, cpp::MemoryOrder::RELAXED);
       count = n;
     }
 
     // Acquire a slot in the reference counter if it is not invalid.
-    bool acquire(uint32_t n, uint64_t &count) {
+    bool acquire(uint32_t n, uint32_t &count) {
       count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
       return (count & INVALID) == 0;
     }
@@ -349,7 +371,7 @@ private:
       // another thread resurrected the counter and we quit, or a parallel read
       // helped us invalidating it. For the latter, claim that flag and return.
       if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
-        uint64_t expected = 0;
+        uint32_t expected = 0;
         if (counter.compare_exchange_strong(expected, INVALID,
                                             cpp::MemoryOrder::RELAXED,
                                             cpp::MemoryOrder::RELAXED))
@@ -372,28 +394,29 @@ private:
       return (val & INVALID) ? 0 : val;
     }
 
-    cpp::Atomic<uint64_t> counter{0};
+    cpp::Atomic<uint32_t> counter{0};
   };
 
-  cpp::Atomic<Slab *> ptr{nullptr};
-  RefCounter ref{};
+  cpp::Atomic<Slab *> ptr;
+  RefCounter ref;
 
   // Should be called be a single lane for each different pointer.
   template <typename... Args>
-  Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+  Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) {
     Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
     if (!expected &&
         ptr.compare_exchange_strong(
-            expected, reinterpret_cast<Slab *>(SENTINEL),
+            expected,
+            reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
             cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
-      count = cpp::numeric_limits<uint64_t>::max();
+      count = cpp::numeric_limits<uint32_t>::max();
       void *raw = impl::rpc_allocate(sizeof(Slab));
       if (!raw)
         return nullptr;
       return new (raw) Slab(cpp::forward<Args>(args)...);
     }
 
-    if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
+    if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
       return nullptr;
 
     if (!ref.acquire(n, count))
@@ -405,7 +428,7 @@ private:
 
   // Finalize the associated memory and signal that it is ready to use by
   // resetting the counter.
-  void finalize(Slab *mem, uint32_t n, uint64_t &count) {
+  void finalize(Slab *mem, uint32_t n, uint32_t &count) {
     cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
     ptr.store(mem, cpp::MemoryOrder::RELAXED);
     cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
@@ -418,7 +441,7 @@ public:
   // The uniform mask represents which lanes share the same pointer. For each
   // uniform value we elect a leader to handle it on behalf of the other lanes.
   template <typename... Args>
-  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count,
                  Args &&...args) {
     count = 0;
     Slab *result = nullptr;
@@ -433,14 +456,17 @@ public:
 
     // We defer storing the newly allocated slab until now so that we can use
     // multiple lanes to initialize it and release it for use.
-    if (count == cpp::numeric_limits<uint64_t>::max()) {
+    if (impl::is_sentinel(count)) {
       result->initialize(uniform);
       if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
         finalize(result, cpp::popcount(uniform), count);
+      count =
+          gpu::shuffle(gpu::get_lane_mask(), cpp::countr_zero(uniform), count);
     }
 
-    if (count != cpp::numeric_limits<uint64_t>::max())
-      count = count - cpp::popcount(uniform) + impl::lane_count(uniform) + 1;
+    if (!impl::is_sentinel(count))
+      count = count - cpp::popcount(uniform) +
+              impl::lane_count(uniform, gpu::get_lane_id());
 
     return result;
   }
@@ -469,7 +495,7 @@ static GuardPtr slots[ARRAY_SIZE] = {};
 // Keep a cache of the last successful slot for each chunk size. Initialize it
 // to an even spread of the total size. Must be updated if the chunking scheme
 // changes.
-#define S(X) (impl::start_index(X))
+#define S(X) (impl::get_start_index(X))
 static cpp::Atomic<uint32_t> indices[] = {
     S(16),     S(32),     S(48),     S(64),     S(96),     S(112),    S(128),
     S(192),    S(224),    S(256),    S(384),    S(448),    S(512),    S(768),
@@ -481,26 +507,28 @@ static cpp::Atomic<uint32_t> indices[] = {
 #undef S
 
 // Tries to find a slab in the table that can support the given chunk size.
-static Slab *find_slab(uint32_t chunk_size) {
+static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
   // We start at the index of the last successful allocation for this kind.
   uint32_t chunk_id = impl::get_chunk_id(chunk_size);
   uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED);
-  uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
 
-  for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) {
+  for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) {
     uint32_t index =
-        !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE;
+        !offset ? start
+                : (impl::get_start_index(chunk_size) + offset - 1) % ARRAY_SIZE;
 
-    if (slots[index].use_count() < Slab::available_chunks(chunk_size)) {
+    if (!offset ||
+        slots[index].use_count() < Slab::available_chunks(chunk_size)) {
       uint64_t lane_mask = gpu::get_lane_mask();
-      uint64_t reserved = 0;
+      uint32_t reserved = 0;
 
       Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
                                          reserved, chunk_size, index);
 
       // If there is a slab allocation in progress we retry a few times.
       for (uint32_t retries = 0;
-           retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) {
+           !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
+           retries++) {
         uint64_t lane_mask = gpu::get_lane_mask();
         slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
                                      chunk_size, index);
@@ -510,17 +538,21 @@ static Slab *find_slab(uint32_t chunk_size) {
       // If we find a slab with a matching chunk size then we store the result.
       // Otherwise, we need to free the claimed lock and continue. In the case
       // of out-of-memory we receive a sentinel value and return a failure.
-      if (slab && reserved <= Slab::available_chunks(chunk_size) &&
+      if (slab && reserved < Slab::available_chunks(chunk_size) &&
           slab->get_chunk_size() == chunk_size) {
         if (index != start)
           indices[chunk_id].store(index, cpp::MemoryOrder::RELAXED);
+        uniform = uniform & gpu::get_lane_mask();
         return slab;
-      } else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
+      } else if (slab && (reserved >= Slab::available_chunks(chunk_size) ||
                           slab->get_chunk_size() != chunk_size)) {
         slots[index].unlock(gpu::get_lane_mask(),
                             gpu::get_lane_mask() & uniform);
-      } else if (!slab && reserved == SENTINEL) {
+      } else if (!slab && impl::is_sentinel(reserved)) {
+        uniform = uniform & gpu::get_lane_mask();
         return nullptr;
+      } else {
+        sleep_briefly();
       }
     }
   }
@@ -547,12 +579,12 @@ void *allocate(uint64_t size) {
 
   // Try to find a slab for the rounded up chunk size and allocate from it.
   uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size));
-  Slab *slab = find_slab(chunk_size);
-  if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL))
+  uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
+  Slab *slab = find_slab(chunk_size, uniform);
+  if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab)))
     return nullptr;
 
   uint64_t lane_mask = gpu::get_lane_mask();
-  uint64_t uniform = gpu::match_any(lane_mask, slab->get_global_index());
   void *ptr = slab->allocate(lane_mask, uniform);
   return ptr;
 }
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index b096c61..95acc962 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -141,6 +141,80 @@ add_header_library(
 )
 
 add_header_library(
+  asinhf
+  HDRS
+    asinhf.h
+  DEPENDS
+    .acoshf_utils
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.config
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
+  asinhf16
+  HDRS
+    asinhf16.h
+  DEPENDS
+    .acoshf_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.config
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
+  atan_utils
+  HDRS
+    atan_utils.h
+  DEPENDS
+    libc.src.__support.integer_literals
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
+  atan
+  HDRS
+    atan.h
+  DEPENDS
+    .atan_utils
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
+  atanf
+  HDRS
+    atanf.h
+  DEPENDS
+    .inv_trigf_utils
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
   asinf
   HDRS
     asinf.h
@@ -157,6 +231,20 @@ add_header_library(
 )
 
 add_header_library(
+  asinf16
+  HDRS
+    asinf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
   erff
   HDRS
     erff.h
diff --git a/libc/src/__support/math/acos.h b/libc/src/__support/math/acos.h
index a52ead7..0e1e413 100644
--- a/libc/src/__support/math/acos.h
+++ b/libc/src/__support/math/acos.h
@@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr double acos(double x) {
+LIBC_INLINE static constexpr double acos(double x) {
   using DoubleDouble = fputil::DoubleDouble;
   using namespace asin_internal;
   using FPBits = fputil::FPBits<double>;
diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h
index 153087e..7a0c0e5 100644
--- a/libc/src/__support/math/acosf.h
+++ b/libc/src/__support/math/acosf.h
@@ -45,7 +45,7 @@ static constexpr fputil::ExceptValues<float, N_EXCEPTS> ACOSF_EXCEPTS = {{
 
 } // namespace acosf_internal
 
-static constexpr float acosf(float x) {
+LIBC_INLINE static constexpr float acosf(float x) {
   using namespace acosf_internal;
   using namespace inv_trigf_utils_internal;
   using FPBits = typename fputil::FPBits<float>;
diff --git a/libc/src/__support/math/acosf16.h b/libc/src/__support/math/acosf16.h
index 58d3761..3f0e002 100644
--- a/libc/src/__support/math/acosf16.h
+++ b/libc/src/__support/math/acosf16.h
@@ -26,7 +26,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 acosf16(float16 x) {
+LIBC_INLINE static constexpr float16 acosf16(float16 x) {
 
   // Generated by Sollya using the following command:
   // > round(pi/2, SG, RN);
diff --git a/libc/src/__support/math/acoshf.h b/libc/src/__support/math/acoshf.h
index f18f169..4e00311 100644
--- a/libc/src/__support/math/acoshf.h
+++ b/libc/src/__support/math/acoshf.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float acoshf(float x) {
+LIBC_INLINE static constexpr float acoshf(float x) {
   using namespace acoshf_internal;
   using FPBits_t = typename fputil::FPBits<float>;
   FPBits_t xbits(x);
diff --git a/libc/src/__support/math/acoshf16.h b/libc/src/__support/math/acoshf16.h
index a02b7b6..e5be2a8 100644
--- a/libc/src/__support/math/acoshf16.h
+++ b/libc/src/__support/math/acoshf16.h
@@ -28,7 +28,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 acoshf16(float16 x) {
+LIBC_INLINE static constexpr float16 acoshf16(float16 x) {
 
   using namespace acoshf_internal;
   constexpr size_t N_EXCEPTS = 2;
diff --git a/libc/src/__support/math/acospif16.h b/libc/src/__support/math/acospif16.h
index 5829aed..cf29c76 100644
--- a/libc/src/__support/math/acospif16.h
+++ b/libc/src/__support/math/acospif16.h
@@ -25,7 +25,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 acospif16(float16 x) {
+LIBC_INLINE static constexpr float16 acospif16(float16 x) {
   using FPBits = fputil::FPBits<float16>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/asin.h b/libc/src/__support/math/asin.h
index 84fc1cf..5e06d04 100644
--- a/libc/src/__support/math/asin.h
+++ b/libc/src/__support/math/asin.h
@@ -25,7 +25,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr double asin(double x) {
+LIBC_INLINE static constexpr double asin(double x) {
   using namespace asin_internal;
   using FPBits = fputil::FPBits<double>;
 
diff --git a/libc/src/__support/math/asinf16.h b/libc/src/__support/math/asinf16.h
new file mode 100644
index 0000000..3d032a4
--- /dev/null
+++ b/libc/src/__support/math/asinf16.h
@@ -0,0 +1,146 @@
+//===-- Implementation header for asinf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 asinf16(float16 x) {
+
+  // Generated by Sollya using the following command:
+  // > round(pi/2, D, RN);
+  constexpr float PI_2 = 0x1.921fb54442d18p0f;
+
+  using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  float xf = x;
+
+  // |x| > 0x1p0, |x| > 1, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
+    // asinf16(NaN) = NaN
+    if (xbits.is_nan()) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // 1 < |x| <= +/-inf
+    fputil::raise_except_if_required(FE_INVALID);
+    fputil::set_errno_if_required(EDOM);
+
+    return FPBits::quiet_nan().get_val();
+  }
+
+  float xsq = xf * xf;
+
+  // |x| <= 0x1p-1, |x| <= 0.5
+  if (x_abs <= 0x3800) {
+    // asinf16(+/-0) = +/-0
+    if (LIBC_UNLIKELY(x_abs == 0))
+      return x;
+
+    // Exhaustive tests show that,
+    // for |x| <= 0x1.878p-9, when:
+    // x > 0, and rounding upward, or
+    // x < 0, and rounding downward, then,
+    // asin(x) = x * 2^-11 + x
+    // else, in other rounding modes,
+    // asin(x) = x
+    if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) {
+      int rounding = fputil::quick_get_round();
+
+      if ((xbits.is_pos() && rounding == FE_UPWARD) ||
+          (xbits.is_neg() && rounding == FE_DOWNWARD))
+        return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf));
+      return x;
+    }
+
+    // Degree-8 even minimax polynomial P of asin(x)/x generated by Sollya with:
+    // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
+    float result =
+        fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
+                         0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
+    return fputil::cast<float16>(xf * result);
+  }
+
+  // When |x| > 0.5, assume that 0.5 < |x| <= 1,
+  //
+  // Step-by-step range-reduction proof:
+  // 1:  Let y = asin(x), such that, x = sin(y)
+  // 2:  From complementary angle identity:
+  //       x = sin(y) = cos(pi/2 - y)
+  // 3:  Let z = pi/2 - y, such that x = cos(z)
+  // 4:  From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
+  //       z = 2A, z/2 = A
+  //       cos(z) = 1 - 2 * sin^2(z/2)
+  // 5:  Make sin(z/2) subject of the formula:
+  //       sin(z/2) = sqrt((1 - cos(z))/2)
+  // 6:  Recall [3]; x = cos(z). Therefore:
+  //       sin(z/2) = sqrt((1 - x)/2)
+  // 7:  Let u = (1 - x)/2
+  // 8:  Therefore:
+  //       asin(sqrt(u)) = z/2
+  //       2 * asin(sqrt(u)) = z
+  // 9:  Recall [3], z = pi/2 - y. Therefore:
+  //       y = pi/2 - z
+  //       y = pi/2 - 2 * asin(sqrt(u))
+  // 10: Recall [1], y = asin(x). Therefore:
+  //       asin(x) = pi/2 - 2 * asin(sqrt(u))
+  //
+  // WHY?
+  // 11: Recall [7], u = (1 - x)/2
+  // 12: Since 0.5 < x <= 1, therefore:
+  //       0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
+  //
+  // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
+  // Step [10] as `sqrt(u)` is in range.
+
+  // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0
+  float xf_abs = (xf < 0 ? -xf : xf);
+  float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0);
+  float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
+  float u_sqrt = fputil::sqrt<float>(u);
+
+  // Degree-8 even minimax polynomial P of asin(x)/x generated by Sollya with:
+  // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
+  float asin_sqrt_u =
+      u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
+                                0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
+
+  return fputil::cast<float16>(sign *
+                               fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF16_H
diff --git a/libc/src/__support/math/asinhf.h b/libc/src/__support/math/asinhf.h
new file mode 100644
index 0000000..1c08a6e
--- /dev/null
+++ b/libc/src/__support/math/asinhf.h
@@ -0,0 +1,125 @@
+//===-- Implementation header for asinhf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinhf(float x) {
+  using namespace acoshf_internal;
+  using FPBits_t = typename fputil::FPBits<float>;
+  FPBits_t xbits(x);
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = xbits.abs().uintval();
+
+  // |x| <= 2^-2
+  if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
+    // |x| <= 2^-26
+    if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+      return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
+                                    ? x
+                                    : (x - 0x1.5555555555555p-3 * x * x * x));
+    }
+
+    double x_d = x;
+    double x_sq = x_d * x_d;
+    // Generated by Sollya with:
+    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
+    //                 [0, 2^-2]);
+    double p = fputil::polyeval(
+        x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
+        -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
+        0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
+    return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
+  }
+
+  const double SIGN[2] = {1.0, -1.0};
+  double x_sign = SIGN[x_u >> 31];
+  double x_d = x;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // Helper functions to set results for exceptional cases.
+  auto round_result_slightly_down = [x_sign](float r) -> float {
+    return fputil::multiply_add(static_cast<float>(x_sign), r,
+                                static_cast<float>(x_sign) * (-0x1.0p-24f));
+  };
+  auto round_result_slightly_up = [x_sign](float r) -> float {
+    return fputil::multiply_add(static_cast<float>(x_sign), r,
+                                static_cast<float>(x_sign) * 0x1.0p-24f);
+  };
+
+  if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
+    if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits_t::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // Exceptional cases when x > 2^24.
+    switch (x_abs) {
+    case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
+      return round_result_slightly_down(0x1.1e0696p4f);
+    case 0x4c803f2c: // |x| = 0x1.007e58p26f
+      return round_result_slightly_down(0x1.2b786cp4f);
+    case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
+      return round_result_slightly_up(0x1.6fdd34p4f);
+    case 0x5c569e88: // |x| = 0x1.ad3d1p57f
+      return round_result_slightly_up(0x1.45c146p5f);
+    case 0x5e68984e: // |x| = 0x1.d1309cp61f
+      return round_result_slightly_up(0x1.5c9442p5f);
+    case 0x655890d3: // |x| = 0x1.b121a6p75f
+      return round_result_slightly_down(0x1.a9a3f2p5f);
+    case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
+      return round_result_slightly_up(0x1.af66cp5f);
+    case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
+      return round_result_slightly_down(0x1.08b512p6f);
+    case 0x7997f30a: // |x| = 0x1.2fe614p116f
+      return round_result_slightly_up(0x1.451436p6f);
+    }
+  } else {
+    // Exceptional cases when x < 2^24.
+    if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
+      // |x| = 0x1.575e4cp12f
+      return round_result_slightly_down(0x1.29becap3f);
+    }
+    if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
+      // |x| = 0x1.a5209p20f
+      return round_result_slightly_down(0x1.e1b92p3f);
+    }
+  }
+#else
+  if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
+    return x;
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // asinh(x) = log(x + sqrt(x^2 + 1))
+  return static_cast<float>(
+      x_sign * log_eval(fputil::multiply_add(
+                   x_d, x_sign,
+                   fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF_H
diff --git a/libc/src/__support/math/asinhf16.h b/libc/src/__support/math/asinhf16.h
new file mode 100644
index 0000000..3c5171e
--- /dev/null
+++ b/libc/src/__support/math/asinhf16.h
@@ -0,0 +1,121 @@
+//===-- Implementation header for asinhf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 asinhf16(float16 x) {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr size_t N_EXCEPTS = 8;
+
+  constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+
+      // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ)
+      {0x3769, 0x372a, 1, 0, 1},
+      // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ)
+      {0x3b5b, 0x3a96, 1, 0, 0},
+      // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ)
+      {0x4b1f, 0x42b3, 1, 0, 0},
+      // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ)
+      {0x4c9b, 0x4336, 1, 0, 1},
+      // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ)
+      {0xb769, 0xb72a, 0, 1, 1},
+      // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ)
+      {0xbb5b, 0xba96, 0, 1, 0},
+      // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ)
+      {0xcb1f, 0xc2b3, 0, 1, 0},
+      // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ)
+      {0xcc9b, 0xc336, 0, 1, 1},
+  }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using namespace acoshf_internal;
+  using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+
+  if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
+    if (xbits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+
+    return x;
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // Handle exceptional values
+  if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  float xf = x;
+  const float SIGN[2] = {1.0f, -1.0f};
+  float x_sign = SIGN[x_u >> 15];
+
+  // |x| <= 0.25
+  if (LIBC_UNLIKELY(x_abs <= 0x3400)) {
+    // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain
+    // rounding types.
+    if (LIBC_UNLIKELY(x_abs < 0x29c6)) {
+      int rounding = fputil::quick_get_round();
+      if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0)
+        return fputil::cast<float16>(xf + 0x1p-24f);
+      if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0)
+        return fputil::cast<float16>(xf - 0x1p-24f);
+      return fputil::cast<float16>(xf);
+    }
+
+    float x_sq = xf * xf;
+    // Generated by Sollya with:
+    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]);
+    // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f
+    // for better accuracy.
+    float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f,
+                               -0x1.6c53dep-5f, 0x1.bd114ep-5f);
+
+    return fputil::cast<float16>(xf * p);
+  }
+
+  // General case: asinh(x) = ln(x + sqrt(x^2 + 1))
+  float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f));
+  return fputil::cast<float16>(
+      x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term)));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINHF16_H
diff --git a/libc/src/__support/math/atan.h b/libc/src/__support/math/atan.h
new file mode 100644
index 0000000..62190b0
--- /dev/null
+++ b/libc/src/__support/math/atan.h
@@ -0,0 +1,189 @@
+//===-- Implementation header for atan --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// To compute atan(x), we divided it into the following cases:
+// * |x| < 2^-26:
+//      Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
+//      return atan(x) = x - sign(x) * epsilon.
+// * 2^-26 <= |x| < 1:
+//      We perform range reduction mod 2^-6 = 1/64 as follows:
+//      Let k = 2^(-6) * round(|x| * 2^6), then
+//        atan(x) = sign(x) * atan(|x|)
+//                = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k))).
+//      We store atan(k) in a look up table, and perform intermediate steps in
+//      double-double.
+// * 1 <= |x| < 2^53:
+//      First we perform the transformation y = 1/|x|:
+//        atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+//                = sign(x) * (pi/2 - atan(y)).
+//      Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
+//      previous case:
+//      Let k = 2^(-6) * round(y * 2^6), then
+//        atan(y) = atan(k) + atan((y - k) / (1 + y*k))
+//                = atan(k) + atan((1/|x| - k) / (1 + k/|x|))
+//                = atan(k) + atan((1 - k*|x|) / (|x| + k)).
+// * |x| >= 2^53:
+//      Using the reciprocal transformation:
+//        atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
+//      We have that:
+//        atan(1/|x|) <= 1/|x| <= 2^-53,
+//      which is smaller than ulp(pi/2) / 2.
+//      So we can return:
+//        atan(x) = sign(x) * (pi/2 - epsilon)
+
+LIBC_INLINE static constexpr double atan(double x) {
+
+  using namespace atan_internal;
+  using FPBits = fputil::FPBits<double>;
+
+  constexpr double IS_NEG[2] = {1.0, -1.0};
+  constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
+                                      0x1.921fb54442d18p0};
+  constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
+                                       -0x1.921fb54442d18p0};
+
+  FPBits xbits(x);
+  bool x_sign = xbits.is_neg();
+  xbits = xbits.abs();
+  uint64_t x_abs = xbits.uintval();
+  int x_exp =
+      static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
+
+  // |x| < 1.
+  if (x_exp < 0) {
+    if (LIBC_UNLIKELY(x_exp < -26)) {
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      return x;
+#else
+      if (x == 0.0)
+        return x;
+      // |x| < 2^-26
+      return fputil::multiply_add(-0x1.0p-54, x, x);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    }
+
+    double x_d = xbits.get_val();
+    // k = 2^-6 * round(2^6 * |x|)
+    double k = fputil::nearest_integer(0x1.0p6 * x_d);
+    unsigned idx = static_cast<unsigned>(k);
+    k *= 0x1.0p-6;
+
+    // numerator = |x| - k
+    DoubleDouble num, den;
+    num.lo = 0.0;
+    num.hi = x_d - k;
+
+    // denominator = 1 + k * |x|
+    den.hi = fputil::multiply_add(x_d, k, 1.0);
+    DoubleDouble prod = fputil::exact_mult(x_d, k);
+    // Using Dekker's 2SUM algorithm to compute the lower part.
+    den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
+
+    // x_r = (|x| - k) / (1 + k * |x|)
+    DoubleDouble x_r = fputil::div(num, den);
+
+    // Approximating atan(x_r) using Taylor polynomial.
+    DoubleDouble p = atan_eval(x_r);
+
+    // atan(x) = sign(x) * (atan(k) + atan(x_r))
+    //         = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
+#else
+
+    DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
+    double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
+    double r = IS_NEG[x_sign] * (c0.hi + c1);
+
+    return r;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  }
+
+  // |x| >= 2^53 or x is NaN.
+  if (LIBC_UNLIKELY(x_exp >= 53)) {
+    // x is nan
+    if (xbits.is_nan()) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+      return x;
+    }
+    // |x| >= 2^53
+    // atan(x) ~ sign(x) * pi/2.
+    if (x_exp >= 53)
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      return IS_NEG[x_sign] * PI_OVER_2.hi;
+#else
+      return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
+                                  IS_NEG[x_sign] * PI_OVER_2.lo);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  }
+
+  double x_d = xbits.get_val();
+  double y = 1.0 / x_d;
+
+  // k = 2^-6 * round(2^6 / |x|)
+  double k = fputil::nearest_integer(0x1.0p6 * y);
+  unsigned idx = static_cast<unsigned>(k);
+  k *= 0x1.0p-6;
+
+  // denominator = |x| + k
+  DoubleDouble den = fputil::exact_add(x_d, k);
+  // numerator = 1 - k * |x|
+  DoubleDouble num;
+  num.hi = fputil::multiply_add(-x_d, k, 1.0);
+  DoubleDouble prod = fputil::exact_mult(x_d, k);
+  // Using Dekker's 2SUM algorithm to compute the lower part.
+  num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
+
+  // x_r = (1/|x| - k) / (1 + k/|x|)
+  //     = (1 - k * |x|) / (|x| + k)
+  DoubleDouble x_r = fputil::div(num, den);
+
+  // Approximating atan(x_r) using Taylor polynomial.
+  DoubleDouble p = atan_eval(x_r);
+
+  // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
+  //         = sign(x) * (pi/2 - atan(k) - atan(x_r))
+  //         = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| + k)))
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
+  return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
+#else
+  DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
+  DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
+  double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
+
+  double r = IS_NEG[!x_sign] * (c1.hi + c2);
+
+  return r;
+#endif
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
diff --git a/libc/src/math/generic/atan_utils.h b/libc/src/__support/math/atan_utils.h
index 24c7271..9e8d7d6 100644
--- a/libc/src/math/generic/atan_utils.h
+++ b/libc/src/__support/math/atan_utils.h
@@ -18,7 +18,7 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-namespace {
+namespace atan_internal {
 
 using DoubleDouble = fputil::DoubleDouble;
 using Float128 = fputil::DyadicFloat<128>;
@@ -29,7 +29,7 @@ using Float128 = fputil::DyadicFloat<128>;
 //     b = round(atan(i/64) - a, D, RN);
 //     print("{", b, ",", a, "},");
 //   };
-constexpr DoubleDouble ATAN_I[65] = {
+static constexpr DoubleDouble ATAN_I[65] = {
     {0.0, 0.0},
     {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
     {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
@@ -110,7 +110,8 @@ constexpr DoubleDouble ATAN_I[65] = {
 //        + x_lo * (1 - x_hi^2 + x_hi^4)
 // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by:
 //   |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66.
-[[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) {
+[[maybe_unused]] LIBC_INLINE static DoubleDouble
+atan_eval(const DoubleDouble &x) {
   DoubleDouble p;
   p.hi = x.hi;
   double x_hi_sq = x.hi * x.hi;
@@ -142,7 +143,7 @@ constexpr DoubleDouble ATAN_I[65] = {
 //     b = 2^ll + a;
 //     print("{Sign::POS, ", 2^(ll - 128), ",", b, "},");
 // };
-constexpr Float128 ATAN_I_F128[65] = {
+static constexpr Float128 ATAN_I_F128[65] = {
     {Sign::POS, 0, 0_u128},
     {Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128},
     {Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128},
@@ -215,7 +216,7 @@ constexpr Float128 ATAN_I_F128[65] = {
 //                 [0, 2^-7]);
 // > dirtyinfnorm(atan(x) - P, [0, 2^-7]);
 // 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122
-constexpr Float128 ATAN_POLY_F128[] = {
+static constexpr Float128 ATAN_POLY_F128[] = {
     {Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128},
     {Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128},
     {Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128},
@@ -225,7 +226,8 @@ constexpr Float128 ATAN_POLY_F128[] = {
 };
 
 // Approximate atan for |x| <= 2^-7.
-[[maybe_unused]] Float128 atan_eval(const Float128 &x) {
+[[maybe_unused]] LIBC_INLINE static constexpr Float128
+atan_eval(const Float128 &x) {
   Float128 x_sq = fputil::quick_mul(x, x);
   Float128 x3 = fputil::quick_mul(x, x_sq);
   Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1],
@@ -234,7 +236,7 @@ constexpr Float128 ATAN_POLY_F128[] = {
   return fputil::multiply_add(x3, p, x);
 }
 
-} // anonymous namespace
+} // namespace atan_internal
 
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h
new file mode 100644
index 0000000..92799dc
--- /dev/null
+++ b/libc/src/__support/math/atanf.h
@@ -0,0 +1,129 @@
+//===-- Implementation header for atanf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float atanf(float x) {
+  using namespace inv_trigf_utils_internal;
+  using FPBits = typename fputil::FPBits<float>;
+
+  constexpr double FINAL_SIGN[2] = {1.0, -1.0};
+  constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
+                                          -0x1.921fb54442d18p0};
+
+  FPBits x_bits(x);
+  Sign sign = x_bits.sign();
+  x_bits.set_sign(Sign::POS);
+  uint32_t x_abs = x_bits.uintval();
+
+  // x is inf or nan, |x| <= 2^-4 or |x| >= 16.
+  if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
+    double x_d = static_cast<double>(x);
+    double const_term = 0.0;
+    if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
+      // atan(+-Inf) = +-pi/2.
+      if (x_bits.is_inf()) {
+        volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()];
+        return static_cast<float>(sign_pi_over_2);
+      }
+      if (x_bits.is_nan())
+        return x;
+      // |x| >= 16: atan(x) = sign(x) * pi/2 + atan(-1/x)
+      x_d = -1.0 / x_d;
+      const_term = SIGNED_PI_OVER_2[sign.is_neg()];
+    }
+    // Remaining cases: |x| <= 1/16, or |x| >= 16 with x_d = -1/x.
+    if (LIBC_UNLIKELY(x_bits.is_zero()))
+      return x;
+    // |x| < 2^-12;
+    if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+      return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+      double x_d = static_cast<double>(x);
+      return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+    }
+    // Use Taylor polynomial:
+    //   atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
+    constexpr double ATAN_TAYLOR[6] = {
+        0x1.0000000000000p+0,  -0x1.5555555555555p-2, 0x1.999999999999ap-3,
+        -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4,  -0x1.745d1745d1746p-4,
+    };
+    double x2 = x_d * x_d;
+    double x4 = x2 * x2;
+    double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+    double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+    double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]);
+    double p = fputil::polyeval(x4, c0, c1, c2);
+    double r = fputil::multiply_add(x_d, p, const_term);
+    return static_cast<float>(r);
+  }
+
+  // Range reduction steps:
+  // 1)  atan(x) = sign(x) * atan(|x|)
+  // 2)  If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+  // 3)  For 1/16 < |x| <= 1, we find k such that: | |x| - k/16 | <= 1/32.
+  // 4)  Then we use polynomial approximation:
+  //   atan(x) ~ atan(k/16) + (x - k/16) * Q(x - k/16)
+  //           = P(x - k/16)
+  double x_d = 0, const_term = 0, final_sign = 0;
+  int idx = 0;
+
+  if (x_abs > 0x3f80'0000U) {
+    // |x| > 1, we need to invert x, so we will perform range reduction in
+    // double precision.
+    x_d = 1.0 / static_cast<double>(x_bits.get_val());
+    double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
+    x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
+    idx = static_cast<int>(k_d);
+    final_sign = FINAL_SIGN[sign.is_pos()];
+    // Adjust constant term of the polynomial by +- pi/2.
+    const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
+                                      SIGNED_PI_OVER_2[sign.is_neg()]);
+  } else {
+    // Exceptional value:
+    if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4
+      return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f)
+                           : fputil::round_result_slightly_up(-0x1.1a6386p-4f);
+    }
+    // Perform range reduction in single precision.
+    float x_f = x_bits.get_val();
+    float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
+    x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
+    x_d = static_cast<double>(x_f);
+    idx = static_cast<int>(k_f);
+    final_sign = FINAL_SIGN[sign.is_neg()];
+    const_term = final_sign * ATAN_COEFFS[idx][0];
+  }
+
+  double p = atan_eval(x_d, idx);
+  double r = fputil::multiply_add(final_sign * x_d, p, const_term);
+
+  return static_cast<float>(r);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
diff --git a/libc/src/__support/math/erff.h b/libc/src/__support/math/erff.h
index e54ec77..b81be30 100644
--- a/libc/src/__support/math/erff.h
+++ b/libc/src/__support/math/erff.h
@@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float erff(float x) {
+LIBC_INLINE static constexpr float erff(float x) {
 
   // Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by
   // Sollya with: > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h
index 14f0592..83638e8 100644
--- a/libc/src/__support/math/exp.h
+++ b/libc/src/__support/math/exp.h
@@ -67,7 +67,7 @@ namespace {
 // Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
 // For |dx| < 2^-13 + 2^-30:
 //   | output - expm1(dx) / dx | < 2^-51.
-static double poly_approx_d(double dx) {
+LIBC_INLINE static double poly_approx_d(double dx) {
   // dx^2
   double dx2 = dx * dx;
   // c0 = 1 + dx / 2
@@ -85,7 +85,7 @@ static double poly_approx_d(double dx) {
 // Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
 // For |dx| < 2^-13 + 2^-30:
 //   | output - exp(dx) | < 2^-101
-static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
+LIBC_INLINE static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
   // Taylor polynomial.
   constexpr DoubleDouble COEFFS[] = {
       {0, 0x1p0},                                      // 1
@@ -106,7 +106,7 @@ static DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
 // Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^7 / 5040
 // For |dx| < 2^-13 + 2^-30:
 //   | output - exp(dx) | < 2^-126.
-static Float128 poly_approx_f128(const Float128 &dx) {
+LIBC_INLINE static Float128 poly_approx_f128(const Float128 &dx) {
   constexpr Float128 COEFFS_128[]{
       {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
       {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
@@ -127,7 +127,7 @@ static Float128 poly_approx_f128(const Float128 &dx) {
 // Compute exp(x) using 128-bit precision.
 // TODO(lntue): investigate triple-double precision implementation for this
 // step.
-static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
+LIBC_INLINE static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
   // Recalculate dx:
 
   double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -160,8 +160,8 @@ static Float128 exp_f128(double x, double kd, int idx1, int idx2) {
 }
 
 // Compute exp(x) with double-double precision.
-static DoubleDouble exp_double_double(double x, double kd,
-                                      const DoubleDouble &exp_mid) {
+LIBC_INLINE static DoubleDouble exp_double_double(double x, double kd,
+                                                  const DoubleDouble &exp_mid) {
   // Recalculate dx:
   //   dx = x - k * 2^-12 * log(2)
   double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -184,7 +184,7 @@ static DoubleDouble exp_double_double(double x, double kd,
 
 // Check for exceptional cases when
 // |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
-static double set_exceptional(double x) {
+LIBC_INLINE static double set_exceptional(double x) {
   using FPBits = typename fputil::FPBits<double>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
index fa60e40c..12a09d7 100644
--- a/libc/src/__support/math/exp10.h
+++ b/libc/src/__support/math/exp10.h
@@ -83,7 +83,8 @@ LIBC_INLINE static double exp10_poly_approx_d(double dx) {
 // > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
 // Error bounds:
 //   | output - 10^(dx) | < 2^-101
-static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) {
+LIBC_INLINE static constexpr DoubleDouble
+exp10_poly_approx_dd(const DoubleDouble &dx) {
   // Taylor polynomial.
   constexpr DoubleDouble COEFFS[] = {
       {0, 0x1p0},
@@ -105,7 +106,8 @@ static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) {
 // Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
 // For |dx| < 2^-14:
 //   | output - 10^dx | < 1.5 * 2^-124.
-static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) {
+LIBC_INLINE static constexpr Float128
+exp10_poly_approx_f128(const Float128 &dx) {
   constexpr Float128 COEFFS_128[]{
       {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
       {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128},
@@ -126,7 +128,8 @@ static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) {
 // Compute 10^(x) using 128-bit precision.
 // TODO(lntue): investigate triple-double precision implementation for this
 // step.
-static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
+LIBC_INLINE static Float128 exp10_f128(double x, double kd, int idx1,
+                                       int idx2) {
   double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
   double t2 = kd * MLOG10_2_EXP2_M12_MID_32;                     // exact
   double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
@@ -157,8 +160,8 @@ static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
 }
 
 // Compute 10^x with double-double precision.
-static DoubleDouble exp10_double_double(double x, double kd,
-                                        const DoubleDouble &exp_mid) {
+LIBC_INLINE static DoubleDouble
+exp10_double_double(double x, double kd, const DoubleDouble &exp_mid) {
   // Recalculate dx:
   //   dx = x - k * 2^-12 * log10(2)
   double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
@@ -180,7 +183,7 @@ static DoubleDouble exp10_double_double(double x, double kd,
 #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // When output is denormal.
-static double exp10_denorm(double x) {
+LIBC_INLINE static double exp10_denorm(double x) {
   // Range reduction.
   double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
   int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
@@ -234,7 +237,7 @@ static double exp10_denorm(double x) {
 //  * x >= log10(2^1024)
 //  * x <= log10(2^-1022)
 //  * x is inf or nan
-static constexpr double exp10_set_exceptional(double x) {
+LIBC_INLINE static constexpr double exp10_set_exceptional(double x) {
   using FPBits = typename fputil::FPBits<double>;
   FPBits xbits(x);
 
@@ -285,7 +288,7 @@ static constexpr double exp10_set_exceptional(double x) {
 
 namespace math {
 
-static constexpr double exp10(double x) {
+LIBC_INLINE static constexpr double exp10(double x) {
   using FPBits = typename fputil::FPBits<double>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/exp10f.h b/libc/src/__support/math/exp10f.h
index 807b4f0..76ae197 100644
--- a/libc/src/__support/math/exp10f.h
+++ b/libc/src/__support/math/exp10f.h
@@ -20,7 +20,7 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace math {
 
-static constexpr float exp10f(float x) {
+LIBC_INLINE static constexpr float exp10f(float x) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/exp10f16.h b/libc/src/__support/math/exp10f16.h
index 0d8b125..3eca867 100644
--- a/libc/src/__support/math/exp10f16.h
+++ b/libc/src/__support/math/exp10f16.h
@@ -57,7 +57,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS>
     }};
 #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
-static constexpr float16 exp10f16(float16 x) {
+LIBC_INLINE static constexpr float16 exp10f16(float16 x) {
   using FPBits = fputil::FPBits<float16>;
   FPBits x_bits(x);
 
diff --git a/libc/src/__support/math/exp10f_utils.h b/libc/src/__support/math/exp10f_utils.h
index c30def9..010a2f1 100644
--- a/libc/src/__support/math/exp10f_utils.h
+++ b/libc/src/__support/math/exp10f_utils.h
@@ -89,7 +89,7 @@ struct Exp10Base : public ExpBase {
                                        0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
                                        0x1.1429e74a98f43p-1};
 
-  static double powb_lo(double dx) {
+  LIBC_INLINE static double powb_lo(double dx) {
     using fputil::multiply_add;
     double dx2 = dx * dx;
     // c0 = 1 + COEFFS[0] * dx
diff --git a/libc/src/__support/math/exp_utils.h b/libc/src/__support/math/exp_utils.h
index fc9ab10..ef408ed 100644
--- a/libc/src/__support/math/exp_utils.h
+++ b/libc/src/__support/math/exp_utils.h
@@ -22,8 +22,8 @@ namespace LIBC_NAMESPACE_DECL {
 // So if we scale x up by 2^1022, we can use
 //   double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
 template <bool SKIP_ZIV_TEST = false>
-static constexpr cpp::optional<double> ziv_test_denorm(int hi, double mid,
-                                                       double lo, double err) {
+LIBC_INLINE static constexpr cpp::optional<double>
+ziv_test_denorm(int hi, double mid, double lo, double err) {
   using FPBits = typename fputil::FPBits<double>;
 
   // Scaling factor = 1/(min normal number) = 2^1022
diff --git a/libc/src/__support/math/expf.h b/libc/src/__support/math/expf.h
index 88c1514..f7e11be 100644
--- a/libc/src/__support/math/expf.h
+++ b/libc/src/__support/math/expf.h
@@ -24,7 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float expf(float x) {
+LIBC_INLINE static constexpr float expf(float x) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/expf16.h b/libc/src/__support/math/expf16.h
index ded28c7..14302a7 100644
--- a/libc/src/__support/math/expf16.h
+++ b/libc/src/__support/math/expf16.h
@@ -31,7 +31,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 expf16(float16 x) {
+LIBC_INLINE static constexpr float16 expf16(float16 x) {
 #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
   constexpr fputil::ExceptValues<float16, 2> EXPF16_EXCEPTS_LO = {{
       // (input, RZ output, RU offset, RD offset, RN offset)
diff --git a/libc/src/__support/math/expf16_utils.h b/libc/src/__support/math/expf16_utils.h
index 8a2fc94..4204dab7 100644
--- a/libc/src/__support/math/expf16_utils.h
+++ b/libc/src/__support/math/expf16_utils.h
@@ -47,7 +47,8 @@ struct ExpRangeReduction {
   float exp_lo;
 };
 
-[[maybe_unused]] static ExpRangeReduction exp_range_reduction(float16 x) {
+[[maybe_unused]] LIBC_INLINE static ExpRangeReduction
+exp_range_reduction(float16 x) {
   // For -18 < x < 12, to compute exp(x), we perform the following range
   // reduction: find hi, mid, lo, such that:
   //   x = hi + mid + lo, in which
diff --git a/libc/src/__support/math/frexpf.h b/libc/src/__support/math/frexpf.h
index 4d2f494..7834a12 100644
--- a/libc/src/__support/math/frexpf.h
+++ b/libc/src/__support/math/frexpf.h
@@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float frexpf(float x, int *exp) {
+LIBC_INLINE static constexpr float frexpf(float x, int *exp) {
   return fputil::frexp(x, *exp);
 }
 
diff --git a/libc/src/__support/math/frexpf128.h b/libc/src/__support/math/frexpf128.h
index 2fd5bc4..5218b26 100644
--- a/libc/src/__support/math/frexpf128.h
+++ b/libc/src/__support/math/frexpf128.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float128 frexpf128(float128 x, int *exp) {
+LIBC_INLINE static constexpr float128 frexpf128(float128 x, int *exp) {
   return fputil::frexp(x, *exp);
 }
 
diff --git a/libc/src/__support/math/frexpf16.h b/libc/src/__support/math/frexpf16.h
index 8deeba0..530b61a 100644
--- a/libc/src/__support/math/frexpf16.h
+++ b/libc/src/__support/math/frexpf16.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 frexpf16(float16 x, int *exp) {
+LIBC_INLINE static constexpr float16 frexpf16(float16 x, int *exp) {
   return fputil::frexp(x, *exp);
 }
 
diff --git a/libc/src/__support/math/ldexpf.h b/libc/src/__support/math/ldexpf.h
index 3a5ec1d..9ef5d96 100644
--- a/libc/src/__support/math/ldexpf.h
+++ b/libc/src/__support/math/ldexpf.h
@@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float ldexpf(float x, int exp) {
+LIBC_INLINE static constexpr float ldexpf(float x, int exp) {
   return fputil::ldexp(x, exp);
 }
 
diff --git a/libc/src/__support/math/ldexpf128.h b/libc/src/__support/math/ldexpf128.h
index 3625830..4fba20c 100644
--- a/libc/src/__support/math/ldexpf128.h
+++ b/libc/src/__support/math/ldexpf128.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float128 ldexpf128(float128 x, int exp) {
+LIBC_INLINE static constexpr float128 ldexpf128(float128 x, int exp) {
   return fputil::ldexp(x, exp);
 }
 
diff --git a/libc/src/__support/math/ldexpf16.h b/libc/src/__support/math/ldexpf16.h
index fbead87..d978d22 100644
--- a/libc/src/__support/math/ldexpf16.h
+++ b/libc/src/__support/math/ldexpf16.h
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-static constexpr float16 ldexpf16(float16 x, int exp) {
+LIBC_INLINE static constexpr float16 ldexpf16(float16 x, int exp) {
   return fputil::ldexp(x, exp);
 }
 
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index cbb7886..14aaad2 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -124,3 +124,14 @@ add_object_library(
     libc.src.__support.threads.linux.raw_mutex
     libc.src.__support.CPP.mutex
 )
+
+add_object_library(
+  barrier
+  HDRS
+    barrier.h
+  SRCS
+    barrier.cpp
+  DEPENDS
+    libc.src.__support.threads.CndVar
+    libc.src.__support.threads.mutex
+)
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
new file mode 100644
index 0000000..cf7207b5
--- /dev/null
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -0,0 +1,85 @@
+//===-- Implementation of Barrier class -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/threads/linux/barrier.h"
+#include "hdr/errno_macros.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int Barrier::init(Barrier *b,
+                  [[maybe_unused]] const pthread_barrierattr_t *attr,
+                  unsigned count) {
+  LIBC_ASSERT(attr == nullptr); // TODO: implement barrierattr support
+  if (count == 0)
+    return EINVAL;
+
+  b->expected = count;
+  b->waiting = 0;
+  b->blocking = true;
+
+  int err;
+  err = CndVar::init(&b->entering);
+  if (err != 0)
+    return err;
+
+  err = CndVar::init(&b->exiting);
+  if (err != 0)
+    return err;
+
+  auto mutex_err = Mutex::init(&b->m, false, false, false, false);
+  if (mutex_err != MutexError::NONE)
+    return EAGAIN; // every mutex init failure is reported as EAGAIN
+
+  return 0;
+}
+
+int Barrier::wait() {
+  m.lock();
+
+  // if the barrier is still draining the previous batch, wait for it to finish
+  while (!blocking)
+    entering.wait(&m);
+  waiting++;
+
+  if (waiting < expected) {
+    // block until the last expected thread arrives and clears `blocking`
+    while (blocking)
+      exiting.wait(&m);
+  } else {
+    // this is the last thread to call wait(), so let's wake everyone up
+    blocking = false;
+    exiting.broadcast();
+  }
+  waiting--;
+
+  if (waiting == 0) {
+    // all threads have exited the barrier, let's let the ones waiting to enter
+    // continue
+    blocking = true;
+    entering.broadcast();
+    m.unlock();
+
+    // POSIX dictates that the barrier should return a special value to just one
+    // thread, so we arbitrarily choose the last thread to leave the barrier
+    return PTHREAD_BARRIER_SERIAL_THREAD;
+  }
+  m.unlock();
+
+  return 0;
+}
+
+int Barrier::destroy(Barrier *b) {
+  CndVar::destroy(&b->entering);
+  CndVar::destroy(&b->exiting);
+  Mutex::destroy(&b->m);
+  return 0; // unconditionally reports success
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
new file mode 100644
index 0000000..f0655bf
--- /dev/null
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -0,0 +1,50 @@
+//===-- A platform independent abstraction layer for barriers --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H
+#define LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H
+
+#include "hdr/pthread_macros.h"
+#include "include/llvm-libc-types/pthread_barrier_t.h"
+#include "include/llvm-libc-types/pthread_barrierattr_t.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// NOTE: if the size of this class changes, you must ensure that the size of
+// pthread_barrier_t (found in include/llvm-libc-types/pthread_barrier_t.h) is
+// the same size
+class Barrier {
+private:
+  unsigned expected; // thread count passed to init(); releases the barrier
+  unsigned waiting;  // number of threads currently inside wait()
+  bool blocking;     // true while arrivals are held, false while they drain
+  CndVar entering;   // arrivals wait here while a released batch drains
+  CndVar exiting;    // arrivals wait here until the last thread shows up
+  Mutex m;           // held by wait() while it reads or updates the state
+
+public:
+  static int init(Barrier *b, const pthread_barrierattr_t *attr,
+                  unsigned count);
+  static int destroy(Barrier *b);
+  int wait();
+};
+
+static_assert(
+    sizeof(Barrier) == sizeof(pthread_barrier_t),
+    "The public pthread_barrier_t type cannot accommodate the internal "
+    "barrier type.");
+
+static_assert(alignof(Barrier) == alignof(pthread_barrier_t),
+              "The public pthread_barrier_t type has a different alignment "
+              "than the internal barrier type.");
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_BARRIER_H
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index e363ad3..aed1d53 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -60,14 +60,31 @@ add_object_library(
   SRCS
     mbrtowc.cpp
   DEPENDS
-  libc.hdr.errno_macros
-  libc.hdr.types.wchar_t
-  libc.hdr.types.size_t
-  libc.src.__support.common
-  libc.src.__support.error_or
-  libc.src.__support.macros.config
-  .character_converter
-  .mbstate
+    libc.hdr.errno_macros
+    libc.hdr.types.wchar_t
+    libc.hdr.types.size_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.macros.config
+    .character_converter
+    .mbstate
+)
+
+add_header_library(
+  mbsnrtowcs
+  HDRS
+    mbsnrtowcs.h
+  DEPENDS
+    libc.hdr.errno_macros
+    libc.hdr.types.wchar_t
+    libc.hdr.types.size_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.macros.config
+    libc.src.__support.macros.null_check
+    .character_converter
+    .mbstate
+    .string_converter
 )
 
 add_header_library(
diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h
new file mode 100644
index 0000000..54e3152
--- /dev/null
+++ b/libc/src/__support/wchar/mbsnrtowcs.h
@@ -0,0 +1,66 @@
+//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
+#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
+                                              const char **__restrict src,
+                                              size_t nmc, size_t len,
+                                              mbstate *__restrict ps) {
+  LIBC_CRASH_ON_NULLPTR(src);
+  // Reject an invalid conversion state before touching the input.
+  CharacterConverter char_conv(ps);
+  if (!char_conv.isValidState())
+    return Error(EINVAL);
+
+  StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps,
+                                    len, nmc);
+  size_t dst_idx = 0;
+  ErrorOr<char32_t> converted = str_conv.popUTF32();
+  while (converted.has_value()) {
+    if (dst != nullptr)
+      dst[dst_idx] = converted.value();
+    // the null terminator is stored (above) but not counted in the return value
+    if (converted.value() == L'\0') {
+      if (dst != nullptr)
+        *src = nullptr;
+      return dst_idx;
+    }
+    dst_idx++;
+    converted = str_conv.popUTF32();
+  }
+
+  if (converted.error() == -1) { // -1 means we hit the conversion limit
+    if (dst != nullptr)
+      *src += str_conv.getSourceIndex();
+    return dst_idx;
+  }
+
+  return Error(converted.error());
+}
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 455ad34..0522e0e 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -189,6 +189,7 @@ add_math_entrypoint_object(fabsf)
 add_math_entrypoint_object(fabsl)
 add_math_entrypoint_object(fabsf16)
 add_math_entrypoint_object(fabsf128)
+add_math_entrypoint_object(fabsbf16)
 
 add_math_entrypoint_object(fadd)
 add_math_entrypoint_object(faddl)
diff --git a/libc/src/math/fabsbf16.h b/libc/src/math/fabsbf16.h
new file mode 100644
index 0000000..4993668
--- /dev/null
+++ b/libc/src/math/fabsbf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for fabsbf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FABSBF16_H
+#define LLVM_LIBC_SRC_MATH_FABSBF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+bfloat16 fabsbf16(bfloat16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_FABSBF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index ecf0967..701dc4b 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -698,6 +698,19 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  fabsbf16
+  SRCS
+    fabsbf16.cpp
+  HDRS
+    ../fabsbf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.bfloat16
+    libc.src.__support.macros.config
+    libc.src.__support.macros.properties.types
+)
+
+add_entrypoint_object(
   fadd
   SRCS
     fadd.cpp
@@ -3889,12 +3902,7 @@ add_entrypoint_object(
   HDRS
     ../asinhf.h
   DEPENDS
-    .explogxf
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
+    libc.src.__support.math.asinhf
 )
 
 add_entrypoint_object(
@@ -3904,18 +3912,7 @@ add_entrypoint_object(
   HDRS
     ../asinhf16.h
   DEPENDS
-    .explogxf
-    libc.hdr.fenv_macros
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.types
+    libc.src.__support.math.asinhf16
 )
 
 add_entrypoint_object(
@@ -3968,16 +3965,7 @@ add_entrypoint_object(
   HDRS
     ../asinf16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.types
+    libc.src.__support.math.asinf16
 )
 
 add_entrypoint_object(
@@ -4032,19 +4020,6 @@ add_entrypoint_object(
     libc.src.errno.errno
 )
 
-add_header_library(
-  atan_utils
-  HDRS
-    atan_utils.h
-  DEPENDS
-    libc.src.__support.integer_literals
-    libc.src.__support.FPUtil.double_double
-    libc.src.__support.FPUtil.dyadic_float
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.macros.optimization
-)
-
 add_entrypoint_object(
   atanf
   SRCS
@@ -4052,14 +4027,7 @@ add_entrypoint_object(
   HDRS
     ../atanf.h
   DEPENDS
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.inv_trigf_utils
+    libc.src.__support.math.atanf
 )
 
 add_entrypoint_object(
@@ -4091,13 +4059,7 @@ add_entrypoint_object(
   COMPILE_OPTIONS
     -O3
   DEPENDS
-    .atan_utils
-    libc.src.__support.FPUtil.double_double
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.macros.optimization
+    libc.src.__support.math.atan
 )
 
 add_entrypoint_object(
@@ -4127,7 +4089,7 @@ add_entrypoint_object(
   HDRS
     ../atan2.h
   DEPENDS
-    .atan_utils
+    libc.src.__support.math.atan_utils
     libc.src.__support.FPUtil.double_double
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
@@ -4153,7 +4115,7 @@ add_entrypoint_object(
   HDRS
     ../atan2f128.h
   DEPENDS
-    .atan_utils
+    libc.src.__support.math.atan_utils
     libc.src.__support.integer_literals
     libc.src.__support.uint128
     libc.src.__support.FPUtil.dyadic_float
diff --git a/libc/src/math/generic/asinf16.cpp b/libc/src/math/generic/asinf16.cpp
index 518c384..af8dbfe 100644
--- a/libc/src/math/generic/asinf16.cpp
+++ b/libc/src/math/generic/asinf16.cpp
@@ -7,127 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/asinf16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/asinf16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-// Generated by Sollya using the following command:
-// > round(pi/2, D, RN);
-static constexpr float PI_2 = 0x1.921fb54442d18p0f;
-
-LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) {
-  using FPBits = fputil::FPBits<float16>;
-  FPBits xbits(x);
-
-  uint16_t x_u = xbits.uintval();
-  uint16_t x_abs = x_u & 0x7fff;
-  float xf = x;
-
-  // |x| > 0x1p0, |x| > 1, or x is NaN.
-  if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
-    // asinf16(NaN) = NaN
-    if (xbits.is_nan()) {
-      if (xbits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // 1 < |x| <= +/-inf
-    fputil::raise_except_if_required(FE_INVALID);
-    fputil::set_errno_if_required(EDOM);
-
-    return FPBits::quiet_nan().get_val();
-  }
-
-  float xsq = xf * xf;
-
-  // |x| <= 0x1p-1, |x| <= 0.5
-  if (x_abs <= 0x3800) {
-    // asinf16(+/-0) = +/-0
-    if (LIBC_UNLIKELY(x_abs == 0))
-      return x;
-
-    // Exhaustive tests show that,
-    // for |x| <= 0x1.878p-9, when:
-    // x > 0, and rounding upward, or
-    // x < 0, and rounding downward, then,
-    // asin(x) = x * 2^-11 + x
-    // else, in other rounding modes,
-    // asin(x) = x
-    if (LIBC_UNLIKELY(x_abs <= 0x1a1e)) {
-      int rounding = fputil::quick_get_round();
-
-      if ((xbits.is_pos() && rounding == FE_UPWARD) ||
-          (xbits.is_neg() && rounding == FE_DOWNWARD))
-        return fputil::cast<float16>(fputil::multiply_add(xf, 0x1.0p-11f, xf));
-      return x;
-    }
-
-    // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
-    // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
-    float result =
-        fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
-                         0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
-    return fputil::cast<float16>(xf * result);
-  }
-
-  // When |x| > 0.5, assume that 0.5 < |x| <= 1,
-  //
-  // Step-by-step range-reduction proof:
-  // 1:  Let y = asin(x), such that, x = sin(y)
-  // 2:  From complimentary angle identity:
-  //       x = sin(y) = cos(pi/2 - y)
-  // 3:  Let z = pi/2 - y, such that x = cos(z)
-  // 4:  From double angle formula; cos(2A) = 1 - sin^2(A):
-  //       z = 2A, z/2 = A
-  //       cos(z) = 1 - 2 * sin^2(z/2)
-  // 5:  Make sin(z/2) subject of the formula:
-  //       sin(z/2) = sqrt((1 - cos(z))/2)
-  // 6:  Recall [3]; x = cos(z). Therefore:
-  //       sin(z/2) = sqrt((1 - x)/2)
-  // 7:  Let u = (1 - x)/2
-  // 8:  Therefore:
-  //       asin(sqrt(u)) = z/2
-  //       2 * asin(sqrt(u)) = z
-  // 9:  Recall [3], z = pi/2 - y. Therefore:
-  //       y = pi/2 - z
-  //       y = pi/2 - 2 * asin(sqrt(u))
-  // 10: Recall [1], y = asin(x). Therefore:
-  //       asin(x) = pi/2 - 2 * asin(sqrt(u))
-  //
-  // WHY?
-  // 11: Recall [7], u = (1 - x)/2
-  // 12: Since 0.5 < x <= 1, therefore:
-  //       0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
-  //
-  // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
-  // Step [10] as `sqrt(u)` is in range.
-
-  // 0x1p-1 < |x| <= 0x1p0, 0.5 < |x| <= 1.0
-  float xf_abs = (xf < 0 ? -xf : xf);
-  float sign = (xbits.uintval() >> 15 == 1 ? -1.0 : 1.0);
-  float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
-  float u_sqrt = fputil::sqrt<float>(u);
-
-  // Degree-6 minimax odd polynomial of asin(x) generated by Sollya with:
-  // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
-  float asin_sqrt_u =
-      u_sqrt * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
-                                0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
-
-  return fputil::cast<float16>(sign *
-                               fputil::multiply_add(-2.0f, asin_sqrt_u, PI_2));
-}
+LLVM_LIBC_FUNCTION(float16, asinf16, (float16 x)) { return math::asinf16(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp
index 3aed3bc..45023c8 100644
--- a/libc/src/math/generic/asinhf.cpp
+++ b/libc/src/math/generic/asinhf.cpp
@@ -7,112 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/asinhf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/common_constants.h"
-#include "src/math/generic/explogxf.h"
+#include "src/__support/math/asinhf.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, asinhf, (float x)) {
-  using namespace acoshf_internal;
-  using FPBits_t = typename fputil::FPBits<float>;
-  FPBits_t xbits(x);
-  uint32_t x_u = xbits.uintval();
-  uint32_t x_abs = xbits.abs().uintval();
-
-  // |x| <= 2^-3
-  if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) {
-    // |x| <= 2^-26
-    if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
-      return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
-                                    ? x
-                                    : (x - 0x1.5555555555555p-3 * x * x * x));
-    }
-
-    double x_d = x;
-    double x_sq = x_d * x_d;
-    // Generated by Sollya with:
-    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16|], [|D...|],
-    //                 [0, 2^-2]);
-    double p = fputil::polyeval(
-        x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4,
-        -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6,
-        0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7);
-    return static_cast<float>(fputil::multiply_add(x_d, p, x_d));
-  }
-
-  const double SIGN[2] = {1.0, -1.0};
-  double x_sign = SIGN[x_u >> 31];
-  double x_d = x;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Helper functions to set results for exceptional cases.
-  auto round_result_slightly_down = [x_sign](float r) -> float {
-    return fputil::multiply_add(static_cast<float>(x_sign), r,
-                                static_cast<float>(x_sign) * (-0x1.0p-24f));
-  };
-  auto round_result_slightly_up = [x_sign](float r) -> float {
-    return fputil::multiply_add(static_cast<float>(x_sign), r,
-                                static_cast<float>(x_sign) * 0x1.0p-24f);
-  };
-
-  if (LIBC_UNLIKELY(x_abs >= 0x4bdd'65a5U)) {
-    if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
-      if (xbits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits_t::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // Exceptional cases when x > 2^24.
-    switch (x_abs) {
-    case 0x4bdd65a5: // |x| = 0x1.bacb4ap24f
-      return round_result_slightly_down(0x1.1e0696p4f);
-    case 0x4c803f2c: // |x| = 0x1.007e58p26f
-      return round_result_slightly_down(0x1.2b786cp4f);
-    case 0x4f8ffb03: // |x| = 0x1.1ff606p32f
-      return round_result_slightly_up(0x1.6fdd34p4f);
-    case 0x5c569e88: // |x| = 0x1.ad3d1p57f
-      return round_result_slightly_up(0x1.45c146p5f);
-    case 0x5e68984e: // |x| = 0x1.d1309cp61f
-      return round_result_slightly_up(0x1.5c9442p5f);
-    case 0x655890d3: // |x| = 0x1.b121a6p75f
-      return round_result_slightly_down(0x1.a9a3f2p5f);
-    case 0x65de7ca6: // |x| = 0x1.bcf94cp76f
-      return round_result_slightly_up(0x1.af66cp5f);
-    case 0x6eb1a8ec: // |x| = 0x1.6351d8p94f
-      return round_result_slightly_down(0x1.08b512p6f);
-    case 0x7997f30a: // |x| = 0x1.2fe614p116f
-      return round_result_slightly_up(0x1.451436p6f);
-    }
-  } else {
-    // Exceptional cases when x < 2^24.
-    if (LIBC_UNLIKELY(x_abs == 0x45abaf26)) {
-      // |x| = 0x1.575e4cp12f
-      return round_result_slightly_down(0x1.29becap3f);
-    }
-    if (LIBC_UNLIKELY(x_abs == 0x49d29048)) {
-      // |x| = 0x1.a5209p20f
-      return round_result_slightly_down(0x1.e1b92p3f);
-    }
-  }
-#else
-  if (LIBC_UNLIKELY(xbits.is_inf_or_nan()))
-    return x;
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // asinh(x) = log(x + sqrt(x^2 + 1))
-  return static_cast<float>(
-      x_sign * log_eval(fputil::multiply_add(
-                   x_d, x_sign,
-                   fputil::sqrt<double>(fputil::multiply_add(x_d, x_d, 1.0)))));
-}
+LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { return math::asinhf(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asinhf16.cpp b/libc/src/math/generic/asinhf16.cpp
index 0a0b471..d517e63 100644
--- a/libc/src/math/generic/asinhf16.cpp
+++ b/libc/src/math/generic/asinhf16.cpp
@@ -7,102 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/asinhf16.h"
-#include "explogxf.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/asinhf16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr size_t N_EXCEPTS = 8;
-
-static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ASINHF16_EXCEPTS{{
-    // (input, RZ output, RU offset, RD offset, RN offset)
-
-    // x = 0x1.da4p-2, asinhf16(x) = 0x1.ca8p-2 (RZ)
-    {0x3769, 0x372a, 1, 0, 1},
-    // x = 0x1.d6cp-1, asinhf16(x) = 0x1.a58p-1 (RZ)
-    {0x3b5b, 0x3a96, 1, 0, 0},
-    // x = 0x1.c7cp+3, asinhf16(x) = 0x1.accp+1 (RZ)
-    {0x4b1f, 0x42b3, 1, 0, 0},
-    // x = 0x1.26cp+4, asinhf16(x) = 0x1.cd8p+1 (RZ)
-    {0x4c9b, 0x4336, 1, 0, 1},
-    // x = -0x1.da4p-2, asinhf16(x) = -0x1.ca8p-2 (RZ)
-    {0xb769, 0xb72a, 0, 1, 1},
-    // x = -0x1.d6cp-1, asinhf16(x) = -0x1.a58p-1 (RZ)
-    {0xbb5b, 0xba96, 0, 1, 0},
-    // x = -0x1.c7cp+3, asinhf16(x) = -0x1.accp+1 (RZ)
-    {0xcb1f, 0xc2b3, 0, 1, 0},
-    // x = -0x1.26cp+4, asinhf16(x) = -0x1.cd8p+1 (RZ)
-    {0xcc9b, 0xc336, 0, 1, 1},
-}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) {
-  using namespace acoshf_internal;
-  using FPBits = fputil::FPBits<float16>;
-  FPBits xbits(x);
-
-  uint16_t x_u = xbits.uintval();
-  uint16_t x_abs = x_u & 0x7fff;
-
-  if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
-    if (xbits.is_signaling_nan()) {
-      fputil::raise_except_if_required(FE_INVALID);
-      return FPBits::quiet_nan().get_val();
-    }
-
-    return x;
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Handle exceptional values
-  if (auto r = ASINHF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  float xf = x;
-  const float SIGN[2] = {1.0f, -1.0f};
-  float x_sign = SIGN[x_u >> 15];
-
-  // |x| <= 0.25
-  if (LIBC_UNLIKELY(x_abs <= 0x3400)) {
-    // when |x| < 0x1.718p-5, asinhf16(x) = x. Adjust by 1 ULP for certain
-    // rounding types.
-    if (LIBC_UNLIKELY(x_abs < 0x29c6)) {
-      int rounding = fputil::quick_get_round();
-      if ((rounding == FE_UPWARD || rounding == FE_TOWARDZERO) && xf < 0)
-        return fputil::cast<float16>(xf + 0x1p-24f);
-      if ((rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) && xf > 0)
-        return fputil::cast<float16>(xf - 0x1p-24f);
-      return fputil::cast<float16>(xf);
-    }
-
-    float x_sq = xf * xf;
-    // Generated by Sollya with:
-    // > P = fpminimax(asinh(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 2^-2]);
-    // The last coefficient 0x1.bd114ep-6f has been changed to 0x1.bd114ep-5f
-    // for better accuracy.
-    float p = fputil::polyeval(x_sq, 1.0f, -0x1.555552p-3f, 0x1.332f6ap-4f,
-                               -0x1.6c53dep-5f, 0x1.bd114ep-5f);
-
-    return fputil::cast<float16>(xf * p);
-  }
-
-  // General case: asinh(x) = ln(x + sqrt(x^2 + 1))
-  float sqrt_term = fputil::sqrt<float>(fputil::multiply_add(xf, xf, 1.0f));
-  return fputil::cast<float16>(
-      x_sign * log_eval(fputil::multiply_add(xf, x_sign, sqrt_term)));
-}
+LLVM_LIBC_FUNCTION(float16, asinhf16, (float16 x)) { return math::asinhf16(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/atan.cpp b/libc/src/math/generic/atan.cpp
index cbca605..93bf2e1 100644
--- a/libc/src/math/generic/atan.cpp
+++ b/libc/src/math/generic/atan.cpp
@@ -7,173 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/atan.h"
-#include "atan_utils.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/atan.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-// To compute atan(x), we divided it into the following cases:
-// * |x| < 2^-26:
-//      Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
-//      return atan(x) = x - sign(x) * epsilon.
-// * 2^-26 <= |x| < 1:
-//      We perform range reduction mod 2^-6 = 1/64 as follow:
-//      Let k = 2^(-6) * round(|x| * 2^6), then
-//        atan(x) = sign(x) * atan(|x|)
-//                = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
-//      We store atan(k) in a look up table, and perform intermediate steps in
-//      double-double.
-// * 1 < |x| < 2^53:
-//      First we perform the transformation y = 1/|x|:
-//        atan(x) = sign(x) * (pi/2 - atan(1/|x|))
-//                = sign(x) * (pi/2 - atan(y)).
-//      Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
-//      previous case:
-//      Let k = 2^(-6) * round(y * 2^6), then
-//        atan(y) = atan(k) + atan((y - k) / (1 + y*k))
-//                = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
-//                = atan(k) + atan((1 - k*|x|) / (|x| + k)).
-// * |x| >= 2^53:
-//      Using the reciprocal transformation:
-//        atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
-//      We have that:
-//        atan(1/|x|) <= 1/|x| <= 2^-53,
-//      which is smaller than ulp(pi/2) / 2.
-//      So we can return:
-//        atan(x) = sign(x) * (pi/2 - epsilon)
-
-LLVM_LIBC_FUNCTION(double, atan, (double x)) {
-  using FPBits = fputil::FPBits<double>;
-
-  constexpr double IS_NEG[2] = {1.0, -1.0};
-  constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
-                                      0x1.921fb54442d18p0};
-  constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
-                                       -0x1.921fb54442d18p0};
-
-  FPBits xbits(x);
-  bool x_sign = xbits.is_neg();
-  xbits = xbits.abs();
-  uint64_t x_abs = xbits.uintval();
-  int x_exp =
-      static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
-
-  // |x| < 1.
-  if (x_exp < 0) {
-    if (LIBC_UNLIKELY(x_exp < -26)) {
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      return x;
-#else
-      if (x == 0.0)
-        return x;
-      // |x| < 2^-26
-      return fputil::multiply_add(-0x1.0p-54, x, x);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-    }
-
-    double x_d = xbits.get_val();
-    // k = 2^-6 * round(2^6 * |x|)
-    double k = fputil::nearest_integer(0x1.0p6 * x_d);
-    unsigned idx = static_cast<unsigned>(k);
-    k *= 0x1.0p-6;
-
-    // numerator = |x| - k
-    DoubleDouble num, den;
-    num.lo = 0.0;
-    num.hi = x_d - k;
-
-    // denominator = 1 - k * |x|
-    den.hi = fputil::multiply_add(x_d, k, 1.0);
-    DoubleDouble prod = fputil::exact_mult(x_d, k);
-    // Using Dekker's 2SUM algorithm to compute the lower part.
-    den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;
-
-    // x_r = (|x| - k) / (1 + k * |x|)
-    DoubleDouble x_r = fputil::div(num, den);
-
-    // Approximating atan(x_r) using Taylor polynomial.
-    DoubleDouble p = atan_eval(x_r);
-
-    // atan(x) = sign(x) * (atan(k) + atan(x_r))
-    //         = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-    return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
-#else
-
-    DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
-    double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
-    double r = IS_NEG[x_sign] * (c0.hi + c1);
-
-    return r;
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  }
-
-  // |x| >= 2^53 or x is NaN.
-  if (LIBC_UNLIKELY(x_exp >= 53)) {
-    // x is nan
-    if (xbits.is_nan()) {
-      if (xbits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-      return x;
-    }
-    // |x| >= 2^53
-    // atan(x) ~ sign(x) * pi/2.
-    if (x_exp >= 53)
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      return IS_NEG[x_sign] * PI_OVER_2.hi;
-#else
-      return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
-                                  IS_NEG[x_sign] * PI_OVER_2.lo);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  }
-
-  double x_d = xbits.get_val();
-  double y = 1.0 / x_d;
-
-  // k = 2^-6 * round(2^6 / |x|)
-  double k = fputil::nearest_integer(0x1.0p6 * y);
-  unsigned idx = static_cast<unsigned>(k);
-  k *= 0x1.0p-6;
-
-  // denominator = |x| + k
-  DoubleDouble den = fputil::exact_add(x_d, k);
-  // numerator = 1 - k * |x|
-  DoubleDouble num;
-  num.hi = fputil::multiply_add(-x_d, k, 1.0);
-  DoubleDouble prod = fputil::exact_mult(x_d, k);
-  // Using Dekker's 2SUM algorithm to compute the lower part.
-  num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;
-
-  // x_r = (1/|x| - k) / (1 - k/|x|)
-  //     = (1 - k * |x|) / (|x| - k)
-  DoubleDouble x_r = fputil::div(num, den);
-
-  // Approximating atan(x_r) using Taylor polynomial.
-  DoubleDouble p = atan_eval(x_r);
-
-  // atan(x) = sign(x) * (pi/2 - atan(1/|x|))
-  //         = sign(x) * (pi/2 - atan(k) - atan(x_r))
-  //         = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
-  return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
-#else
-  DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
-  DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
-  double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
-
-  double r = IS_NEG[!x_sign] * (c1.hi + c2);
-
-  return r;
-#endif
-}
+LLVM_LIBC_FUNCTION(double, atan, (double x)) { return math::atan(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp
index aa770de..58042d3 100644
--- a/libc/src/math/generic/atan2.cpp
+++ b/libc/src/math/generic/atan2.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/atan2.h"
-#include "atan_utils.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/double_double.h"
@@ -15,6 +14,7 @@
 #include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/atan_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -72,6 +72,7 @@ namespace LIBC_NAMESPACE_DECL {
 //   |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73.
 
 LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) {
+  using namespace atan_internal;
   using FPBits = fputil::FPBits<double>;
 
   constexpr double IS_NEG[2] = {1.0, -1.0};
diff --git a/libc/src/math/generic/atan2f128.cpp b/libc/src/math/generic/atan2f128.cpp
index a3aba0b..8838d94 100644
--- a/libc/src/math/generic/atan2f128.cpp
+++ b/libc/src/math/generic/atan2f128.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/atan2f128.h"
-#include "atan_utils.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/dyadic_float.h"
 #include "src/__support/FPUtil/multiply_add.h"
@@ -16,6 +15,7 @@
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 #include "src/__support/macros/properties/types.h"
+#include "src/__support/math/atan_utils.h"
 #include "src/__support/uint128.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -103,6 +103,7 @@ static constexpr Float128 CONST_ADJ[2][2][2] = {
 //   |(atan(u) - P(u)) / P(u)| < 2^-114.
 
 LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) {
+  using namespace atan_internal;
   using FPBits = fputil::FPBits<float128>;
   using Float128 = fputil::DyadicFloat<128>;
 
diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp
index 22f962e..acd32f0 100644
--- a/libc/src/math/generic/atanf.cpp
+++ b/libc/src/math/generic/atanf.cpp
@@ -7,116 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/atanf.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/math/inv_trigf_utils.h"
+#include "src/__support/math/atanf.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
-  using namespace inv_trigf_utils_internal;
-  using FPBits = typename fputil::FPBits<float>;
-
-  constexpr double FINAL_SIGN[2] = {1.0, -1.0};
-  constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
-                                          -0x1.921fb54442d18p0};
-
-  FPBits x_bits(x);
-  Sign sign = x_bits.sign();
-  x_bits.set_sign(Sign::POS);
-  uint32_t x_abs = x_bits.uintval();
-
-  // x is inf or nan, |x| < 2^-4 or |x|= > 16.
-  if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
-    double x_d = static_cast<double>(x);
-    double const_term = 0.0;
-    if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
-      // atan(+-Inf) = +-pi/2.
-      if (x_bits.is_inf()) {
-        volatile double sign_pi_over_2 = SIGNED_PI_OVER_2[sign.is_neg()];
-        return static_cast<float>(sign_pi_over_2);
-      }
-      if (x_bits.is_nan())
-        return x;
-      // x >= 16
-      x_d = -1.0 / x_d;
-      const_term = SIGNED_PI_OVER_2[sign.is_neg()];
-    }
-    // 0 <= x < 1/16;
-    if (LIBC_UNLIKELY(x_bits.is_zero()))
-      return x;
-    // x <= 2^-12;
-    if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
-      return fputil::multiply_add(x, -0x1.0p-25f, x);
-#else
-      double x_d = static_cast<double>(x);
-      return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
-#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
-    }
-    // Use Taylor polynomial:
-    //   atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
-    constexpr double ATAN_TAYLOR[6] = {
-        0x1.0000000000000p+0,  -0x1.5555555555555p-2, 0x1.999999999999ap-3,
-        -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4,  -0x1.745d1745d1746p-4,
-    };
-    double x2 = x_d * x_d;
-    double x4 = x2 * x2;
-    double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
-    double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
-    double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]);
-    double p = fputil::polyeval(x4, c0, c1, c2);
-    double r = fputil::multiply_add(x_d, p, const_term);
-    return static_cast<float>(r);
-  }
-
-  // Range reduction steps:
-  // 1)  atan(x) = sign(x) * atan(|x|)
-  // 2)  If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
-  // 3)  For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
-  // 4)  Then we use polynomial approximation:
-  //   atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16)
-  //           = P(x - k/16)
-  double x_d, const_term, final_sign;
-  int idx;
-
-  if (x_abs > 0x3f80'0000U) {
-    // |x| > 1, we need to invert x, so we will perform range reduction in
-    // double precision.
-    x_d = 1.0 / static_cast<double>(x_bits.get_val());
-    double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
-    x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
-    idx = static_cast<int>(k_d);
-    final_sign = FINAL_SIGN[sign.is_pos()];
-    // Adjust constant term of the polynomial by +- pi/2.
-    const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
-                                      SIGNED_PI_OVER_2[sign.is_neg()]);
-  } else {
-    // Exceptional value:
-    if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4
-      return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f)
-                           : fputil::round_result_slightly_up(-0x1.1a6386p-4f);
-    }
-    // Perform range reduction in single precision.
-    float x_f = x_bits.get_val();
-    float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
-    x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
-    x_d = static_cast<double>(x_f);
-    idx = static_cast<int>(k_f);
-    final_sign = FINAL_SIGN[sign.is_neg()];
-    const_term = final_sign * ATAN_COEFFS[idx][0];
-  }
-
-  double p = atan_eval(x_d, idx);
-  double r = fputil::multiply_add(final_sign * x_d, p, const_term);
-
-  return static_cast<float>(r);
-}
+LLVM_LIBC_FUNCTION(float, atanf, (float x)) { return math::atanf(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/fabsbf16.cpp b/libc/src/math/generic/fabsbf16.cpp
new file mode 100644
index 0000000..ea39719
--- /dev/null
+++ b/libc/src/math/generic/fabsbf16.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of fabsbf16 function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fabsbf16.h"
+
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/common.h" // LLVM_LIBC_FUNCTION
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// fabsbf16 entrypoint: returns fputil::abs(x).
+LLVM_LIBC_FUNCTION(bfloat16, fabsbf16, (bfloat16 x)) { return fputil::abs(x); }
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index c5db6fa..fe31e6a 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -273,6 +273,40 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  pthread_barrier_init # entrypoint built from pthread_barrier_init.cpp
+  SRCS
+    pthread_barrier_init.cpp
+  HDRS
+    pthread_barrier_init.h
+  DEPENDS
+    libc.src.errno.errno # NOTE(review): pthread_barrier_init.cpp does not reference errno itself; presumably needed by Barrier::init - confirm
+    libc.include.pthread
+    libc.src.__support.threads.linux.barrier # pthread_barrier_init.cpp includes threads/linux/barrier.h; NOTE(review): Linux-specific target listed unconditionally - confirm other OS configs never build this entrypoint
+)
+
+add_entrypoint_object(
+  pthread_barrier_destroy # entrypoint built from pthread_barrier_destroy.cpp
+  SRCS
+    pthread_barrier_destroy.cpp
+  HDRS
+    pthread_barrier_destroy.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.barrier # pthread_barrier_destroy.cpp includes threads/linux/barrier.h and calls Barrier::destroy
+)
+
+add_entrypoint_object(
+  pthread_barrier_wait # entrypoint built from pthread_barrier_wait.cpp
+  SRCS
+    pthread_barrier_wait.cpp
+  HDRS
+    pthread_barrier_wait.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.barrier # presumably provides the Barrier used by pthread_barrier_wait.cpp, as in the init/destroy entrypoints - confirm
+)
+
+add_entrypoint_object(
   pthread_mutex_init
   SRCS
     pthread_mutex_init.cpp
diff --git a/libc/src/pthread/pthread_barrier_destroy.cpp b/libc/src/pthread/pthread_barrier_destroy.cpp
new file mode 100644
index 0000000..82de8f2
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_destroy.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of the pthread_barrier_destroy function ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_destroy.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Forwards to Barrier::destroy; assumes *b is layout-compatible with Barrier.
+LLVM_LIBC_FUNCTION(int, pthread_barrier_destroy, (pthread_barrier_t * b)) {
+  return Barrier::destroy(reinterpret_cast<Barrier *>(b));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_destroy.h b/libc/src/pthread/pthread_barrier_destroy.h
new file mode 100644
index 0000000..e27552c
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_destroy.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for pthread_barrier_destroy -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_destroy(pthread_barrier_t *b);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_DESTROY_H
diff --git a/libc/src/pthread/pthread_barrier_init.cpp b/libc/src/pthread/pthread_barrier_init.cpp
new file mode 100644
index 0000000..2e92238
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_init.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of the pthread_barrier_init function ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_init.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "hdr/types/pthread_barrierattr_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, pthread_barrier_init,
+                   (pthread_barrier_t * b,
+                    const pthread_barrierattr_t *__restrict attr,
+                    unsigned count)) {
+  return Barrier::init(reinterpret_cast<Barrier *>(b), attr, count);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_init.h b/libc/src/pthread/pthread_barrier_init.h
new file mode 100644
index 0000000..bb17f3f
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_init.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for pthread_barrier_init ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "hdr/types/pthread_barrierattr_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_init(pthread_barrier_t *b,
+                         const pthread_barrierattr_t *__restrict attr,
+                         unsigned count);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_INIT_H
diff --git a/libc/src/pthread/pthread_barrier_wait.cpp b/libc/src/pthread/pthread_barrier_wait.cpp
new file mode 100644
index 0000000..dbd1333
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_wait.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of the pthread_barrier_wait function ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "pthread_barrier_wait.h"
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/threads/linux/barrier.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, pthread_barrier_wait, (pthread_barrier_t * b)) {
+  return reinterpret_cast<Barrier *>(b)->wait();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/pthread/pthread_barrier_wait.h b/libc/src/pthread/pthread_barrier_wait.h
new file mode 100644
index 0000000..16ddc06
--- /dev/null
+++ b/libc/src/pthread/pthread_barrier_wait.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for pthread_barrier_wait ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
+
+#include "hdr/types/pthread_barrier_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int pthread_barrier_wait(pthread_barrier_t *b);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_BARRIER_WAIT_H
diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt
index e879230..548938f 100644
--- a/libc/src/stdio/baremetal/CMakeLists.txt
+++ b/libc/src/stdio/baremetal/CMakeLists.txt
@@ -72,6 +72,7 @@ add_entrypoint_object(
     ../scanf.h
   DEPENDS
     .scanf_internal
+    libc.include.inttypes
     libc.src.stdio.scanf_core.scanf_main
     libc.src.__support.arg_list
     libc.src.__support.OSUtil.osutil
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index dee125c..561180c 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -35,6 +35,7 @@ add_header_library(
     core_structs.h
   DEPENDS
     .scanf_config
+    libc.include.inttypes
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.bitset
     libc.src.__support.FPUtil.fp_bits
@@ -97,6 +98,7 @@ add_header_library(
   DEPENDS
     .reader
     .core_structs
+    libc.include.inttypes
     libc.src.__support.common
     libc.src.__support.ctype_utils
     libc.src.__support.CPP.bitset
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 49f4a1b..9ba0a06 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -185,6 +185,57 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  mbstowcs
+  SRCS
+    mbstowcs.cpp
+  HDRS
+    mbstowcs.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.macros.null_check
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
+  mbsrtowcs
+  SRCS
+    mbsrtowcs.cpp
+  HDRS
+    mbsrtowcs.h
+  DEPENDS
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
+  mbsnrtowcs
+  SRCS
+    mbsnrtowcs.cpp
+  HDRS
+    mbsnrtowcs.h
+  DEPENDS
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.mbsnrtowcs
+)
+
+add_entrypoint_object(
   wcstombs
   SRCS
     wcstombs.cpp
diff --git a/libc/src/wchar/mbsnrtowcs.cpp b/libc/src/wchar/mbsnrtowcs.cpp
new file mode 100644
index 0000000..28e0ff3
--- /dev/null
+++ b/libc/src/wchar/mbsnrtowcs.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of mbsnrtowcs --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsnrtowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t nmc, size_t len, mbstate_t *__restrict ps)) {
+  static internal::mbstate internal_mbstate;
+  // If destination is null, ignore len
+  len = dst == nullptr ? SIZE_MAX : len;
+  auto ret = internal::mbsnrtowcs(
+      dst, src, nmc, len,
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps));
+  if (!ret.has_value()) {
+    // Encoding failure
+    libc_errno = ret.error();
+    return -1;
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsnrtowcs.h b/libc/src/wchar/mbsnrtowcs.h
new file mode 100644
index 0000000..0d66b95
--- /dev/null
+++ b/libc/src/wchar/mbsnrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsnrtowcs ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                  size_t nmc, size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000..82ca25a
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of mbsrtowcs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsrtowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  static internal::mbstate internal_mbstate;
+  // If destination is null, ignore len
+  len = dst == nullptr ? SIZE_MAX : len;
+  auto ret = internal::mbsnrtowcs(
+      dst, src, SIZE_MAX, len,
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps));
+  if (!ret.has_value()) {
+    // Encoding failure
+    libc_errno = ret.error();
+    return -1;
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h
new file mode 100644
index 0000000..f8d4cc2
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsrtowcs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                 size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
diff --git a/libc/src/wchar/mbstowcs.cpp b/libc/src/wchar/mbstowcs.cpp
new file mode 100644
index 0000000..43e953c
--- /dev/null
+++ b/libc/src/wchar/mbstowcs.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of mbstowcs ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbstowcs.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbsnrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, mbstowcs,
+                   (wchar_t *__restrict pwcs, const char *__restrict s,
+                    size_t n)) {
+  LIBC_CRASH_ON_NULLPTR(s);
+  // If destination is null, ignore n
+  n = pwcs == nullptr ? SIZE_MAX : n;
+  static internal::mbstate internal_mbstate;
+  const char *temp = s;
+  auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate);
+
+  if (!ret.has_value()) {
+    // Encoding failure
+    libc_errno = ret.error();
+    return -1;
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbstowcs.h b/libc/src/wchar/mbstowcs.h
new file mode 100644
index 0000000..7d08a83
--- /dev/null
+++ b/libc/src/wchar/mbstowcs.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for mbstowcs --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index e0218c7..55a3cee 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,13 +17,10 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-// returns true if the character exists in the string
-LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) {
-  for (int n = 0; str[n]; ++n) {
-    if (str[n] == c)
-      return true;
-  }
-  return false;
+LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+  for (; *s && *s != c; ++s)
+    ;
+  return (*s == c) ? s : nullptr;
 }
 
 // bool should be true for wcscspn for complimentary span
@@ -32,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2,
                                  bool not_match_set) {
   size_t i = 0;
   for (; s1[i]; ++i) {
-    bool in_set = wcschr(s1[i], s2);
+    bool in_set = internal::wcschr(s2, s1[i]);
     if (in_set == not_match_set)
       return i;
   }
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index defc2ce..8ac4916 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,15 +11,14 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
-  for (; *s && *s != c; ++s)
-    ;
-  if (*s == c)
-    return s;
-  return nullptr;
+  LIBC_CRASH_ON_NULLPTR(s);
+  return internal::wcschr(s, c);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index a00ba99..f329b73 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -11,17 +11,10 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/null_check.h"
+#include "src/wchar/wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-bool contains_char(const wchar_t *str, wchar_t target) {
-  for (; *str != L'\0'; str++)
-    if (*str == target)
-      return true;
-
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
                    (const wchar_t *src, const wchar_t *breakset)) {
   LIBC_CRASH_ON_NULLPTR(src);
@@ -29,7 +22,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
 
   // currently O(n * m), can be further optimized to O(n + m) with a hash set
   for (int src_idx = 0; src[src_idx] != 0; src_idx++)
-    if (contains_char(breakset, src[src_idx]))
+    if (internal::wcschr(breakset, src[src_idx]))
       return src + src_idx;
 
   return nullptr;
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 291efc1..ed4f0aa 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -10,18 +10,12 @@
 
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
+#include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
-  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
-    if (wc == *delim_ptr)
-      return true;
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
-                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delims,
                     wchar_t **__restrict context)) {
   if (str == nullptr) {
     if (*context == nullptr)
@@ -30,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
     str = *context;
   }
 
-  wchar_t *tok_start, *tok_end;
-  for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
-       ++tok_start)
-    ;
+  wchar_t *tok_start = str;
+  while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
+    ++tok_start;
 
-  for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
-       ++tok_end)
-    ;
+  wchar_t *tok_end = tok_start;
+  while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
+    ++tok_end;
 
   if (*tok_end != L'\0') {
     *tok_end = L'\0';
diff --git a/libc/test/integration/src/pthread/CMakeLists.txt b/libc/test/integration/src/pthread/CMakeLists.txt
index 0bdd99c..251b009 100644
--- a/libc/test/integration/src/pthread/CMakeLists.txt
+++ b/libc/test/integration/src/pthread/CMakeLists.txt
@@ -19,6 +19,23 @@ add_integration_test(
 )
 
 add_integration_test(
+  pthread_barrier_test
+  SUITE
+    libc-pthread-integration-tests
+  SRCS
+    pthread_barrier_test.cpp
+  DEPENDS
+    libc.include.pthread
+    libc.src.errno.errno
+    libc.src.pthread.pthread_barrier_destroy
+    libc.src.pthread.pthread_barrier_wait
+    libc.src.pthread.pthread_barrier_init
+    libc.src.pthread.pthread_create
+    libc.src.pthread.pthread_join
+    libc.src.stdio.printf
+)
+
+add_integration_test(
   pthread_rwlock_test
   SUITE
     libc-pthread-integration-tests
diff --git a/libc/test/integration/src/pthread/pthread_barrier_test.cpp b/libc/test/integration/src/pthread/pthread_barrier_test.cpp
new file mode 100644
index 0000000..c8e1104
--- /dev/null
+++ b/libc/test/integration/src/pthread/pthread_barrier_test.cpp
@@ -0,0 +1,117 @@
+//===-- Tests for pthread_barrier_t ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_barrier_destroy.h"
+#include "src/pthread/pthread_barrier_init.h"
+#include "src/pthread/pthread_barrier_wait.h"
+
+#include "src/__support/CPP/atomic.h"
+#include "src/pthread/pthread_create.h"
+#include "src/pthread/pthread_join.h"
+#include "src/pthread/pthread_mutex_destroy.h"
+#include "src/pthread/pthread_mutex_init.h"
+#include "src/pthread/pthread_mutex_lock.h"
+#include "src/pthread/pthread_mutex_unlock.h"
+#include "src/string/memset.h"
+
+#include "test/IntegrationTest/test.h"
+
+#include <pthread.h>
+
+pthread_barrier_t barrier;
+LIBC_NAMESPACE::cpp::Atomic<int> counter;
+
+void *increment_counter_and_wait(void *args) {
+  counter.fetch_add(1);
+  return reinterpret_cast<void *>(
+      LIBC_NAMESPACE::pthread_barrier_wait(&barrier));
+}
+
+void single_use_barrier_test(int num_threads) {
+  counter.set(0);
+  // n - 1 ADDITIONAL threads are created (the current thread also waits at the
+  // barrier); the array has n slots so it is never zero-length when n == 1.
+  pthread_t threads[num_threads];
+  LIBC_NAMESPACE::memset(&barrier, 0, sizeof(pthread_barrier_t));
+  ASSERT_EQ(
+      LIBC_NAMESPACE::pthread_barrier_init(&barrier, nullptr, num_threads), 0);
+
+  for (int i = 0; i < num_threads - 1; ++i)
+    LIBC_NAMESPACE::pthread_create(&threads[i], nullptr,
+                                   increment_counter_and_wait, nullptr);
+
+  uintptr_t return_val_sum =
+      reinterpret_cast<uintptr_t>(increment_counter_and_wait(nullptr));
+  ASSERT_EQ(counter.load(), num_threads);
+
+  // verify only one thread got the PTHREAD_BARRIER_SERIAL_THREAD return value
+  for (int i = 0; i < num_threads - 1; ++i) {
+    void *ret;
+    LIBC_NAMESPACE::pthread_join(threads[i], &ret);
+    if (reinterpret_cast<uintptr_t>(ret) ==
+        static_cast<uintptr_t>(PTHREAD_BARRIER_SERIAL_THREAD)) {
+      return_val_sum += reinterpret_cast<uintptr_t>(ret);
+    } else {
+      ASSERT_EQ(ret, 0);
+    }
+  }
+  ASSERT_EQ(return_val_sum,
+            static_cast<uintptr_t>(PTHREAD_BARRIER_SERIAL_THREAD));
+
+  LIBC_NAMESPACE::pthread_barrier_destroy(&barrier);
+}
+
+void reused_barrier_test() {
+  counter.set(0);
+  const int NUM_THREADS = 30;
+  const int REPEAT = 20;
+  pthread_t threads[NUM_THREADS - 1]; // subtract 1 for main thread
+  LIBC_NAMESPACE::memset(&barrier, 0, sizeof(pthread_barrier_t));
+  ASSERT_EQ(
+      LIBC_NAMESPACE::pthread_barrier_init(&barrier, nullptr, NUM_THREADS), 0);
+
+  for (int i = 0; i < REPEAT; ++i) {
+    for (int j = 0; j < NUM_THREADS - 1; ++j)
+      LIBC_NAMESPACE::pthread_create(&threads[j], nullptr,
+                                     increment_counter_and_wait, nullptr);
+
+    uintptr_t return_val_sum =
+        reinterpret_cast<uintptr_t>(increment_counter_and_wait(nullptr));
+    ASSERT_EQ(counter.load(), NUM_THREADS * (i + 1));
+
+    // verify only one thread got the PTHREAD_BARRIER_SERIAL_THREAD return value
+    for (int j = 0; j < NUM_THREADS - 1; ++j) {
+      void *ret;
+      LIBC_NAMESPACE::pthread_join(threads[j], &ret);
+      if (reinterpret_cast<uintptr_t>(ret) ==
+          static_cast<uintptr_t>(PTHREAD_BARRIER_SERIAL_THREAD)) {
+        return_val_sum += reinterpret_cast<uintptr_t>(ret);
+      } else {
+        ASSERT_EQ(ret, 0);
+      }
+    }
+    ASSERT_EQ(return_val_sum,
+              static_cast<uintptr_t>(PTHREAD_BARRIER_SERIAL_THREAD));
+  }
+
+  LIBC_NAMESPACE::pthread_barrier_destroy(&barrier);
+}
+
+void *barrier_wait(void *in) {
+  return reinterpret_cast<void *>(
+      LIBC_NAMESPACE::pthread_barrier_wait(&barrier));
+}
+
+TEST_MAIN() {
+  // don't create any additional threads; only use main thread
+  single_use_barrier_test(1);
+
+  single_use_barrier_test(30);
+  reused_barrier_test();
+  return 0;
+}
diff --git a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
index 77479f8..4c540a8 100644
--- a/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
+++ b/libc/test/integration/src/stdlib/gpu/malloc_stress.cpp
@@ -14,6 +14,20 @@
 
 using namespace LIBC_NAMESPACE;
 
+static inline uint32_t entropy() {
+  return (static_cast<uint32_t>(gpu::processor_clock()) ^
+          (gpu::get_thread_id_x() * 0x632be59b) ^
+          (gpu::get_block_id_x() * 0x85157af5)) *
+         0x9e3779bb;
+}
+
+static inline uint32_t xorshift32(uint32_t &state) {
+  state ^= state << 13;
+  state ^= state >> 17;
+  state ^= state << 5;
+  return state * 0x9e3779bb;
+}
+
 static inline void use(uint8_t *ptr, uint32_t size) {
   EXPECT_NE(ptr, nullptr);
   for (int i = 0; i < size; ++i)
@@ -34,5 +48,19 @@ TEST_MAIN(int, char **, char **) {
 
   for (int i = 0; i < 256; ++i)
     free(ptrs[i]);
+
+  uint32_t state = entropy();
+  for (int i = 0; i < 1024; ++i) {
+    if (xorshift32(state) % 2) {
+      uint64_t size = xorshift32(state) % 256 + 16;
+      uint64_t *ptr = reinterpret_cast<uint64_t *>(malloc(size));
+      // Validate the allocation before writing through the pointer.
+      ASSERT_TRUE(ptr);
+      ASSERT_TRUE(__builtin_is_aligned(ptr, 16));
+      *ptr = gpu::get_thread_id();
+      EXPECT_EQ(*ptr, gpu::get_thread_id());
+      free(ptr);
+    }
+  }
   return 0;
 }
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index e5dfac9..6d0601f 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -15,6 +15,11 @@ add_fp_unittest(
     libc.src.__support.math.acospif16
     libc.src.__support.math.asin
     libc.src.__support.math.asinf
+    libc.src.__support.math.asinf16
+    libc.src.__support.math.asinhf
+    libc.src.__support.math.asinhf16
+    libc.src.__support.math.atan
+    libc.src.__support.math.atanf
     libc.src.__support.math.erff
     libc.src.__support.math.exp
     libc.src.__support.math.exp10
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 7881d68..228fa42 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -17,6 +17,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
 
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acoshf16(1.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acospif16(1.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinhf16(0.0f16));
 
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
 
@@ -41,6 +43,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat) {
   EXPECT_FP_EQ(0x1.921fb6p+0, LIBC_NAMESPACE::shared::acosf(0.0f));
   EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f));
   EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinf(0.0f));
+  EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinhf(0.0f));
+  EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanf(0.0f));
   EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f));
   EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f));
   EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f));
@@ -56,6 +60,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat) {
 TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1.921fb54442d18p+0, LIBC_NAMESPACE::shared::acos(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::asin(0.0));
+  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::atan(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
 }
diff --git a/libc/test/src/__support/FPUtil/comparison_operations_test.cpp b/libc/test/src/__support/FPUtil/comparison_operations_test.cpp
index 04a3321..05b8f68 100644
--- a/libc/test/src/__support/FPUtil/comparison_operations_test.cpp
+++ b/libc/test/src/__support/FPUtil/comparison_operations_test.cpp
@@ -25,28 +25,15 @@ template <typename T>
 class ComparisonOperationsTest : public LIBC_NAMESPACE::testing::FEnvSafeTest {
   DECLARE_SPECIAL_CONSTANTS(T)
 
-  // TODO: Make these constexpr once quick_get_round is made constexpr.
-  T normal1;
-  T neg_normal1;
-  T normal2;
-  T small;
-  T neg_small;
-  T large;
-  T neg_large;
+  static constexpr T normal1 = T(3.14);
+  static constexpr T neg_normal1 = T(-3.14);
+  static constexpr T normal2 = T(2.71);
+  static constexpr T small = T(0.1);
+  static constexpr T neg_small = T(-0.1);
+  static constexpr T large = T(10000.0);
+  static constexpr T neg_large = T(-10000.0);
 
 public:
-  void SetUp() override {
-    with_fenv_preserved([this]() {
-      normal1 = T(3.14);
-      neg_normal1 = T(-3.14);
-      normal2 = T(2.71);
-      small = T(0.1);
-      neg_small = T(-0.1);
-      large = T(10000.0);
-      neg_large = T(-10000.0);
-    });
-  }
-
   void test_equals() {
     EXPECT_TRUE(equals(neg_zero, neg_zero));
     EXPECT_TRUE(equals(zero, neg_zero));
diff --git a/libc/test/src/math/generic/CMakeLists.txt b/libc/test/src/math/generic/CMakeLists.txt
index 1fe7801..a9d54d6 100644
--- a/libc/test/src/math/generic/CMakeLists.txt
+++ b/libc/test/src/math/generic/CMakeLists.txt
@@ -30,4 +30,3 @@ add_fp_unittest(
   DEPENDS
     libc.src.math.generic.ceill
 )
-
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index ec4c09c..40b7a342 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -221,6 +221,19 @@ add_fp_unittest(
 )
 
 add_fp_unittest(
+  fabsbf16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    fabsbf16_test.cpp
+  HDRS
+    FAbsTest.h
+  DEPENDS
+    libc.src.__support.FPUtil.bfloat16
+    libc.src.math.fabsbf16
+)
+
+add_fp_unittest(
   fadd_test
   SUITE
     libc-math-smoke-tests
diff --git a/libc/test/src/math/smoke/fabsbf16_test.cpp b/libc/test/src/math/smoke/fabsbf16_test.cpp
new file mode 100644
index 0000000..611050a
--- /dev/null
+++ b/libc/test/src/math/smoke/fabsbf16_test.cpp
@@ -0,0 +1,14 @@
+//===-- Unittests for fabsbf16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FAbsTest.h"
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/math/fabsbf16.h"
+
+LIST_FABS_TESTS(bfloat16, LIBC_NAMESPACE::fabsbf16)
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index fad89dc..d1a0684 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -76,6 +76,19 @@ add_libc_test(
 )
 
 add_libc_test(
+  mbstowcs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbstowcs_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.wchar.mbstowcs
+    libc.hdr.types.wchar_t
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
+add_libc_test(
   mblen_test
   SUITE
     libc_wchar_unittests
@@ -88,6 +101,22 @@ add_libc_test(
 )
 
 add_libc_test(
+  mbsrtowcs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbsrtowcs_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.string.memset
+    libc.src.wchar.mbsrtowcs
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
+add_libc_test(
   mbrlen_test
   SUITE
     libc_wchar_unittests
@@ -97,7 +126,23 @@ add_libc_test(
     libc.src.__support.libc_errno
     libc.src.__support.wchar.mbstate
     libc.src.string.memset
     libc.src.wchar.mbrlen
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
+add_libc_test(
+  mbsnrtowcs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbsnrtowcs_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.string.memset  # used by the test to zero its mbstate_t
+    libc.src.wchar.mbsnrtowcs  # entrypoint under test
     libc.hdr.types.mbstate_t
     libc.hdr.types.wchar_t
     libc.test.UnitTest.ErrnoCheckingTest
diff --git a/libc/test/src/wchar/mbsnrtowcs_test.cpp b/libc/test/src/wchar/mbsnrtowcs_test.cpp
new file mode 100644
index 0000000..a3de68f
--- /dev/null
+++ b/libc/test/src/wchar/mbsnrtowcs_test.cpp
@@ -0,0 +1,212 @@
+//===-- Unittests for mbsnrtowcs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbsnrtowcs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcMBSNRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcMBSNRToWCSTest, OneByteOneChar) {
+  const char *ch = "A";
+  const char *original = ch;
+  wchar_t dest[2];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &ch, 1, 1, &mb);
+  ASSERT_EQ(static_cast<char>(*dest), 'A');
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // ch has advanced past 'A' and now points at the null terminator
+  ASSERT_EQ(ch, original + 1);
+  ASSERT_ERRNO_SUCCESS();
+
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &ch, 1, 1, &mb);
+  ASSERT_EQ(static_cast<char>(dest[1]), '\0');
+  // The terminator is stored in dest but not counted in the return value
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // Converting the terminator resets ch to nullptr
+  ASSERT_EQ(ch, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, FourByteOneChar) {
+  const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
+  const char *original = src;
+  wchar_t dest[2];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // A 3-byte limit ends inside the 4-byte character: nothing converted yet
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 3, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 0);
+  ASSERT_EQ(src, original + 3);
+  // Supply the 2 remaining bytes (last byte of cat + null terminator)
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 2, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  // Only the emoji is counted, not the null terminator
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(src, nullptr);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_TRUE(dest[1] == L'\0');
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, MixedNumberOfBytes) {
+  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
+  const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+
+  // Read 'A' (1 byte)
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 1, 1, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<char>(dest[0]), 'A');
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(src, original + 1);
+
+  // Read sigma 'Σ' (2 bytes, U+03A3 = 931)
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 2, 1, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(dest[1]), 931);
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(src, original + 3);
+
+  // Read recycling '♻' (3 bytes) in two calls: 2 bytes, then the last byte
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 2, 5, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 0);
+  ASSERT_EQ(src, original + 5);
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 1, 1, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(src, original + 6);
+  ASSERT_EQ(static_cast<int>(dest[2]), 9851);
+
+  // Read laughing cat emoji '😹' (4 bytes)
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 3, &src, 4, 5, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(src, original + 10);
+  ASSERT_EQ(static_cast<int>(dest[3]), 128569);
+
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 4, &src, 4, 4, nullptr);
+  ASSERT_TRUE(dest[4] == L'\0');
+  ASSERT_ERRNO_SUCCESS();
+  // The null terminator is converted but not counted
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // src becomes nullptr once the terminator has been consumed
+  ASSERT_EQ(src, nullptr);
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, ReadLessThanStringLength) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'};
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 100, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // The wide-character limit of 3 stops conversion after three emojis
+  ASSERT_EQ(static_cast<int>(n), 3);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[2]), 128569);
+  ASSERT_TRUE(dest[3] == L'd'); // sentinels past the limit stay untouched
+  ASSERT_TRUE(dest[4] == L'e');
+  // src stops at the 4th emoji: three 4-byte emojis consumed, 12 bytes
+  ASSERT_EQ(src, original + 12);
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, InvalidFirstByte) {
+  // 0x80 is not a valid first byte of a multibyte character
+  const char *src =
+      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3]; // NOTE(review): len below (88) exceeds this capacity
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 88, 88, nullptr);
+  // Fails with (size_t)-1 and errno set to EILSEQ
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  // src must be left unchanged on failure
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, InvalidMiddleByte) {
+  // The 7th byte (0xf0) is not a valid continuation of the 2nd character
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3]; // NOTE(review): len below (88) exceeds this capacity
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // A 5-byte limit reads one full character plus the 2nd one's first byte
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 5, 88, &mb);
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(src, original + 5);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 1, &src, 5, 88, &mb);
+  // The invalid byte yields (size_t)-1 with EILSEQ; src stays where it was
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  ASSERT_EQ(src, original + 5);
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, NullDestination) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(nullptr, &src, 88, 88, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // Null destination: only counts the characters up to the end of the string
+  ASSERT_EQ(static_cast<int>(n), 4);
+  // src is not advanced when nothing is stored
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSNRToWCSTest, ErrnoChecks) {
+  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // First two characters are valid --> should not set errno
+  size_t n = LIBC_NAMESPACE::mbsnrtowcs(dest, &src, 80, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(src, original + 8);
+
+  // Trying to read the malformed 3rd character should set errno
+  n = LIBC_NAMESPACE::mbsnrtowcs(dest + 2, &src, 4, 2, &mb);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  // src must still point at the start of the 3rd character
+  ASSERT_EQ(src, original + 8);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS)
+TEST(LlvmLibcMBSNRToWCSTest, NullptrCrash) {
+  // With null checks compiled in, a null src pointer should crash the program.
+  EXPECT_DEATH(
+      [] { LIBC_NAMESPACE::mbsnrtowcs(nullptr, nullptr, 1, 1, nullptr); },
+      WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS
diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp
new file mode 100644
index 0000000..59efc0d
--- /dev/null
+++ b/libc/test/src/wchar/mbsrtowcs_test.cpp
@@ -0,0 +1,185 @@
+//===-- Unittests for mbsrtowcs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbsrtowcs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) {
+  const char *ch = "A";
+  const char *original = ch;
+  wchar_t dest[2];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, &mb);
+  ASSERT_EQ(static_cast<char>(*dest), 'A');
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // ch has advanced past 'A' and now points at the null terminator
+  ASSERT_EQ(ch, original + 1);
+  ASSERT_ERRNO_SUCCESS();
+
+  n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, &mb);
+  ASSERT_EQ(static_cast<char>(dest[1]), '\0');
+  // The terminator is stored in dest but not counted in the return value
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // Converting the terminator resets ch to nullptr
+  ASSERT_EQ(ch, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) {
+  const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
+  wchar_t dest[2];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_TRUE(dest[1] == L'\0');
+  // Only the emoji is counted, not the null terminator
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // src becomes nullptr once the terminator has been consumed
+  ASSERT_EQ(src, nullptr);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
+  // Two laughing cat emojis "😹😹"
+  const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_TRUE(dest[2] == L'\0');
+  // Both emojis are counted; the null terminator is not
+  ASSERT_EQ(static_cast<int>(n), 2);
+  // src becomes nullptr once the terminator has been consumed
+  ASSERT_EQ(src, nullptr);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) {
+  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
+  const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 4, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<char>(dest[0]), 'A');
+  ASSERT_EQ(static_cast<int>(dest[1]), 931);
+  ASSERT_EQ(static_cast<int>(dest[2]), 9851);
+  ASSERT_EQ(static_cast<int>(dest[3]), 128569);
+  // len = 4 stops before the terminator: src is at byte index 10 (1+2+3+4)
+  ASSERT_EQ(src, original + 10);
+  ASSERT_EQ(static_cast<int>(n), 4);
+  n = LIBC_NAMESPACE::mbsrtowcs(dest + 4, &src, 4, nullptr);
+  ASSERT_TRUE(dest[4] == L'\0');
+  ASSERT_ERRNO_SUCCESS();
+  // The null terminator is converted but not counted
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // src becomes nullptr once the terminator has been consumed
+  ASSERT_EQ(src, nullptr);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'};
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // The wide-character limit of 3 stops conversion after three emojis
+  ASSERT_EQ(static_cast<int>(n), 3);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[2]), 128569);
+  ASSERT_TRUE(dest[3] == L'd'); // sentinels past the limit stay untouched
+  ASSERT_TRUE(dest[4] == L'e');
+  // src stops at the 4th emoji: three 4-byte emojis consumed, 12 bytes
+  ASSERT_EQ(src, original + 12);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
+  // 0x80 is not a valid first byte of a multibyte character
+  const char *src =
+      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  // Fails with (size_t)-1 and errno set to EILSEQ
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  // src must be left unchanged on failure
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
+  // The 7th byte (0xf0) is not a valid continuation of the 2nd character
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3]; // NOTE(review): len below (5) exceeds this capacity
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 5, nullptr);
+  // Fails with (size_t)-1 and EILSEQ; src is left at the start of the string
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(nullptr, &src, 2, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // Null destination: len (2) is ignored and all 4 characters are counted
+  ASSERT_EQ(static_cast<int>(n), 4);
+  // src is not advanced when nothing is stored
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) {
+  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  // First two characters are valid --> should not set errno
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(src, original + 8);
+
+  // Trying to read the malformed 3rd character should set errno
+  n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, nullptr);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  // src must still point at the start of the 3rd character
+  ASSERT_EQ(src, original + 8);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS)
+TEST(LlvmLibcMBSRToWCSTest, NullptrCrash) {
+  // With null checks compiled in, a null src pointer should crash the program.
+  EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
+               WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS
diff --git a/libc/test/src/wchar/mbstowcs_test.cpp b/libc/test/src/wchar/mbstowcs_test.cpp
new file mode 100644
index 0000000..f0396e0
--- /dev/null
+++ b/libc/test/src/wchar/mbstowcs_test.cpp
@@ -0,0 +1,171 @@
+//===-- Unittests for mbstowcs --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/null_check.h"
+#include "src/wchar/mbstowcs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) {
+  const char *ch = "A";
+  const char *original = ch;
+  wchar_t dest[2];
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, ch, 1);
+  ASSERT_EQ(static_cast<char>(*dest), 'A');
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // ch is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(ch, original);
+  ASSERT_ERRNO_SUCCESS();
+
+  n = LIBC_NAMESPACE::mbstowcs(dest + 1, ch + 1, 1);
+  ASSERT_EQ(static_cast<char>(dest[1]), '\0');
+  // The terminator is stored in dest but not counted in the return value
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // ch is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(ch, original);
+  ASSERT_ERRNO_SUCCESS();
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) {
+  const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
+  const char *original = src;
+  wchar_t dest[2];
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_TRUE(dest[1] == L'\0');
+  // Only the emoji is counted, not the null terminator
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) {
+  // Two laughing cat emojis "😹😹"
+  const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_TRUE(dest[2] == L'\0');
+  // Both emojis are counted; the null terminator is not
+  ASSERT_EQ(static_cast<int>(n), 2);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) {
+  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
+  const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<char>(dest[0]), 'A');
+  ASSERT_EQ(static_cast<int>(dest[1]), 931);
+  ASSERT_EQ(static_cast<int>(dest[2]), 9851);
+  ASSERT_EQ(static_cast<int>(dest[3]), 128569);
+  ASSERT_TRUE(dest[4] == L'\0');
+  // All four characters are counted; the null terminator is not
+  ASSERT_EQ(static_cast<int>(n), 4);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'};
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
+  ASSERT_ERRNO_SUCCESS();
+  // The wide-character limit of 3 stops conversion after three emojis
+  ASSERT_EQ(static_cast<int>(n), 3);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[2]), 128569);
+  ASSERT_TRUE(dest[3] == L'd'); // sentinels past the limit stay untouched
+  ASSERT_TRUE(dest[4] == L'e');
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) {
+  // 0x80 is not a valid first byte of a multibyte character
+  const char *src =
+      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
+  // Fails with (size_t)-1 and errno set to EILSEQ
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) {
+  // The 7th byte (0xf0) is not a valid continuation of the 2nd character
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[3]; // NOTE(review): len below (5) exceeds this capacity
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5);
+  // Fails with (size_t)-1 and errno set to EILSEQ
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, NullDestination) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2);
+  ASSERT_ERRNO_SUCCESS();
+  // Null destination: len (2) is ignored and all 4 characters are counted
+  ASSERT_EQ(static_cast<int>(n), 4);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) {
+  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
+  const char *original = src;
+  wchar_t dest[5];
+  // First two characters are valid --> should not set errno
+  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+  // Skip both valid emojis (8 bytes): the malformed 3rd one should set errno
+  n = LIBC_NAMESPACE::mbstowcs(dest, src + 8, 2);
+  ASSERT_ERRNO_EQ(EILSEQ);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  // src is passed by value, so the caller's pointer cannot have moved
+  ASSERT_EQ(src, original);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS)
+TEST(LlvmLibcMBSToWCSTest, NullptrCrash) {
+  // With null checks compiled in, a null src pointer should crash the program.
+  EXPECT_DEATH([] { LIBC_NAMESPACE::mbstowcs(nullptr, nullptr, 1); },
+               WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS