21 files changed, 795 insertions, 360 deletions
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 94f8b95..6e447fc 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -7,6 +7,7 @@ add_header_library(
     libc.hdr.fenv_macros
     libc.hdr.math_macros
     libc.hdr.stdint_proxy
+    libc.src.__support.CPP.type_traits
     libc.src.__support.macros.attributes
     libc.src.errno.errno
 )
diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h
index 7691088..7bd5643 100644
--- a/libc/src/__support/FPUtil/FEnvImpl.h
+++ b/libc/src/__support/FPUtil/FEnvImpl.h
@@ -12,6 +12,7 @@
 #include "hdr/fenv_macros.h"
 #include "hdr/math_macros.h"
 #include "hdr/types/fenv_t.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/libc_errno.h"
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
@@ -72,40 +73,58 @@ LIBC_INLINE int set_env(const fenv_t *) { return 0; }
 namespace LIBC_NAMESPACE_DECL {
 namespace fputil {
 
-LIBC_INLINE static int clear_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+clear_except_if_required([[maybe_unused]] int excepts) {
+  if (cpp::is_constant_evaluated()) {
+    return 0;
+  } else {
 #ifndef LIBC_MATH_HAS_NO_EXCEPT
-  if (math_errhandling & MATH_ERREXCEPT)
-    return clear_except(excepts);
+    if (math_errhandling & MATH_ERREXCEPT)
+      return clear_except(excepts);
 #endif // LIBC_MATH_HAS_NO_EXCEPT
-  return 0;
+    return 0;
+  }
 }
 
-LIBC_INLINE static int set_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+set_except_if_required([[maybe_unused]] int excepts) {
+  if (cpp::is_constant_evaluated()) {
+    return 0;
+  } else {
 #ifndef LIBC_MATH_HAS_NO_EXCEPT
-  if (math_errhandling & MATH_ERREXCEPT)
-    return set_except(excepts);
+    if (math_errhandling & MATH_ERREXCEPT)
+      return set_except(excepts);
 #endif // LIBC_MATH_HAS_NO_EXCEPT
-  return 0;
+    return 0;
+  }
 }
 
-LIBC_INLINE static int raise_except_if_required([[maybe_unused]] int excepts) {
+LIBC_INLINE static constexpr int
+raise_except_if_required([[maybe_unused]] int excepts) {
+  if (cpp::is_constant_evaluated()) {
+    return 0;
+  } else {
 #ifndef LIBC_MATH_HAS_NO_EXCEPT
-  if (math_errhandling & MATH_ERREXCEPT)
+    if (math_errhandling & MATH_ERREXCEPT)
 #ifdef LIBC_TARGET_ARCH_IS_X86_64
-    return raise_except</*SKIP_X87_FPU*/ true>(excepts);
+      return raise_except</*SKIP_X87_FPU*/ true>(excepts);
 #else  // !LIBC_TARGET_ARCH_IS_X86
-    return raise_except(excepts);
+      return raise_except(excepts);
 #endif // LIBC_TARGET_ARCH_IS_X86
 
 #endif // LIBC_MATH_HAS_NO_EXCEPT
-  return 0;
+    return 0;
+  }
 }
 
-LIBC_INLINE static void set_errno_if_required([[maybe_unused]] int err) {
+LIBC_INLINE static constexpr void
+set_errno_if_required([[maybe_unused]] int err) {
+  if (!cpp::is_constant_evaluated()) {
 #ifndef LIBC_MATH_HAS_NO_ERRNO
-  if (math_errhandling & MATH_ERRNO)
-    libc_errno = err;
+    if (math_errhandling & MATH_ERRNO)
+      libc_errno = err;
 #endif // LIBC_MATH_HAS_NO_ERRNO
+  }
 }
 
 } // namespace fputil
diff --git a/libc/src/__support/FPUtil/generic/add_sub.h b/libc/src/__support/FPUtil/generic/add_sub.h
index fda7029..7205d8d 100644
--- a/libc/src/__support/FPUtil/generic/add_sub.h
+++ b/libc/src/__support/FPUtil/generic/add_sub.h
@@ -153,8 +153,10 @@ add_or_sub(InType x, InType y) {
 
     result_mant <<= GUARD_BITS_LEN;
   } else {
-    InStorageType max_mant = max_bits.get_explicit_mantissa() << GUARD_BITS_LEN;
-    InStorageType min_mant = min_bits.get_explicit_mantissa() << GUARD_BITS_LEN;
+    InStorageType max_mant = static_cast<InStorageType>(
+        max_bits.get_explicit_mantissa() << GUARD_BITS_LEN);
+    InStorageType min_mant = static_cast<InStorageType>(
+        min_bits.get_explicit_mantissa() << GUARD_BITS_LEN);
 
     int alignment = (max_bits.get_biased_exponent() - max_bits.is_normal()) -
                     (min_bits.get_biased_exponent() - min_bits.is_normal());
@@ -172,7 +174,8 @@ add_or_sub(InType x, InType y) {
           (static_cast<InStorageType>(
               min_mant << (InFPBits::STORAGE_LEN - alignment))) != 0;
 
-    InStorageType min_mant_sticky(static_cast<int>(aligned_min_mant_sticky));
+    InStorageType min_mant_sticky =
+        static_cast<InStorageType>(static_cast<int>(aligned_min_mant_sticky));
 
     if (is_effectively_add)
       result_mant = max_mant + (aligned_min_mant | min_mant_sticky);
diff --git a/libc/src/__support/macros/CMakeLists.txt b/libc/src/__support/macros/CMakeLists.txt
index 99d4f64..a639d3e 100644
--- a/libc/src/__support/macros/CMakeLists.txt
+++ b/libc/src/__support/macros/CMakeLists.txt
@@ -35,5 +35,4 @@ add_header_library(
   DEPENDS
     .config
     .optimization
-    .sanitizer
 )
diff --git a/libc/src/__support/macros/null_check.h b/libc/src/__support/macros/null_check.h
index abf65c56..438ba69 100644
--- a/libc/src/__support/macros/null_check.h
+++ b/libc/src/__support/macros/null_check.h
@@ -11,9 +11,8 @@
 
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h"
-#include "src/__support/macros/sanitizer.h"
 
-#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+#if defined(LIBC_ADD_NULL_CHECKS)
 #define LIBC_CRASH_ON_NULLPTR(ptr)                                             \
   do {                                                                         \
     if (LIBC_UNLIKELY((ptr) == nullptr))                                       \
diff --git a/libc/src/__support/macros/sanitizer.h b/libc/src/__support/macros/sanitizer.h
index c20412e..84268a1 100644
--- a/libc/src/__support/macros/sanitizer.h
+++ b/libc/src/__support/macros/sanitizer.h
@@ -23,16 +23,6 @@
 #define LIBC_HAS_MEMORY_SANITIZER
 #endif
 
-#if LIBC_HAS_FEATURE(undefined_behavior_sanitizer)
-#define LIBC_HAS_UNDEFINED_BEHAVIOR_SANITIZER
-#endif
-
-#if defined(LIBC_HAS_ADDRESS_SANITIZER) ||                                     \
-    defined(LIBC_HAS_MEMORY_SANITIZER) ||                                      \
-    defined(LIBC_HAS_UNDEFINED_BEHAVIOR_SANITIZER)
-#define LIBC_HAS_SANITIZER
-#endif
-
 #ifdef LIBC_HAS_MEMORY_SANITIZER
 // Only perform MSAN unpoison in non-constexpr context.
 #include <sanitizer/msan_interface.h>
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 926bbd5..b096c61 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -4,7 +4,6 @@ add_header_library(
     acos.h
   DEPENDS
     .asin_utils
-    libc.src.__support.math.asin_utils
     libc.src.__support.FPUtil.double_double
     libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
@@ -96,6 +95,21 @@ add_header_library(
 )
 
 add_header_library(
+  acospif16
+  HDRS
+    acospif16.h
+  DEPENDS
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.types
+)
+
+add_header_library(
   asin_utils
   HDRS
     asin_utils.h
@@ -110,6 +124,39 @@ add_header_library(
 )
 
 add_header_library(
+  asin
+  HDRS
+    asin.h
+  DEPENDS
+    .asin_utils
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
+add_header_library(
+  asinf
+  HDRS
+    asinf.h
+  DEPENDS
+    .inv_trigf_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.config
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
+add_header_library(
   erff
   HDRS
     erff.h
@@ -358,11 +405,10 @@ add_header_library(
   DEPENDS
     .exp10f16_utils
     libc.src.__support.FPUtil.fp_bits
-    src.__support.FPUtil.FEnvImpl
-    src.__support.FPUtil.FPBits
-    src.__support.FPUtil.cast
-    src.__support.FPUtil.rounding_mode
-    src.__support.FPUtil.except_value_utils
-    src.__support.macros.optimization
-    src.__support.macros.properties.cpu_features
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
 )
diff --git a/libc/src/__support/math/acoshf16.h b/libc/src/__support/math/acoshf16.h
index 15e7f6a..a02b7b6 100644
--- a/libc/src/__support/math/acoshf16.h
+++ b/libc/src/__support/math/acoshf16.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
-#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
 
 #include "include/llvm-libc-macros/float16-macros.h"
 
@@ -120,4 +120,4 @@ static constexpr float16 acoshf16(float16 x) {
 
 #endif // LIBC_TYPES_HAS_FLOAT16
 
-#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF_H
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSHF16_H
diff --git a/libc/src/__support/math/acospif16.h b/libc/src/__support/math/acospif16.h
new file mode 100644
index 0000000..5829aed
--- /dev/null
+++ b/libc/src/__support/math/acospif16.h
@@ -0,0 +1,147 @@
+//===-- Implementation header for acospif16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+static constexpr float16 acospif16(float16 x) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  uint16_t x_sign = x_u >> 15;
+
+  // |x| > 0x1p0, |x| > 1, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
+    // acospif16(NaN) = NaN
+    if (xbits.is_nan()) {
+      if (xbits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // 1 < |x| <= +inf
+    fputil::raise_except_if_required(FE_INVALID);
+    fputil::set_errno_if_required(EDOM);
+
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // |x| == 0x1p0, x is 1 or -1
+  // if x is (-)1, return 1
+  // if x is (+)1, return 0
+  if (LIBC_UNLIKELY(x_abs == 0x3c00))
+    return fputil::cast<float16>(x_sign ? 1.0f : 0.0f);
+
+  float xf = x;
+  float xsq = xf * xf;
+
+  // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya
+  // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0,
+  // 0.5]);
+  constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f,
+                                    0x1.897e36p-6f, 0x1.9efafcp-7f,
+                                    0x1.06d884p-6f};
+  // |x| <= 0x1p-1, |x| <= 0.5
+  if (x_abs <= 0x3800) {
+    // if x is 0, return 0.5
+    if (LIBC_UNLIKELY(x_abs == 0))
+      return fputil::cast<float16>(0.5f);
+
+    // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then
+    //            acospi(x) = 0.5 - asin(x)/pi
+    float interm =
+        fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2],
+                         POLY_COEFFS[3], POLY_COEFFS[4]);
+
+    return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f));
+  }
+
+  // When |x| > 0.5, assume that 0.5 < |x| <= 1
+  //
+  // Step-by-step range-reduction proof:
+  // 1:  Let y = asin(x), such that, x = sin(y)
+  // 2:  From complimentary angle identity:
+  //       x = sin(y) = cos(pi/2 - y)
+  // 3:  Let z = pi/2 - y, such that x = cos(z)
+  // 4:  From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
+  //       z = 2A, z/2 = A
+  //       cos(z) = 1 - 2 * sin^2(z/2)
+  // 5:  Make sin(z/2) subject of the formula:
+  //       sin(z/2) = sqrt((1 - cos(z))/2)
+  // 6:  Recall [3]; x = cos(z). Therefore:
+  //       sin(z/2) = sqrt((1 - x)/2)
+  // 7:  Let u = (1 - x)/2
+  // 8:  Therefore:
+  //       asin(sqrt(u)) = z/2
+  //       2 * asin(sqrt(u)) = z
+  // 9:  Recall [3]; z = pi/2 - y. Therefore:
+  //       y = pi/2 - z
+  //       y = pi/2 - 2 * asin(sqrt(u))
+  // 10: Recall [1], y = asin(x). Therefore:
+  //       asin(x) = pi/2 - 2 * asin(sqrt(u))
+  // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
+  //     Therefore:
+  //       acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
+  //       acos(x) = 2 * asin(sqrt(u))
+  //       acospi(x) = 2 * (asin(sqrt(u)) / pi)
+  //
+  // THE RANGE REDUCTION, HOW?
+  // 12: Recall [7], u = (1 - x)/2
+  // 13: Since 0.5 < x <= 1, therefore:
+  //       0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
+  //
+  // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
+  // Step [11] as `sqrt(u)` is in range.
+  // When -1 < x <= -0.5, the identity:
+  //       acos(x) = pi - acos(-x)
+  //       acospi(x) = 1 - acos(-x)/pi
+  // allows us to compute for the negative x value (lhs)
+  // with a positive x value instead (rhs).
+
+  float xf_abs = (xf < 0 ? -xf : xf);
+  float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
+  float sqrt_u = fputil::sqrt<float>(u);
+
+  float asin_sqrt_u =
+      sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1],
+                                POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]);
+
+  // Same as acos(x), but devided the expression with pi
+  return fputil::cast<float16>(
+      x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f)
+             : 2.0f * asin_sqrt_u);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF16_H
diff --git a/libc/src/__support/math/asin.h b/libc/src/__support/math/asin.h
new file mode 100644
index 0000000..84fc1cf
--- /dev/null
+++ b/libc/src/__support/math/asin.h
@@ -0,0 +1,297 @@
+//===-- Implementation header for asin --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
+
+#include "asin_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+#include "src/__support/math/asin_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+static constexpr double asin(double x) {
+  using namespace asin_internal;
+  using FPBits = fputil::FPBits<double>;
+
+  FPBits xbits(x);
+  int x_exp = xbits.get_biased_exponent();
+
+  // |x| < 0.5.
+  if (x_exp < FPBits::EXP_BIAS - 1) {
+    // |x| < 2^-26.
+    if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) {
+      // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
+      // is:
+      //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+      //                             = x^2 / 6
+      //                             < 2^-54
+      //                             < epsilon(1)/2.
+      // So the correctly rounded values of asin(x) are:
+      //   = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+      //                        or (rounding mode = FE_UPWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, 2^-54, x) instead.
+      // Note: to use the formula x + 2^-54*x to decide the correct rounding, we
+      // do need fma(x, 2^-54, x) to prevent underflow caused by 2^-54*x when
+      // |x| < 2^-1022. For targets without FMA instructions, when x is close to
+      // denormal range, we normalize x,
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS)
+      return x;
+#elif defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+      return fputil::multiply_add(x, 0x1.0p-54, x);
+#else
+      if (xbits.abs().uintval() == 0)
+        return x;
+      // Get sign(x) * min_normal.
+      FPBits eps_bits = FPBits::min_normal();
+      eps_bits.set_sign(xbits.sign());
+      double eps = eps_bits.get_val();
+      double normalize_const = (x_exp == 0) ? eps : 0.0;
+      double scaled_normal =
+          fputil::multiply_add(x + normalize_const, 0x1.0p54, eps);
+      return fputil::multiply_add(scaled_normal, 0x1.0p-54, -normalize_const);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    }
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    return x * asin_eval(x * x);
+#else
+    using Float128 = fputil::DyadicFloat<128>;
+    using DoubleDouble = fputil::DoubleDouble;
+
+    unsigned idx = 0;
+    DoubleDouble x_sq = fputil::exact_mult(x, x);
+    double err = xbits.abs().get_val() * 0x1.0p-51;
+    // Polynomial approximation:
+    //   p ~ asin(x)/x
+
+    DoubleDouble p = asin_eval(x_sq, idx, err);
+    // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
+    DoubleDouble r0 = fputil::exact_mult(x, p.hi);
+    double r_lo = fputil::multiply_add(x, p.lo, r0.lo);
+
+    // Ziv's accuracy test.
+
+    double r_upper = r0.hi + (r_lo + err);
+    double r_lower = r0.hi + (r_lo - err);
+
+    if (LIBC_LIKELY(r_upper == r_lower))
+      return r_upper;
+
+    // Ziv's accuracy test failed, perform 128-bit calculation.
+
+    // Recalculate mod 1/64.
+    idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6));
+
+    // Get x^2 - idx/64 exactly.  When FMA is available, double-double
+    // multiplication will be correct for all rounding modes.  Otherwise we use
+    // Float128 directly.
+    Float128 x_f128(x);
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+    // u = x^2 - idx/64
+    Float128 u_hi(
+        fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
+    Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo));
+#else
+    Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128);
+    Float128 u = fputil::quick_add(
+        x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6)));
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+    Float128 p_f128 = asin_eval(u, idx);
+    Float128 r = fputil::quick_mul(x_f128, p_f128);
+
+    return static_cast<double>(r);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  }
+  // |x| >= 0.5
+
+  double x_abs = xbits.abs().get_val();
+
+  // Maintaining the sign:
+  constexpr double SIGN[2] = {1.0, -1.0};
+  double x_sign = SIGN[xbits.is_neg()];
+
+  // |x| >= 1
+  if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
+    // x = +-1, asin(x) = +- pi/2
+    if (x_abs == 1.0) {
+      // return +- pi/2
+      return fputil::multiply_add(x_sign, PI_OVER_TWO.hi,
+                                  x_sign * PI_OVER_TWO.lo);
+    }
+    // |x| > 1, return NaN.
+    if (xbits.is_quiet_nan())
+      return x;
+
+    // Set domain error for non-NaN input.
+    if (!xbits.is_nan())
+      fputil::set_errno_if_required(EDOM);
+
+    fputil::raise_except_if_required(FE_INVALID);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // When |x| >= 0.5, we perform range reduction as follow:
+  //
+  // Assume further that 0.5 <= x < 1, and let:
+  //   y = asin(x)
+  // We will use the double angle formula:
+  //   cos(2y) = 1 - 2 sin^2(y)
+  // and the complement angle identity:
+  //   x = sin(y) = cos(pi/2 - y)
+  //              = 1 - 2 sin^2 (pi/4 - y/2)
+  // So:
+  //   sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+  // And hence:
+  //   pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+  // Equivalently:
+  //   asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+  // Let u = (1 - x)/2, then:
+  //   asin(x) = pi/2 - 2 * asin( sqrt(u) )
+  // Moreover, since 0.5 <= x < 1:
+  //   0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
+  // And hence we can reuse the same polynomial approximation of asin(x) when
+  // |x| <= 0.5:
+  //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+  // u = (1 - |x|)/2
+  double u = fputil::multiply_add(x_abs, -0.5, 0.5);
+  // v_hi + v_lo ~ sqrt(u).
+  // Let:
+  //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
+  // Then:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //           ~ v_hi + h / (2 * v_hi)
+  // So we can use:
+  //   v_lo = h / (2 * v_hi).
+  // Then,
+  //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
+  double v_hi = fputil::sqrt<double>(u);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  double p = asin_eval(u);
+  double r = x_sign * fputil::multiply_add(-2.0 * v_hi, p, PI_OVER_TWO.hi);
+  return r;
+#else
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double h = fputil::multiply_add(v_hi, -v_hi, u);
+#else
+  DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi);
+  double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+  // Scale v_lo and v_hi by 2 from the formula:
+  //   vh = v_hi * 2
+  //   vl = 2*v_lo = h / v_hi.
+  double vh = v_hi * 2.0;
+  double vl = h / v_hi;
+
+  // Polynomial approximation:
+  //   p ~ asin(sqrt(u))/sqrt(u)
+  unsigned idx = 0;
+  double err = vh * 0x1.0p-51;
+
+  DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err);
+
+  // Perform computations in double-double arithmetic:
+  //   asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
+  DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p);
+  DoubleDouble r = fputil::exact_add(PI_OVER_TWO.hi, -r0.hi);
+
+  double r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;
+
+  // Ziv's accuracy test.
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double r_upper = fputil::multiply_add(
+      r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, err));
+  double r_lower = fputil::multiply_add(
+      r.hi, x_sign, fputil::multiply_add(r_lo, x_sign, -err));
+#else
+  r_lo *= x_sign;
+  r.hi *= x_sign;
+  double r_upper = r.hi + (r_lo + err);
+  double r_lower = r.hi + (r_lo - err);
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+  if (LIBC_LIKELY(r_upper == r_lower))
+    return r_upper;
+
+  // Ziv's accuracy test failed, we redo the computations in Float128.
+  // Recalculate mod 1/64.
+  idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6));
+
+  // After the first step of Newton-Raphson approximating v = sqrt(u), we have
+  // that:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //      v_lo = h / (2 * v_hi)
+  // With error:
+  //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
+  //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
+  // Since:
+  //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
+  // we can add another correction term to (v_hi + v_lo) that is:
+  //   v_ll = -h^2 / (2*v_hi * 4u)
+  //        = -v_lo * (h / 4u)
+  //        = -vl * (h / 8u),
+  // making the errors:
+  //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
+  // well beyond 128-bit precision needed.
+
+  // Get the rounding error of vl = 2 * v_lo ~ h / vh
+  // Get full product of vh * vl
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
+#else
+  DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl);
+  double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  // vll = 2*v_ll = -vl * (h / (4u)).
+  double t = h * (-0.25) / u;
+  double vll = fputil::multiply_add(vl, t, vl_lo);
+  // m_v = -(v_hi + v_lo + v_ll).
+  Float128 m_v = fputil::quick_add(
+      Float128(vh), fputil::quick_add(Float128(vl), Float128(vll)));
+  m_v.sign = Sign::NEG;
+
+  // Perform computations in Float128:
+  //   asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
+  Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u));
+
+  Float128 p_f128 = asin_eval(y_f128, idx);
+  Float128 r0_f128 = fputil::quick_mul(m_v, p_f128);
+  Float128 r_f128 = fputil::quick_add(PI_OVER_TWO_F128, r0_f128);
+
+  if (xbits.is_neg())
+    r_f128.sign = Sign::NEG;
+
+  return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASIN_H
diff --git a/libc/src/__support/math/asinf.h b/libc/src/__support/math/asinf.h
new file mode 100644
index 0000000..bfa0dc3
--- /dev/null
+++ b/libc/src/__support/math/asinf.h
@@ -0,0 +1,175 @@
+//===-- Implementation header for asinf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float asinf(float x) {
+  using namespace inv_trigf_utils_internal;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr size_t N_EXCEPTS = 2;
+
+  // Exceptional values when |x| <= 0.5
+  constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
+      // (inputs, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
+      {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
+      // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
+      {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
+  }};
+
+  // Exceptional values when 0.5 < |x| <= 1
+  constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
+      // (inputs, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
+      {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
+      // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
+      {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
+  }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using namespace inv_trigf_utils_internal;
+  using FPBits = typename fputil::FPBits<float>;
+
+  FPBits xbits(x);
+  uint32_t x_uint = xbits.uintval();
+  uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
+  constexpr double SIGN[2] = {1.0, -1.0};
+  uint32_t x_sign = x_uint >> 31;
+
+  // |x| <= 0.5-ish
+  if (x_abs < 0x3f04'471dU) {
+    // |x| < 0x1.d12edp-12
+    if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) {
+      // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x
+      // is:
+      //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+      //                             = x^2 / 6
+      //                             < 2^-25
+      //                             < epsilon(1)/2.
+      // So the correctly rounded values of asin(x) are:
+      //   = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+      //                        or (rounding mode = FE_UPWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, 2^-25, x) instead.
+      // An exhaustive test shows that this formula work correctly for all
+      // rounding modes up to |x| < 0x1.d12edp-12.
+      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
+      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
+      // |x| < 2^-125. For targets without FMA instructions, we simply use
+      // double for intermediate results as it is more efficient than using an
+      // emulated version of FMA.
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+      return fputil::multiply_add(x, 0x1.0p-25f, x);
+#else
+      double xd = static_cast<double>(x);
+      return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+    }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    // Check for exceptional values
+    if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
+        LIBC_UNLIKELY(r.has_value()))
+      return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+    // For |x| <= 0.5, we approximate asinf(x) by:
+    //   asin(x) = x * P(x^2)
+    // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+    // asin(x)/x on [0, 0.5] generated by Sollya with:
+    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+    //                 [|1, D...|], [0, 0.5]);
+    // An exhaustive test shows that this approximation works well up to a
+    // little more than 0.5.
+    double xd = static_cast<double>(x);
+    double xsq = xd * xd;
+    double x3 = xd * xsq;
+    double r = asin_eval(xsq);
+    return static_cast<float>(fputil::multiply_add(x3, r, xd));
+  }
+
+  // |x| > 1, return NaNs.
+  if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) {
+    if (xbits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+
+    if (x_abs <= 0x7f80'0000U) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+    }
+
+    return FPBits::quiet_nan().get_val();
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // Check for exceptional values
+  if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
+      LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // When |x| > 0.5, we perform range reduction as follow:
+  //
+  // Assume further that 0.5 < x <= 1, and let:
+  //   y = asin(x)
+  // We will use the double angle formula:
+  //   cos(2y) = 1 - 2 sin^2(y)
+  // and the complement angle identity:
+  //   x = sin(y) = cos(pi/2 - y)
+  //              = 1 - 2 sin^2 (pi/4 - y/2)
+  // So:
+  //   sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+  // And hence:
+  //   pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+  // Equivalently:
+  //   asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+  // Let u = (1 - x)/2, then:
+  //   asin(x) = pi/2 - 2 * asin( sqrt(u) )
+  // Moreover, since 0.5 < x <= 1:
+  //   0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
+  // And hence we can reuse the same polynomial approximation of asin(x) when
+  // |x| <= 0.5:
+  //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+  xbits.set_sign(Sign::POS);
+  double sign = SIGN[x_sign];
+  double xd = static_cast<double>(xbits.get_val());
+  double u = fputil::multiply_add(-0.5, xd, 0.5);
+  double c1 = sign * (-2 * fputil::sqrt<double>(u));
+  double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
+  double c3 = c1 * u;
+
+  double r = asin_eval(u);
+  return static_cast<float>(fputil::multiply_add(c3, r, c2));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ASINF_H
diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h
index ff59ff7..14f0592 100644
--- a/libc/src/__support/math/exp.h
+++ b/libc/src/__support/math/exp.h
@@ -234,7 +234,7 @@ static double set_exceptional(double x) {
 
 namespace math {
 
-static double exp(double x) {
+LIBC_INLINE static double exp(double x) {
   using FPBits = typename fputil::FPBits<double>;
   FPBits xbits(x);
 
diff --git a/libc/src/__support/math/exp10f16_utils.h b/libc/src/__support/math/exp10f16_utils.h
index bffb81b..5952a41 100644
--- a/libc/src/__support/math/exp10f16_utils.h
+++ b/libc/src/__support/math/exp10f16_utils.h
@@ -19,8 +19,7 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LIBC_INLINE static constexpr ExpRangeReduction
-exp10_range_reduction(float16 x) {
+LIBC_INLINE static ExpRangeReduction exp10_range_reduction(float16 x) {
   // For -8 < x < 5, to compute 10^x, we perform the following range reduction:
   // find hi, mid, lo, such that:
   //   x = (hi + mid) * log2(10) + lo, in which
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index a001d99..ecf0967 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3958,13 +3958,7 @@ add_entrypoint_object(
   HDRS
     ../asinf.h
   DEPENDS
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
-    libc.src.__support.math.inv_trigf_utils
+    libc.src.__support.math.asinf
 )
 
 add_entrypoint_object(
@@ -3993,16 +3987,7 @@ add_entrypoint_object(
   HDRS
     ../asin.h
   DEPENDS
-    libc.src.__support.math.asin_utils
-    libc.src.__support.FPUtil.double_double
-    libc.src.__support.FPUtil.dyadic_float
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.cpu_features
+    libc.src.__support.math.asin
 )
 
 add_entrypoint_object(
@@ -4043,16 +4028,8 @@ add_entrypoint_object(
   HDRS
     ../acospif16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.types
+    libc.src.__support.math.acospif16
+    libc.src.errno.errno
 )
 
 add_header_library(
diff --git a/libc/src/math/generic/acospif16.cpp b/libc/src/math/generic/acospif16.cpp
index bfdf169..09cbd99 100644
--- a/libc/src/math/generic/acospif16.cpp
+++ b/libc/src/math/generic/acospif16.cpp
@@ -7,128 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/acospif16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/acospif16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, acospif16, (float16 x)) {
-  using FPBits = fputil::FPBits<float16>;
-  FPBits xbits(x);
-
-  uint16_t x_u = xbits.uintval();
-  uint16_t x_abs = x_u & 0x7fff;
-  uint16_t x_sign = x_u >> 15;
-
-  // |x| > 0x1p0, |x| > 1, or x is NaN.
-  if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
-    // acospif16(NaN) = NaN
-    if (xbits.is_nan()) {
-      if (xbits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // 1 < |x| <= +inf
-    fputil::raise_except_if_required(FE_INVALID);
-    fputil::set_errno_if_required(EDOM);
-
-    return FPBits::quiet_nan().get_val();
-  }
-
-  // |x| == 0x1p0, x is 1 or -1
-  // if x is (-)1, return 1
-  // if x is (+)1, return 0
-  if (LIBC_UNLIKELY(x_abs == 0x3c00))
-    return fputil::cast<float16>(x_sign ? 1.0f : 0.0f);
-
-  float xf = x;
-  float xsq = xf * xf;
-
-  // Degree-6 minimax polynomial coefficients of asin(x) generated by Sollya
-  // with: > P = fpminimax(asin(x)/(pi * x), [|0, 2, 4, 6, 8|], [|SG...|], [0,
-  // 0.5]);
-  constexpr float POLY_COEFFS[5] = {0x1.45f308p-2f, 0x1.b2900cp-5f,
-                                    0x1.897e36p-6f, 0x1.9efafcp-7f,
-                                    0x1.06d884p-6f};
-  // |x| <= 0x1p-1, |x| <= 0.5
-  if (x_abs <= 0x3800) {
-    // if x is 0, return 0.5
-    if (LIBC_UNLIKELY(x_abs == 0))
-      return fputil::cast<float16>(0.5f);
-
-    // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x), then
-    //            acospi(x) = 0.5 - asin(x)/pi
-    float interm =
-        fputil::polyeval(xsq, POLY_COEFFS[0], POLY_COEFFS[1], POLY_COEFFS[2],
-                         POLY_COEFFS[3], POLY_COEFFS[4]);
-
-    return fputil::cast<float16>(fputil::multiply_add(-xf, interm, 0.5f));
-  }
-
-  // When |x| > 0.5, assume that 0.5 < |x| <= 1
-  //
-  // Step-by-step range-reduction proof:
-  // 1:  Let y = asin(x), such that, x = sin(y)
-  // 2:  From complimentary angle identity:
-  //       x = sin(y) = cos(pi/2 - y)
-  // 3:  Let z = pi/2 - y, such that x = cos(z)
-  // 4:  From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
-  //       z = 2A, z/2 = A
-  //       cos(z) = 1 - 2 * sin^2(z/2)
-  // 5:  Make sin(z/2) subject of the formula:
-  //       sin(z/2) = sqrt((1 - cos(z))/2)
-  // 6:  Recall [3]; x = cos(z). Therefore:
-  //       sin(z/2) = sqrt((1 - x)/2)
-  // 7:  Let u = (1 - x)/2
-  // 8:  Therefore:
-  //       asin(sqrt(u)) = z/2
-  //       2 * asin(sqrt(u)) = z
-  // 9:  Recall [3]; z = pi/2 - y. Therefore:
-  //       y = pi/2 - z
-  //       y = pi/2 - 2 * asin(sqrt(u))
-  // 10: Recall [1], y = asin(x). Therefore:
-  //       asin(x) = pi/2 - 2 * asin(sqrt(u))
-  // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
-  //     Therefore:
-  //       acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
-  //       acos(x) = 2 * asin(sqrt(u))
-  //       acospi(x) = 2 * (asin(sqrt(u)) / pi)
-  //
-  // THE RANGE REDUCTION, HOW?
-  // 12: Recall [7], u = (1 - x)/2
-  // 13: Since 0.5 < x <= 1, therefore:
-  //       0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
-  //
-  // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
-  // Step [11] as `sqrt(u)` is in range.
-  // When -1 < x <= -0.5, the identity:
-  //       acos(x) = pi - acos(-x)
-  //       acospi(x) = 1 - acos(-x)/pi
-  // allows us to compute for the negative x value (lhs)
-  // with a positive x value instead (rhs).
-
-  float xf_abs = (xf < 0 ? -xf : xf);
-  float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
-  float sqrt_u = fputil::sqrt<float>(u);
-
-  float asin_sqrt_u =
-      sqrt_u * fputil::polyeval(u, POLY_COEFFS[0], POLY_COEFFS[1],
-                                POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4]);
-
-  // Same as acos(x), but devided the expression with pi
-  return fputil::cast<float16>(
-      x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, 1.0f)
-             : 2.0f * asin_sqrt_u);
+  return math::acospif16(x);
 }
+
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp
index d286fce..b5ba9ea 100644
--- a/libc/src/math/generic/asin.cpp
+++ b/libc/src/math/generic/asin.cpp
@@ -7,23 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/asin.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/dyadic_float.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
-#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#include "src/__support/math/asin_utils.h"
+#include "src/__support/math/asin.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-using DoubleDouble = fputil::DoubleDouble;
-using Float128 = fputil::DyadicFloat<128>;
-
 LLVM_LIBC_FUNCTION(double, asin, (double x)) {
   using namespace asin_internal;
   using FPBits = fputil::FPBits<double>;
diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp
index 77d6de9..9c6766f 100644
--- a/libc/src/math/generic/asinf.cpp
+++ b/libc/src/math/generic/asinf.cpp
@@ -7,161 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/asinf.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
-#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-
-#include "src/__support/math/inv_trigf_utils.h"
+#include "src/__support/math/asinf.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr size_t N_EXCEPTS = 2;
-
-// Exceptional values when |x| <= 0.5
-static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
-    // (inputs, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
-    {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
-    // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
-    {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
-}};
-
-// Exceptional values when 0.5 < |x| <= 1
-static constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
-    // (inputs, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
-    {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
-    // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
-    {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
-}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float, asinf, (float x)) {
-  using namespace inv_trigf_utils_internal;
-  using FPBits = typename fputil::FPBits<float>;
-
-  FPBits xbits(x);
-  uint32_t x_uint = xbits.uintval();
-  uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
-  constexpr double SIGN[2] = {1.0, -1.0};
-  uint32_t x_sign = x_uint >> 31;
-
-  // |x| <= 0.5-ish
-  if (x_abs < 0x3f04'471dU) {
-    // |x| < 0x1.d12edp-12
-    if (LIBC_UNLIKELY(x_abs < 0x39e8'9768U)) {
-      // When |x| < 2^-12, the relative error of the approximation asin(x) ~ x
-      // is:
-      //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
-      //                             = x^2 / 6
-      //                             < 2^-25
-      //                             < epsilon(1)/2.
-      // So the correctly rounded values of asin(x) are:
-      //   = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
-      //                        or (rounding mode = FE_UPWARD and x is
-      //                        negative),
-      //   = x otherwise.
-      // To simplify the rounding decision and make it more efficient, we use
-      //   fma(x, 2^-25, x) instead.
-      // An exhaustive test shows that this formula work correctly for all
-      // rounding modes up to |x| < 0x1.d12edp-12.
-      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
-      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
-      // |x| < 2^-125. For targets without FMA instructions, we simply use
-      // double for intermediate results as it is more efficient than using an
-      // emulated version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
-      return fputil::multiply_add(x, 0x1.0p-25f, x);
-#else
-      double xd = static_cast<double>(x);
-      return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
-    }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-    // Check for exceptional values
-    if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
-        LIBC_UNLIKELY(r.has_value()))
-      return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-    // For |x| <= 0.5, we approximate asinf(x) by:
-    //   asin(x) = x * P(x^2)
-    // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
-    // asin(x)/x on [0, 0.5] generated by Sollya with:
-    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
-    //                 [|1, D...|], [0, 0.5]);
-    // An exhaustive test shows that this approximation works well up to a
-    // little more than 0.5.
-    double xd = static_cast<double>(x);
-    double xsq = xd * xd;
-    double x3 = xd * xsq;
-    double r = asin_eval(xsq);
-    return static_cast<float>(fputil::multiply_add(x3, r, xd));
-  }
-
-  // |x| > 1, return NaNs.
-  if (LIBC_UNLIKELY(x_abs > 0x3f80'0000U)) {
-    if (xbits.is_signaling_nan()) {
-      fputil::raise_except_if_required(FE_INVALID);
-      return FPBits::quiet_nan().get_val();
-    }
-
-    if (x_abs <= 0x7f80'0000U) {
-      fputil::set_errno_if_required(EDOM);
-      fputil::raise_except_if_required(FE_INVALID);
-    }
-
-    return FPBits::quiet_nan().get_val();
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Check for exceptional values
-  if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
-      LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // When |x| > 0.5, we perform range reduction as follow:
-  //
-  // Assume further that 0.5 < x <= 1, and let:
-  //   y = asin(x)
-  // We will use the double angle formula:
-  //   cos(2y) = 1 - 2 sin^2(y)
-  // and the complement angle identity:
-  //   x = sin(y) = cos(pi/2 - y)
-  //              = 1 - 2 sin^2 (pi/4 - y/2)
-  // So:
-  //   sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
-  // And hence:
-  //   pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
-  // Equivalently:
-  //   asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
-  // Let u = (1 - x)/2, then:
-  //   asin(x) = pi/2 - 2 * asin( sqrt(u) )
-  // Moreover, since 0.5 < x <= 1:
-  //   0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
-  // And hence we can reuse the same polynomial approximation of asin(x) when
-  // |x| <= 0.5:
-  //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
-
-  xbits.set_sign(Sign::POS);
-  double sign = SIGN[x_sign];
-  double xd = static_cast<double>(xbits.get_val());
-  double u = fputil::multiply_add(-0.5, xd, 0.5);
-  double c1 = sign * (-2 * fputil::sqrt<double>(u));
-  double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
-  double c3 = c1 * u;
-
-  double r = asin_eval(u);
-  return static_cast<float>(fputil::multiply_add(c3, r, c2));
-}
+LLVM_LIBC_FUNCTION(float, asinf, (float x)) { return math::asinf(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h
index f3dcdb9..a93ee25 100644
--- a/libc/src/math/generic/range_reduction_double_common.h
+++ b/libc/src/math/generic/range_reduction_double_common.h
@@ -278,7 +278,7 @@ private:
 };
 
 #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static Float128 range_reduction_small_f128(double x) {
+LIBC_INLINE static Float128 range_reduction_small_f128(double x) {
   constexpr Float128 PI_OVER_128_F128 = {
       Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128};
   constexpr double ONE_TWENTY_EIGHT_OVER_PI_D = 0x1.45f306dc9c883p5;
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 43f44a9..49f4a1b 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -137,6 +137,21 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  mbsinit
+  SRCS
+    mbsinit.cpp
+  HDRS
+    mbsinit.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.wchar.character_converter
+    libc.src.__support.wchar.mbstate
+)
+
+add_entrypoint_object(
   mbrtowc
   SRCS
     mbrtowc.cpp
diff --git a/libc/src/wchar/mbsinit.cpp b/libc/src/wchar/mbsinit.cpp
new file mode 100644
index 0000000..23ba542
--- /dev/null
+++ b/libc/src/wchar/mbsinit.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of mbsinit -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsinit.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, mbsinit, (mbstate_t * ps)) {
+  if (ps == nullptr)
+    return true;
+  internal::CharacterConverter cr(reinterpret_cast<internal::mbstate *>(ps));
+  return cr.isValidState() && cr.isEmpty();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsinit.h b/libc/src/wchar/mbsinit.h
new file mode 100644
index 0000000..fa6be0f
--- /dev/null
+++ b/libc/src/wchar/mbsinit.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for mbsinit ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSINIT_H
+#define LLVM_LIBC_SRC_WCHAR_MBSINIT_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int mbsinit(mbstate_t *ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSINIT_H