[libc][math][c23] Add f16fmaf C23 math function (#95483)

Part of #93566.
author: OverMighty <its.overmighty@gmail.com> 2024-06-14 18:31:32 +0200
committer: GitHub <noreply@github.com> 2024-06-14 12:31:32 -0400
commit: f3aceeee8a8c5fef107657dc6c4d558f3de99773 (patch)
tree: a2d88b41cea325781ddfe407407e94cd1e774494 /libc/src
parent: bbe9119d9cb37662faafe7fe273e792b1b70145e (diff)
download: llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.zip
llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.gz
llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.bz2
13 files changed, 212 insertions, 137 deletions
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index c277da4..cf01a31 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -10,41 +10,29 @@
 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
 
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/generic/FMA.h"
 #include "src/__support/macros/properties/architectures.h"
 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
 
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
-
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
-  return __builtin_fmaf(x, y, z);
+template <typename OutType, typename InType>
+LIBC_INLINE OutType fma(InType x, InType y, InType z) {
+  return generic::fma<OutType>(x, y, z);
 }
 
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) {
-  return __builtin_fma(x, y, z);
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+template <> LIBC_INLINE float fma(float x, float y, float z) {
+  return __builtin_fmaf(x, y, z);
 }
 
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#else
-// FMA instructions are not available
-#include "generic/FMA.h"
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-template <typename T> LIBC_INLINE T fma(T x, T y, T z) {
-  return generic::fma(x, y, z);
+template <> LIBC_INLINE double fma(double x, double y, double z) {
+  return __builtin_fma(x, y, z);
 }
+#endif // LIBC_TARGET_CPU_HAS_FMA
 
 } // namespace fputil
 } // namespace LIBC_NAMESPACE
 
-#endif
-
 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 595656e..a8a95ba 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -19,12 +19,15 @@ add_header_library(
   HDRS
     FMA.h
   DEPENDS
+    libc.hdr.fenv_macros
     libc.src.__support.common
     libc.src.__support.CPP.bit
+    libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.big_int
     libc.src.__support.macros.optimization
     libc.src.__support.uint128
 )
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index f403aa7..71b1507 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -10,19 +10,26 @@
 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H
 
 #include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/big_int.h"
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/uint128.h"
+
+#include "hdr/fenv_macros.h"
 
 namespace LIBC_NAMESPACE {
 namespace fputil {
 namespace generic {
 
-template <typename T> LIBC_INLINE T fma(T x, T y, T z);
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                                 cpp::is_floating_point_v<InType> &&
+                                 sizeof(OutType) <= sizeof(InType),
+                             OutType>
+fma(InType x, InType y, InType z);
 
 // TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
 // The implementation below only is only correct for the default rounding mode,
@@ -64,11 +71,10 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
     // Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are
     // zero.
     if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) {
-      if (bit_sum.sign() != t.sign()) {
+      if (bit_sum.sign() != t.sign())
         bit_sum.set_mantissa(bit_sum.get_mantissa() + 1);
-      } else if (bit_sum.get_mantissa()) {
+      else if (bit_sum.get_mantissa())
         bit_sum.set_mantissa(bit_sum.get_mantissa() - 1);
-      }
     }
   }
 
@@ -79,12 +85,14 @@ namespace internal {
 
 // Extract the sticky bits and shift the `mantissa` to the right by
 // `shift_length`.
-LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
-  if (shift_length >= 128) {
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<is_unsigned_integral_or_big_int_v<T>, bool>
+shift_mantissa(int shift_length, T &mant) {
+  if (shift_length >= cpp::numeric_limits<T>::digits) {
     mant = 0;
     return true; // prod_mant is non-zero.
   }
-  UInt128 mask = (UInt128(1) << shift_length) - 1;
+  T mask = (T(1) << shift_length) - 1;
   bool sticky_bits = (mant & mask) != 0;
   mant >>= shift_length;
   return sticky_bits;
@@ -92,47 +100,64 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
 
 } // namespace internal
 
-template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
-  using FPBits = fputil::FPBits<double>;
-
-  if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
-    return x * y + z;
-  }
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+                                 cpp::is_floating_point_v<InType> &&
+                                 sizeof(OutType) <= sizeof(InType),
+                             OutType>
+fma(InType x, InType y, InType z) {
+  using OutFPBits = fputil::FPBits<OutType>;
+  using OutStorageType = typename OutFPBits::StorageType;
+  using InFPBits = fputil::FPBits<InType>;
+  using InStorageType = typename InFPBits::StorageType;
+
+  constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
+  constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
+  constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
+  using TmpResultType = UInt<TMP_RESULT_LEN>;
+
+  constexpr size_t EXTRA_FRACTION_LEN =
+      TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
+  constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
+      (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
+
+  if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
+    return static_cast<OutType>(x * y + z);
 
   int x_exp = 0;
   int y_exp = 0;
   int z_exp = 0;
 
   // Normalize denormal inputs.
-  if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) {
-    x_exp -= 52;
-    x *= 0x1.0p+52;
+  if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) {
+    x_exp -= InFPBits::FRACTION_LEN;
+    x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
   }
-  if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) {
-    y_exp -= 52;
-    y *= 0x1.0p+52;
+  if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) {
+    y_exp -= InFPBits::FRACTION_LEN;
+    y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
   }
-  if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) {
-    z_exp -= 52;
-    z *= 0x1.0p+52;
+  if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) {
+    z_exp -= InFPBits::FRACTION_LEN;
+    z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
   }
 
-  FPBits x_bits(x), y_bits(y), z_bits(z);
+  InFPBits x_bits(x), y_bits(y), z_bits(z);
   const Sign z_sign = z_bits.sign();
   Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
   x_exp += x_bits.get_biased_exponent();
   y_exp += y_bits.get_biased_exponent();
   z_exp += z_bits.get_biased_exponent();
 
-  if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT ||
-                    y_exp == FPBits::MAX_BIASED_EXPONENT ||
-                    z_exp == FPBits::MAX_BIASED_EXPONENT))
-    return x * y + z;
+  if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT ||
+                    y_exp == InFPBits::MAX_BIASED_EXPONENT ||
+                    z_exp == InFPBits::MAX_BIASED_EXPONENT))
+    return static_cast<OutType>(x * y + z);
 
   // Extract mantissa and append hidden leading bits.
-  UInt128 x_mant = x_bits.get_explicit_mantissa();
-  UInt128 y_mant = y_bits.get_explicit_mantissa();
-  UInt128 z_mant = z_bits.get_explicit_mantissa();
+  InStorageType x_mant = x_bits.get_explicit_mantissa();
+  InStorageType y_mant = y_bits.get_explicit_mantissa();
+  TmpResultType z_mant = z_bits.get_explicit_mantissa();
 
   // If the exponent of the product x*y > the exponent of z, then no extra
   // precision beside the entire product x*y is needed.  On the other hand, when
@@ -143,22 +168,20 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
   //      z :    10aa...a
   // - prod :     1bb...bb....b
   // In that case, in order to store the exact result, we need at least
-  //   (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
+  //     (Length of prod) - (Fraction length of z)
+  //   = 2*(Length of input explicit mantissa) - (Fraction length of z) bits.
   // Overall, before aligning the mantissas and exponents, we can simply left-
-  // shift the mantissa of z by at least 54, and left-shift the product of x*y
-  // by (that amount - 52).  After that, it is enough to align the least
-  // significant bit, given that we keep track of the round and sticky bits
-  // after the least significant bit.
-  // We pick shifting z_mant by 64 bits so that technically we can simply use
-  // the original mantissa as high part when constructing 128-bit z_mant. So the
-  // mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
-
-  UInt128 prod_mant = x_mant * y_mant << 10;
+  // shift the mantissa of z by that amount.  After that, it is enough to align
+  // the least significant bit, given that we keep track of the round and sticky
+  // bits after the least significant bit.
+
+  TmpResultType prod_mant = TmpResultType(x_mant) * y_mant;
   int prod_lsb_exp =
-      x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10);
+      x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN);
 
-  z_mant <<= 64;
-  int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64);
+  constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
+  z_mant <<= RESULT_MIN_LEN;
+  int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
   bool round_bit = false;
   bool sticky_bits = false;
   bool z_shifted = false;
@@ -198,46 +221,42 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
     }
   }
 
-  uint64_t result = 0;
+  OutStorageType result = 0;
   int r_exp = 0; // Unbiased exponent of the result
 
+  int round_mode = fputil::quick_get_round();
+
   // Normalize the result.
   if (prod_mant != 0) {
-    uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
-    int lead_zeros =
-        prod_hi ? cpp::countl_zero(prod_hi)
-                : 64 + cpp::countl_zero(static_cast<uint64_t>(prod_mant));
+    int lead_zeros = cpp::countl_zero(prod_mant);
     // Move the leading 1 to the most significant bit.
     prod_mant <<= lead_zeros;
-    // The lower 64 bits are always sticky bits after moving the leading 1 to
-    // the most significant bit.
-    sticky_bits |= (static_cast<uint64_t>(prod_mant) != 0);
-    result = static_cast<uint64_t>(prod_mant >> 64);
-    // Change prod_lsb_exp the be the exponent of the least significant bit of
-    // the result.
-    prod_lsb_exp += 64 - lead_zeros;
-    r_exp = prod_lsb_exp + 63;
+    prod_lsb_exp -= lead_zeros;
+    r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
+            InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
 
     if (r_exp > 0) {
-      // The result is normal.  We will shift the mantissa to the right by
-      // 63 - 52 = 11 bits (from the locations of the most significant bit).
-      // Then the rounding bit will correspond the 11th bit, and the lowest
-      // 10 bits are merged into sticky bits.
-      round_bit = (result & 0x0400ULL) != 0;
-      sticky_bits |= (result & 0x03ffULL) != 0;
-      result >>= 11;
+      // The result is normal.  We will shift the mantissa to the right by the
+      // amount of extra bits compared to the length of the explicit mantissa in
+      // the output type.  The rounding bit then becomes the highest bit that is
+      // shifted out, and the following lower bits are merged into sticky bits.
+      round_bit =
+          (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
+      sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
+      result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
     } else {
-      if (r_exp < -52) {
+      if (r_exp < -OutFPBits::FRACTION_LEN) {
         // The result is smaller than 1/2 of the smallest denormal number.
         sticky_bits = true; // since the result is non-zero.
         result = 0;
       } else {
         // The result is denormal.
-        uint64_t mask = 1ULL << (11 - r_exp);
-        round_bit = (result & mask) != 0;
-        sticky_bits |= (result & (mask - 1)) != 0;
-        if (r_exp > -52)
-          result >>= 12 - r_exp;
+        TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
+        round_bit = (prod_mant & mask) != 0;
+        sticky_bits |= (prod_mant & (mask - 1)) != 0;
+        if (r_exp > -OutFPBits::FRACTION_LEN)
+          result = static_cast<OutStorageType>(
+              prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
         else
           result = 0;
       }
@@ -245,27 +264,30 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
       r_exp = 0;
     }
   } else {
-    // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly.
-    prod_sign = Sign::POS;
+    // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
+    // rounding downward and +0.0 for other rounding modes.
+    if (round_mode == FE_DOWNWARD)
+      prod_sign = Sign::NEG;
+    else
+      prod_sign = Sign::POS;
   }
 
   // Finalize the result.
-  int round_mode = fputil::quick_get_round();
-  if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) {
+  if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
     if ((round_mode == FE_TOWARDZERO) ||
         (round_mode == FE_UPWARD && prod_sign.is_neg()) ||
         (round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
-      return FPBits::max_normal(prod_sign).get_val();
+      return OutFPBits::max_normal(prod_sign).get_val();
     }
-    return FPBits::inf(prod_sign).get_val();
+    return OutFPBits::inf(prod_sign).get_val();
   }
 
   // Remove hidden bit and append the exponent field and sign bit.
-  result = (result & FPBits::FRACTION_MASK) |
-           (static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
-  if (prod_sign.is_neg()) {
-    result |= FPBits::SIGN_MASK;
-  }
+  result = static_cast<OutStorageType>(
+      (result & OutFPBits::FRACTION_MASK) |
+      (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
+  if (prod_sign.is_neg())
+    result |= OutFPBits::SIGN_MASK;
 
   // Rounding.
   if (round_mode == FE_TONEAREST) {
@@ -277,7 +299,7 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
       ++result;
   }
 
-  return cpp::bit_cast<double>(result);
+  return cpp::bit_cast<OutType>(result);
 }
 
 } // namespace generic
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 82932da..622914e 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE {
 namespace fputil {
 
 LIBC_INLINE float multiply_add(float x, float y, float z) {
-  return fma(x, y, z);
+  return fma<float>(x, y, z);
 }
 
 LIBC_INLINE double multiply_add(double x, double y, double z) {
-  return fma(x, y, z);
+  return fma<double>(x, y, z);
 }
 
 } // namespace fputil
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6ee..c30c3ec 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -983,23 +983,18 @@ using UInt = BigInt<Bits, false, internal::WordTypeSelectorT<Bits>>;
 template <size_t Bits>
 using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
 
-// Provides limits of U/Int<128>.
-template <> class cpp::numeric_limits<UInt<128>> {
-public:
-  LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); }
-  LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); }
-  // Meant to match std::numeric_limits interface.
-  // NOLINTNEXTLINE(readability-identifier-naming)
-  LIBC_INLINE_VAR static constexpr int digits = 128;
-};
-
-template <> class cpp::numeric_limits<Int<128>> {
-public:
-  LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); }
-  LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); }
+// Provides limits of BigInt.
+template <size_t Bits, bool Signed, typename T>
+struct cpp::numeric_limits<BigInt<Bits, Signed, T>> {
+  LIBC_INLINE static constexpr BigInt<Bits, Signed, T> max() {
+    return BigInt<Bits, Signed, T>::max();
+  }
+  LIBC_INLINE static constexpr BigInt<Bits, Signed, T> min() {
+    return BigInt<Bits, Signed, T>::min();
+  }
   // Meant to match std::numeric_limits interface.
   // NOLINTNEXTLINE(readability-identifier-naming)
-  LIBC_INLINE_VAR static constexpr int digits = 128;
+  LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
 };
 
 // type traits to determine whether a T is a BigInt.
@@ -1073,6 +1068,18 @@ template <typename T>
 using make_integral_or_big_int_signed_t =
     typename make_integral_or_big_int_signed<T>::type;
 
+// is_unsigned_integral_or_big_int
+template <typename T>
+struct is_unsigned_integral_or_big_int
+    : cpp::bool_constant<
+          cpp::is_same_v<T, make_integral_or_big_int_unsigned_t<T>>> {};
+
+template <typename T>
+// Meant to look like <type_traits> helper variable templates.
+// NOLINTNEXTLINE(readability-identifier-naming)
+LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v =
+    is_unsigned_integral_or_big_int<T>::value;
+
 namespace cpp {
 
 // Specialization of cpp::bit_cast ('bit.h') from T to BigInt.
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index df8e6c0..4472367 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f)
 add_math_entrypoint_object(expm1)
 add_math_entrypoint_object(expm1f)
 
+add_math_entrypoint_object(f16fmaf)
+
 add_math_entrypoint_object(f16sqrtf)
 
 add_math_entrypoint_object(fabs)
diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h
new file mode 100644
index 0000000..d92cb43
--- /dev/null
+++ b/libc/src/math/f16fmaf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H
+#define LLVM_LIBC_SRC_MATH_F16FMAF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmaf(float x, float y, float z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f1f7d6c..aa0069d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3603,6 +3603,19 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  f16fmaf
+  SRCS
+    f16fmaf.cpp
+  HDRS
+    ../f16fmaf.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.fma
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
   f16sqrtf
   SRCS
     f16sqrtf.cpp
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 037e600..6b9f074 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -104,7 +104,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
         // intermediate results as it is more efficient than using an emulated
         // version of FMA.
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
-      return fputil::fma(x, x, x);
+      return fputil::fma<float>(x, x, x);
 #else
       double xd = x;
       return static_cast<float>(fputil::multiply_add(xd, xd, xd));
diff --git a/libc/src/math/generic/f16fmaf.cpp b/libc/src/math/generic/f16fmaf.cpp
new file mode 100644
index 0000000..09f2712
--- /dev/null
+++ b/libc/src/math/generic/f16fmaf.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fmaf function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fmaf.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fmaf, (float x, float y, float z)) {
+  return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp
index e27e5ba..7937766 100644
--- a/libc/src/math/generic/fma.cpp
+++ b/libc/src/math/generic/fma.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
-  return fputil::fma(x, y, z);
+  return fputil::fma<double>(x, y, z);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaf.cpp b/libc/src/math/generic/fmaf.cpp
index 7512b82..d367a06 100644
--- a/libc/src/math/generic/fmaf.cpp
+++ b/libc/src/math/generic/fmaf.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
-  return fputil::fma(x, y, z);
+  return fputil::fma<float>(x, y, z);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
index aee8cbb..82b4ae1 100644
--- a/libc/src/math/generic/range_reduction_fma.h
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -33,8 +33,8 @@ static constexpr double THIRTYTWO_OVER_PI[5] = {
 //   k = round(x * 32 / pi) and y = (x * 32 / pi) - k.
 LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
   double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]);
-  y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd);
-  y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y);
+  y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -kd);
+  y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], y);
   return static_cast<int64_t>(kd);
 }
 
@@ -54,12 +54,13 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
     prod_hi.set_uintval(prod_hi.uintval() &
                         ((x_exp < 55) ? (~0xfffULL) : (~0ULL))); // |x| < 2^55
     double k_hi = fputil::nearest_integer(prod_hi.get_val());
-    double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi);
-    double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod);
+    double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -k_hi);
+    double prod_lo =
+        fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod);
     double k_lo = fputil::nearest_integer(prod_lo);
-    y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
-    y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y);
-    y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
+    y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
+    y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], y);
+    y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
 
     return static_cast<int64_t>(k_lo);
   }
@@ -74,12 +75,12 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
   prod_hi.set_uintval(prod_hi.uintval() &
                       ((x_exp < 110) ? (~0xfffULL) : (~0ULL))); // |x| < 2^110
   double k_hi = fputil::nearest_integer(prod_hi.get_val());
-  double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi);
-  double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod);
+  double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], -k_hi);
+  double prod_lo = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod);
   double k_lo = fputil::nearest_integer(prod_lo);
-  y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
-  y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
-  y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y);
+  y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
+  y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
+  y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[4], y);
 
   return static_cast<int64_t>(k_lo);
 }
author	OverMighty <its.overmighty@gmail.com>	2024-06-14 18:31:32 +0200
committer	GitHub <noreply@github.com>	2024-06-14 12:31:32 -0400
commit	f3aceeee8a8c5fef107657dc6c4d558f3de99773 (patch)
tree	a2d88b41cea325781ddfe407407e94cd1e774494 /libc/src
parent	bbe9119d9cb37662faafe7fe273e792b1b70145e (diff)
download	llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.zip llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.gz llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.bz2