aboutsummaryrefslogtreecommitdiff
path: root/libc/src
diff options
context:
space:
mode:
authorOverMighty <its.overmighty@gmail.com>2024-06-14 18:31:32 +0200
committerGitHub <noreply@github.com>2024-06-14 12:31:32 -0400
commitf3aceeee8a8c5fef107657dc6c4d558f3de99773 (patch)
treea2d88b41cea325781ddfe407407e94cd1e774494 /libc/src
parentbbe9119d9cb37662faafe7fe273e792b1b70145e (diff)
downloadllvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.zip
llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.gz
llvm-f3aceeee8a8c5fef107657dc6c4d558f3de99773.tar.bz2
[libc][math][c23] Add f16fmaf C23 math function (#95483)
Part of #93566.
Diffstat (limited to 'libc/src')
-rw-r--r--libc/src/__support/FPUtil/FMA.h32
-rw-r--r--libc/src/__support/FPUtil/generic/CMakeLists.txt3
-rw-r--r--libc/src/__support/FPUtil/generic/FMA.h188
-rw-r--r--libc/src/__support/FPUtil/multiply_add.h4
-rw-r--r--libc/src/__support/big_int.h37
-rw-r--r--libc/src/math/CMakeLists.txt2
-rw-r--r--libc/src/math/f16fmaf.h20
-rw-r--r--libc/src/math/generic/CMakeLists.txt13
-rw-r--r--libc/src/math/generic/expm1f.cpp2
-rw-r--r--libc/src/math/generic/f16fmaf.cpp19
-rw-r--r--libc/src/math/generic/fma.cpp2
-rw-r--r--libc/src/math/generic/fmaf.cpp2
-rw-r--r--libc/src/math/generic/range_reduction_fma.h25
13 files changed, 212 insertions, 137 deletions
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index c277da4..cf01a31 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -10,41 +10,29 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/generic/FMA.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
-
namespace LIBC_NAMESPACE {
namespace fputil {
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
- return __builtin_fmaf(x, y, z);
+template <typename OutType, typename InType>
+LIBC_INLINE OutType fma(InType x, InType y, InType z) {
+ return generic::fma<OutType>(x, y, z);
}
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) {
- return __builtin_fma(x, y, z);
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+template <> LIBC_INLINE float fma(float x, float y, float z) {
+ return __builtin_fmaf(x, y, z);
}
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#else
-// FMA instructions are not available
-#include "generic/FMA.h"
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-template <typename T> LIBC_INLINE T fma(T x, T y, T z) {
- return generic::fma(x, y, z);
+template <> LIBC_INLINE double fma(double x, double y, double z) {
+ return __builtin_fma(x, y, z);
}
+#endif // LIBC_TARGET_CPU_HAS_FMA
} // namespace fputil
} // namespace LIBC_NAMESPACE
-#endif
-
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 595656e..a8a95ba 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -19,12 +19,15 @@ add_header_library(
HDRS
FMA.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.__support.common
libc.src.__support.CPP.bit
+ libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.big_int
libc.src.__support.macros.optimization
libc.src.__support.uint128
)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index f403aa7..71b1507 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -10,19 +10,26 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H
#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/big_int.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/uint128.h"
+
+#include "hdr/fenv_macros.h"
namespace LIBC_NAMESPACE {
namespace fputil {
namespace generic {
-template <typename T> LIBC_INLINE T fma(T x, T y, T z);
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z);
// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
// The implementation below only is only correct for the default rounding mode,
@@ -64,11 +71,10 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
// Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are
// zero.
if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) {
- if (bit_sum.sign() != t.sign()) {
+ if (bit_sum.sign() != t.sign())
bit_sum.set_mantissa(bit_sum.get_mantissa() + 1);
- } else if (bit_sum.get_mantissa()) {
+ else if (bit_sum.get_mantissa())
bit_sum.set_mantissa(bit_sum.get_mantissa() - 1);
- }
}
}
@@ -79,12 +85,14 @@ namespace internal {
// Extract the sticky bits and shift the `mantissa` to the right by
// `shift_length`.
-LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
- if (shift_length >= 128) {
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<is_unsigned_integral_or_big_int_v<T>, bool>
+shift_mantissa(int shift_length, T &mant) {
+ if (shift_length >= cpp::numeric_limits<T>::digits) {
mant = 0;
return true; // prod_mant is non-zero.
}
- UInt128 mask = (UInt128(1) << shift_length) - 1;
+ T mask = (T(1) << shift_length) - 1;
bool sticky_bits = (mant & mask) != 0;
mant >>= shift_length;
return sticky_bits;
@@ -92,47 +100,64 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
} // namespace internal
-template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
- using FPBits = fputil::FPBits<double>;
-
- if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
- return x * y + z;
- }
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z) {
+ using OutFPBits = fputil::FPBits<OutType>;
+ using OutStorageType = typename OutFPBits::StorageType;
+ using InFPBits = fputil::FPBits<InType>;
+ using InStorageType = typename InFPBits::StorageType;
+
+ constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
+ constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
+ constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
+ using TmpResultType = UInt<TMP_RESULT_LEN>;
+
+ constexpr size_t EXTRA_FRACTION_LEN =
+ TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
+ constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
+ (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
+
+ if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
+ return static_cast<OutType>(x * y + z);
int x_exp = 0;
int y_exp = 0;
int z_exp = 0;
// Normalize denormal inputs.
- if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) {
- x_exp -= 52;
- x *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) {
+ x_exp -= InFPBits::FRACTION_LEN;
+ x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) {
- y_exp -= 52;
- y *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) {
+ y_exp -= InFPBits::FRACTION_LEN;
+ y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) {
- z_exp -= 52;
- z *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) {
+ z_exp -= InFPBits::FRACTION_LEN;
+ z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- FPBits x_bits(x), y_bits(y), z_bits(z);
+ InFPBits x_bits(x), y_bits(y), z_bits(z);
const Sign z_sign = z_bits.sign();
Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
x_exp += x_bits.get_biased_exponent();
y_exp += y_bits.get_biased_exponent();
z_exp += z_bits.get_biased_exponent();
- if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT ||
- y_exp == FPBits::MAX_BIASED_EXPONENT ||
- z_exp == FPBits::MAX_BIASED_EXPONENT))
- return x * y + z;
+ if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ y_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ z_exp == InFPBits::MAX_BIASED_EXPONENT))
+ return static_cast<OutType>(x * y + z);
// Extract mantissa and append hidden leading bits.
- UInt128 x_mant = x_bits.get_explicit_mantissa();
- UInt128 y_mant = y_bits.get_explicit_mantissa();
- UInt128 z_mant = z_bits.get_explicit_mantissa();
+ InStorageType x_mant = x_bits.get_explicit_mantissa();
+ InStorageType y_mant = y_bits.get_explicit_mantissa();
+ TmpResultType z_mant = z_bits.get_explicit_mantissa();
// If the exponent of the product x*y > the exponent of z, then no extra
// precision beside the entire product x*y is needed. On the other hand, when
@@ -143,22 +168,20 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
// z : 10aa...a
// - prod : 1bb...bb....b
// In that case, in order to store the exact result, we need at least
- // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
+ // (Length of prod) - (Fraction length of z)
+ // = 2*(Length of input explicit mantissa) - (Fraction length of z) bits.
// Overall, before aligning the mantissas and exponents, we can simply left-
- // shift the mantissa of z by at least 54, and left-shift the product of x*y
- // by (that amount - 52). After that, it is enough to align the least
- // significant bit, given that we keep track of the round and sticky bits
- // after the least significant bit.
- // We pick shifting z_mant by 64 bits so that technically we can simply use
- // the original mantissa as high part when constructing 128-bit z_mant. So the
- // mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
-
- UInt128 prod_mant = x_mant * y_mant << 10;
+ // shift the mantissa of z by that amount. After that, it is enough to align
+ // the least significant bit, given that we keep track of the round and sticky
+ // bits after the least significant bit.
+
+ TmpResultType prod_mant = TmpResultType(x_mant) * y_mant;
int prod_lsb_exp =
- x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10);
+ x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN);
- z_mant <<= 64;
- int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64);
+ constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
+ z_mant <<= RESULT_MIN_LEN;
+ int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
bool round_bit = false;
bool sticky_bits = false;
bool z_shifted = false;
@@ -198,46 +221,42 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
}
}
- uint64_t result = 0;
+ OutStorageType result = 0;
int r_exp = 0; // Unbiased exponent of the result
+ int round_mode = fputil::quick_get_round();
+
// Normalize the result.
if (prod_mant != 0) {
- uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
- int lead_zeros =
- prod_hi ? cpp::countl_zero(prod_hi)
- : 64 + cpp::countl_zero(static_cast<uint64_t>(prod_mant));
+ int lead_zeros = cpp::countl_zero(prod_mant);
// Move the leading 1 to the most significant bit.
prod_mant <<= lead_zeros;
- // The lower 64 bits are always sticky bits after moving the leading 1 to
- // the most significant bit.
- sticky_bits |= (static_cast<uint64_t>(prod_mant) != 0);
- result = static_cast<uint64_t>(prod_mant >> 64);
- // Change prod_lsb_exp the be the exponent of the least significant bit of
- // the result.
- prod_lsb_exp += 64 - lead_zeros;
- r_exp = prod_lsb_exp + 63;
+ prod_lsb_exp -= lead_zeros;
+ r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
+ InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
if (r_exp > 0) {
- // The result is normal. We will shift the mantissa to the right by
- // 63 - 52 = 11 bits (from the locations of the most significant bit).
- // Then the rounding bit will correspond the 11th bit, and the lowest
- // 10 bits are merged into sticky bits.
- round_bit = (result & 0x0400ULL) != 0;
- sticky_bits |= (result & 0x03ffULL) != 0;
- result >>= 11;
+ // The result is normal. We will shift the mantissa to the right by the
+ // amount of extra bits compared to the length of the explicit mantissa in
+ // the output type. The rounding bit then becomes the highest bit that is
+ // shifted out, and the following lower bits are merged into sticky bits.
+ round_bit =
+ (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
+ sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
+ result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
} else {
- if (r_exp < -52) {
+ if (r_exp < -OutFPBits::FRACTION_LEN) {
// The result is smaller than 1/2 of the smallest denormal number.
sticky_bits = true; // since the result is non-zero.
result = 0;
} else {
// The result is denormal.
- uint64_t mask = 1ULL << (11 - r_exp);
- round_bit = (result & mask) != 0;
- sticky_bits |= (result & (mask - 1)) != 0;
- if (r_exp > -52)
- result >>= 12 - r_exp;
+ TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
+ round_bit = (prod_mant & mask) != 0;
+ sticky_bits |= (prod_mant & (mask - 1)) != 0;
+ if (r_exp > -OutFPBits::FRACTION_LEN)
+ result = static_cast<OutStorageType>(
+ prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
else
result = 0;
}
@@ -245,27 +264,30 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
r_exp = 0;
}
} else {
- // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly.
- prod_sign = Sign::POS;
+ // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
+ // rounding downward and +0.0 for other rounding modes.
+ if (round_mode == FE_DOWNWARD)
+ prod_sign = Sign::NEG;
+ else
+ prod_sign = Sign::POS;
}
// Finalize the result.
- int round_mode = fputil::quick_get_round();
- if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) {
+ if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
if ((round_mode == FE_TOWARDZERO) ||
(round_mode == FE_UPWARD && prod_sign.is_neg()) ||
(round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
- return FPBits::max_normal(prod_sign).get_val();
+ return OutFPBits::max_normal(prod_sign).get_val();
}
- return FPBits::inf(prod_sign).get_val();
+ return OutFPBits::inf(prod_sign).get_val();
}
// Remove hidden bit and append the exponent field and sign bit.
- result = (result & FPBits::FRACTION_MASK) |
- (static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
- if (prod_sign.is_neg()) {
- result |= FPBits::SIGN_MASK;
- }
+ result = static_cast<OutStorageType>(
+ (result & OutFPBits::FRACTION_MASK) |
+ (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
+ if (prod_sign.is_neg())
+ result |= OutFPBits::SIGN_MASK;
// Rounding.
if (round_mode == FE_TONEAREST) {
@@ -277,7 +299,7 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
++result;
}
- return cpp::bit_cast<double>(result);
+ return cpp::bit_cast<OutType>(result);
}
} // namespace generic
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 82932da..622914e 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE {
namespace fputil {
LIBC_INLINE float multiply_add(float x, float y, float z) {
- return fma(x, y, z);
+ return fma<float>(x, y, z);
}
LIBC_INLINE double multiply_add(double x, double y, double z) {
- return fma(x, y, z);
+ return fma<double>(x, y, z);
}
} // namespace fputil
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6ee..c30c3ec 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -983,23 +983,18 @@ using UInt = BigInt<Bits, false, internal::WordTypeSelectorT<Bits>>;
template <size_t Bits>
using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
-// Provides limits of U/Int<128>.
-template <> class cpp::numeric_limits<UInt<128>> {
-public:
- LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); }
- LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); }
- // Meant to match std::numeric_limits interface.
- // NOLINTNEXTLINE(readability-identifier-naming)
- LIBC_INLINE_VAR static constexpr int digits = 128;
-};
-
-template <> class cpp::numeric_limits<Int<128>> {
-public:
- LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); }
- LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); }
+// Provides limits of BigInt.
+template <size_t Bits, bool Signed, typename T>
+struct cpp::numeric_limits<BigInt<Bits, Signed, T>> {
+ LIBC_INLINE static constexpr BigInt<Bits, Signed, T> max() {
+ return BigInt<Bits, Signed, T>::max();
+ }
+ LIBC_INLINE static constexpr BigInt<Bits, Signed, T> min() {
+ return BigInt<Bits, Signed, T>::min();
+ }
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
- LIBC_INLINE_VAR static constexpr int digits = 128;
+ LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
};
// type traits to determine whether a T is a BigInt.
@@ -1073,6 +1068,18 @@ template <typename T>
using make_integral_or_big_int_signed_t =
typename make_integral_or_big_int_signed<T>::type;
+// is_unsigned_integral_or_big_int
+template <typename T>
+struct is_unsigned_integral_or_big_int
+ : cpp::bool_constant<
+ cpp::is_same_v<T, make_integral_or_big_int_unsigned_t<T>>> {};
+
+template <typename T>
+// Meant to look like <type_traits> helper variable templates.
+// NOLINTNEXTLINE(readability-identifier-naming)
+LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v =
+ is_unsigned_integral_or_big_int<T>::value;
+
namespace cpp {
// Specialization of cpp::bit_cast ('bit.h') from T to BigInt.
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index df8e6c0..4472367 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f)
add_math_entrypoint_object(expm1)
add_math_entrypoint_object(expm1f)
+add_math_entrypoint_object(f16fmaf)
+
add_math_entrypoint_object(f16sqrtf)
add_math_entrypoint_object(fabs)
diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h
new file mode 100644
index 0000000..d92cb43
--- /dev/null
+++ b/libc/src/math/f16fmaf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H
+#define LLVM_LIBC_SRC_MATH_F16FMAF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmaf(float x, float y, float z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f1f7d6c..aa0069d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3603,6 +3603,19 @@ add_entrypoint_object(
)
add_entrypoint_object(
+ f16fmaf
+ SRCS
+ f16fmaf.cpp
+ HDRS
+ ../f16fmaf.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.fma
+ COMPILE_OPTIONS
+ -O3
+)
+
+add_entrypoint_object(
f16sqrtf
SRCS
f16sqrtf.cpp
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 037e600..6b9f074 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -104,7 +104,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// intermediate results as it is more efficient than using an emulated
// version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA)
- return fputil::fma(x, x, x);
+ return fputil::fma<float>(x, x, x);
#else
double xd = x;
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
diff --git a/libc/src/math/generic/f16fmaf.cpp b/libc/src/math/generic/f16fmaf.cpp
new file mode 100644
index 0000000..09f2712
--- /dev/null
+++ b/libc/src/math/generic/f16fmaf.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fmaf function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fmaf.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fmaf, (float x, float y, float z)) {
+ return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp
index e27e5ba..7937766 100644
--- a/libc/src/math/generic/fma.cpp
+++ b/libc/src/math/generic/fma.cpp
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
- return fputil::fma(x, y, z);
+ return fputil::fma<double>(x, y, z);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaf.cpp b/libc/src/math/generic/fmaf.cpp
index 7512b82..d367a06 100644
--- a/libc/src/math/generic/fmaf.cpp
+++ b/libc/src/math/generic/fmaf.cpp
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
- return fputil::fma(x, y, z);
+ return fputil::fma<float>(x, y, z);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
index aee8cbb..82b4ae1 100644
--- a/libc/src/math/generic/range_reduction_fma.h
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -33,8 +33,8 @@ static constexpr double THIRTYTWO_OVER_PI[5] = {
// k = round(x * 32 / pi) and y = (x * 32 / pi) - k.
LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -kd);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], y);
return static_cast<int64_t>(kd);
}
@@ -54,12 +54,13 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
prod_hi.set_uintval(prod_hi.uintval() &
((x_exp < 55) ? (~0xfffULL) : (~0ULL))); // |x| < 2^55
double k_hi = fputil::nearest_integer(prod_hi.get_val());
- double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi);
- double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod);
+ double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -k_hi);
+ double prod_lo =
+ fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod);
double k_lo = fputil::nearest_integer(prod_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
return static_cast<int64_t>(k_lo);
}
@@ -74,12 +75,12 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
prod_hi.set_uintval(prod_hi.uintval() &
((x_exp < 110) ? (~0xfffULL) : (~0ULL))); // |x| < 2^110
double k_hi = fputil::nearest_integer(prod_hi.get_val());
- double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi);
- double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod);
+ double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], -k_hi);
+ double prod_lo = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod);
double k_lo = fputil::nearest_integer(prod_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[4], y);
return static_cast<int64_t>(k_lo);
}