aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libc/utils/FPUtil/CMakeLists.txt1
-rw-r--r--libc/utils/FPUtil/ManipulationFunctions.h111
-rw-r--r--libc/utils/FPUtil/NormalFloat.h228
3 files changed, 243 insertions, 97 deletions
diff --git a/libc/utils/FPUtil/CMakeLists.txt b/libc/utils/FPUtil/CMakeLists.txt
index ea91120..745ede3 100644
--- a/libc/utils/FPUtil/CMakeLists.txt
+++ b/libc/utils/FPUtil/CMakeLists.txt
@@ -17,6 +17,7 @@ add_header_library(
BasicOperations.h
ManipulationFunctions.h
NearestIntegerOperations.h
+ NormalFloat.h
DEPENDS
libc.utils.CPP.standalone_cpp
)
diff --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h
index ce5400f..f233fcd 100644
--- a/libc/utils/FPUtil/ManipulationFunctions.h
+++ b/libc/utils/FPUtil/ManipulationFunctions.h
@@ -6,37 +6,20 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H
+#define LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H
+
#include "FPBits.h"
#include "NearestIntegerOperations.h"
+#include "NormalFloat.h"
#include "utils/CPP/TypeTraits.h"
-#ifndef LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H
-#define LLVM_LIBC_UTILS_FPUTIL_MANIPULATION_FUNCTIONS_H
-
namespace __llvm_libc {
namespace fputil {
-#if defined(__x86_64__) || defined(__i386__)
-template <typename T> struct Standard754Type {
- static constexpr bool Value =
- cpp::IsSame<float, cpp::RemoveCVType<T>>::Value ||
- cpp::IsSame<double, cpp::RemoveCVType<T>>::Value;
-};
-#else
-template <typename T> struct Standard754Type {
- static constexpr bool Value = cpp::IsFloatingPointType<T>::Value;
-};
-#endif
-
-template <typename T> static inline T frexp_impl(FPBits<T> &bits, int &exp) {
- exp = bits.getExponent() + 1;
- static constexpr uint16_t resultExponent = FPBits<T>::exponentBias - 1;
- bits.exponent = resultExponent;
- return bits;
-}
-
-template <typename T, cpp::EnableIfType<Standard754Type<T>::Value, int> = 0>
+template <typename T,
+ cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T frexp(T x, int &exp) {
FPBits<T> bits(x);
if (bits.isInfOrNaN())
@@ -46,42 +29,12 @@ static inline T frexp(T x, int &exp) {
return x;
}
- return frexp_impl(bits, exp);
+ NormalFloat<T> normal(bits);
+ exp = normal.exponent + 1;
+ normal.exponent = -1;
+ return normal;
}
-#if defined(__x86_64__) || defined(__i386__)
-static inline long double frexp(long double x, int &exp) {
- FPBits<long double> bits(x);
- if (bits.isInfOrNaN())
- return x;
- if (bits.isZero()) {
- exp = 0;
- return x;
- }
-
- if (bits.exponent != 0 || bits.implicitBit == 1)
- return frexp_impl(bits, exp);
-
- exp = bits.getExponent();
- int shiftCount = 0;
- uint64_t fullMantissa = *reinterpret_cast<uint64_t *>(&bits);
- static constexpr uint64_t msBitMask = uint64_t(1) << 63;
- for (; (fullMantissa & msBitMask) == uint64_t(0);
- fullMantissa <<= 1, ++shiftCount) {
- // This for loop will terminate as fullMantissa is != 0. If it were 0,
- // then x will be NaN and handled before control reaches here.
- // When the loop terminates, fullMantissa will represent the full mantissa
- // of a normal long double value. That is, the implicit bit has the value
- // of 1.
- }
-
- exp = exp - shiftCount + 1;
- *reinterpret_cast<uint64_t *>(&bits) = fullMantissa;
- bits.exponent = FPBits<long double>::exponentBias - 1;
- return bits;
-}
-#endif
-
template <typename T,
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T modf(T x, T &iptr) {
@@ -112,11 +65,8 @@ static inline T copysign(T x, T y) {
return xbits;
}
-template <typename T> static inline T logb_impl(const FPBits<T> &bits) {
- return bits.getExponent();
-}
-
-template <typename T, cpp::EnableIfType<Standard754Type<T>::Value, int> = 0>
+template <typename T,
+ cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
static inline T logb(T x) {
FPBits<T> bits(x);
if (bits.isZero()) {
@@ -130,42 +80,9 @@ static inline T logb(T x) {
return FPBits<T>::inf();
}
- return logb_impl(bits);
-}
-
-#if defined(__x86_64__) || defined(__i386__)
-static inline long double logb(long double x) {
- FPBits<long double> bits(x);
- if (bits.isZero()) {
- // TODO(Floating point exception): Raise div-by-zero exception.
- // TODO(errno): POSIX requires setting errno to ERANGE.
- return FPBits<long double>::negInf();
- } else if (bits.isNaN()) {
- return x;
- } else if (bits.isInf()) {
- // Return positive infinity.
- return FPBits<long double>::inf();
- }
-
- if (bits.exponent != 0 || bits.implicitBit == 1)
- return logb_impl(bits);
-
- int exp = bits.getExponent();
- int shiftCount = 0;
- uint64_t fullMantissa = *reinterpret_cast<uint64_t *>(&bits);
- static constexpr uint64_t msBitMask = uint64_t(1) << 63;
- for (; (fullMantissa & msBitMask) == uint64_t(0);
- fullMantissa <<= 1, ++shiftCount) {
- // This for loop will terminate as fullMantissa is != 0. If it were 0,
- // then x will be NaN and handled before control reaches here.
- // When the loop terminates, fullMantissa will represent the full mantissa
- // of a normal long double value. That is, the implicit bit has the value
- // of 1.
- }
-
- return exp - shiftCount;
+ NormalFloat<T> normal(bits);
+ return normal.exponent;
}
-#endif
} // namespace fputil
} // namespace __llvm_libc
diff --git a/libc/utils/FPUtil/NormalFloat.h b/libc/utils/FPUtil/NormalFloat.h
new file mode 100644
index 0000000..e0e6911
--- /dev/null
+++ b/libc/utils/FPUtil/NormalFloat.h
@@ -0,0 +1,228 @@
+//===-- A class to store a normalized floating point number -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H
+#define LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H
+
+#include "FPBits.h"
+
+#include "utils/CPP/TypeTraits.h"
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+namespace fputil {
+
+// A class which stores the normalized form of a floating point value.
+// The special IEEE-754 bits patterns of Zero, infinity and NaNs are
+// are not handled by this class.
+//
+// A normalized floating point number is of this form:
+// (-1)*sign * 2^exponent * <mantissa>
+// where <mantissa> is of the form 1.<...>.
+template <typename T> struct NormalFloat {
+ static_assert(
+ cpp::IsFloatingPointType<T>::Value,
+ "NormalFloat template parameter has to be a floating point type.");
+
+ using UIntType = typename FPBits<T>::UIntType;
+ static constexpr UIntType one = (UIntType(1) << MantissaWidth<T>::value);
+
+ // Unbiased exponent value.
+ int32_t exponent;
+
+ UIntType mantissa;
+ // We want |UIntType| to have atleast one bit more than the actual mantissa
+ // bit width to accommodate the implicit 1 value.
+ static_assert(sizeof(UIntType) * 8 >= MantissaWidth<T>::value + 1,
+ "Bad type for mantissa in NormalFloat.");
+
+ bool sign;
+
+ NormalFloat(int32_t e, UIntType m, bool s)
+ : exponent(e), mantissa(m), sign(s) {
+ if (mantissa >= one)
+ return;
+
+ unsigned normalizationShift = evaluateNormalizationShift(mantissa);
+ mantissa = mantissa << normalizationShift;
+ exponent -= normalizationShift;
+ }
+
+ explicit NormalFloat(T x) { initFromBits(FPBits<T>(x)); }
+
+ explicit NormalFloat(FPBits<T> bits) { initFromBits(bits); }
+
+ // Compares this normalized number with another normalized number.
+ // Returns -1 is this number is less than |other|, 0 if this number is equal
+ // to |other|, and 1 if this number is greater than |other|.
+ int cmp(const NormalFloat<T> &other) const {
+ if (sign != other.sign)
+ return sign ? -1 : 1;
+
+ if (exponent > other.exponent) {
+ return sign ? -1 : 1;
+ } else if (exponent == other.exponent) {
+ if (mantissa > other.mantissa)
+ return sign ? -1 : 1;
+ else if (mantissa == other.mantissa)
+ return 0;
+ else
+ return sign ? 1 : -1;
+ } else {
+ return sign ? 1 : -1;
+ }
+ }
+
+ // Returns a new normalized floating point number which is equal in value
+ // to this number multiplied by 2^e. That is:
+ // new = this * 2^e
+ NormalFloat<T> mul2(int e) const {
+ NormalFloat<T> result = *this;
+ result.exponent += e;
+ return result;
+ }
+
+ operator T() const {
+ int biasedExponent = exponent + FPBits<T>::exponentBias;
+ // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
+ constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
+ if (biasedExponent > maxExponentValue) {
+ // TODO: Should infinity with the correct sign be returned?
+ return FPBits<T>::buildNaN(1);
+ }
+
+ FPBits<T> result(T(0.0));
+
+ constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
+ if (exponent < subnormalExponent) {
+ unsigned shift = subnormalExponent - exponent;
+ if (shift <= MantissaWidth<T>::value) {
+ // Generate a subnormal number. Might lead to loss of precision.
+ result.exponent = 0;
+ result.mantissa = mantissa >> shift;
+ result.sign = sign;
+ return result;
+ } else {
+ // TODO: Should zero with the correct sign be returned?
+ return FPBits<T>::buildNaN(1);
+ }
+ }
+
+ result.exponent = exponent + FPBits<T>::exponentBias;
+ result.mantissa = mantissa;
+ result.sign = sign;
+ return result;
+ }
+
+private:
+ void initFromBits(FPBits<T> bits) {
+ sign = bits.sign;
+
+ if (bits.isInfOrNaN() || bits.isZero()) {
+ // Ignore special bit patterns. Implementations deal with them separately
+ // anyway so this should not be a problem.
+ exponent = 0;
+ mantissa = 0;
+ return;
+ }
+
+ // Normalize subnormal numbers.
+ if (bits.exponent == 0) {
+ unsigned shift = evaluateNormalizationShift(bits.mantissa);
+ mantissa = UIntType(bits.mantissa) << shift;
+ exponent = 1 - FPBits<T>::exponentBias - shift;
+ } else {
+ exponent = bits.exponent - FPBits<T>::exponentBias;
+ mantissa = one | bits.mantissa;
+ }
+ }
+
+ unsigned evaluateNormalizationShift(UIntType m) {
+ unsigned shift = 0;
+ for (; (one & m) == 0 && (shift < MantissaWidth<T>::value);
+ m <<= 1, ++shift)
+ ;
+ return shift;
+ }
+};
+
+#if defined(__x86_64__) || defined(__i386__)
+template <>
+inline void NormalFloat<long double>::initFromBits(FPBits<long double> bits) {
+ sign = bits.sign;
+
+ if (bits.isInfOrNaN() || bits.isZero()) {
+ // Ignore special bit patterns. Implementations deal with them separately
+ // anyway so this should not be a problem.
+ exponent = 0;
+ mantissa = 0;
+ return;
+ }
+
+ if (bits.exponent == 0) {
+ if (bits.implicitBit == 0) {
+ // Since we ignore zero value, the mantissa in this case is non-zero.
+ int normalizationShift = evaluateNormalizationShift(bits.mantissa);
+ exponent = -16382 - normalizationShift;
+ mantissa = (bits.mantissa << normalizationShift);
+ } else {
+ exponent = -16382;
+ mantissa = one | bits.mantissa;
+ }
+ } else {
+ if (bits.implicitBit == 0) {
+ // Invalid number so just store 0 similar to a NaN.
+ exponent = 0;
+ mantissa = 0;
+ } else {
+ exponent = bits.exponent - 16383;
+ mantissa = one | bits.mantissa;
+ }
+ }
+}
+
+template <> inline NormalFloat<long double>::operator long double() const {
+ int biasedExponent = exponent + FPBits<long double>::exponentBias;
+ // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
+ constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2;
+ if (biasedExponent > maxExponentValue) {
+ // TODO: Should infinity with the correct sign be returned?
+ return FPBits<long double>::buildNaN(1);
+ }
+
+ FPBits<long double> result(0.0l);
+
+ constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
+ if (exponent < subnormalExponent) {
+ unsigned shift = subnormalExponent - exponent;
+ if (shift <= MantissaWidth<long double>::value) {
+ // Generate a subnormal number. Might lead to loss of precision.
+ result.exponent = 0;
+ result.mantissa = mantissa >> shift;
+ result.implicitBit = 0;
+ result.sign = sign;
+ return result;
+ } else {
+ // TODO: Should zero with the correct sign be returned?
+ return FPBits<long double>::buildNaN(1);
+ }
+ }
+
+ result.exponent = biasedExponent;
+ result.mantissa = mantissa;
+ result.implicitBit = 1;
+ result.sign = sign;
+ return result;
+}
+#endif
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H