aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Support/APFloat.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Support/APFloat.cpp')
-rw-r--r--llvm/lib/Support/APFloat.cpp353
1 files changed, 295 insertions, 58 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index e9998d7..2e7926b 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
@@ -51,49 +52,75 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi
namespace llvm {
- // How the nonfinite values Inf and NaN are represented.
- enum class fltNonfiniteBehavior {
- // Represents standard IEEE 754 behavior. A value is nonfinite if the
- // exponent field is all 1s. In such cases, a value is Inf if the
- // significand bits are all zero, and NaN otherwise
- IEEE754,
-
- // Only the Float8E5M2 has this behavior. There is no Inf representation. A
- // value is NaN if the exponent field and the mantissa field are all 1s.
- // This behavior matches the FP8 E4M3 type described in
- // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
- // as non-signalling, although the paper does not state whether the NaN
- // values are signalling or not.
- NanOnly,
- };
+// How the nonfinite values Inf and NaN are represented.
+enum class fltNonfiniteBehavior {
+ // Represents standard IEEE 754 behavior. A value is nonfinite if the
+ // exponent field is all 1s. In such cases, a value is Inf if the
+ // significand bits are all zero, and NaN otherwise
+ IEEE754,
+
+ // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
+ // Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
+ // and operations that would ordinarily produce Inf produce NaN instead.
+ // The details of the NaN representation(s) in this form are determined by the
+ // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
+ // encodings do not distinguish between signalling and quiet NaN.
+ NanOnly,
+};
- /* Represents floating point arithmetic semantics. */
- struct fltSemantics {
- /* The largest E such that 2^E is representable; this matches the
- definition of IEEE 754. */
- APFloatBase::ExponentType maxExponent;
+// How NaN values are represented. This is currently only used in combination
+// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
+// while having IEEE non-finite behavior is liable to lead to unexpected
+// results.
+enum class fltNanEncoding {
+ // Represents the standard IEEE behavior where a value is NaN if its
+ // exponent is all 1s and the significand is non-zero.
+ IEEE,
+
+ // Represents the behavior in the Float8E4M3FN floating point type where NaN
+ // is represented by having the exponent and mantissa set to all 1s.
+ // This behavior matches the FP8 E4M3 type described in
+ // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
+ // as non-signalling, although the paper does not state whether the NaN
+ // values are signalling or not.
+ AllOnes,
+
+ // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
+ // where NaN is represented by a sign bit of 1 and all 0s in the exponent
+ // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
+ // there is only one NaN value, it is treated as quiet NaN. This matches the
+ // behavior described in https://arxiv.org/abs/2206.02915 .
+ NegativeZero,
+};
- /* The smallest E such that 2^E is a normalized number; this
- matches the definition of IEEE 754. */
- APFloatBase::ExponentType minExponent;
+/* Represents floating point arithmetic semantics. */
+struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ APFloatBase::ExponentType maxExponent;
- /* Number of bits in the significand. This includes the integer
- bit. */
- unsigned int precision;
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ APFloatBase::ExponentType minExponent;
- /* Number of bits actually used in the semantics. */
- unsigned int sizeInBits;
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
- fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+ /* Number of bits actually used in the semantics. */
+ unsigned int sizeInBits;
- // Returns true if any number described by this semantics can be precisely
- // represented by the specified semantics. Does not take into account
- // the value of fltNonfiniteBehavior.
- bool isRepresentableBy(const fltSemantics &S) const {
- return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
- precision <= S.precision;
- }
- };
+ fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+
+ fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
+ // Returns true if any number described by this semantics can be precisely
+ // represented by the specified semantics. Does not take into account
+ // the value of fltNonfiniteBehavior.
+ bool isRepresentableBy(const fltSemantics &S) const {
+ return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
+ precision <= S.precision;
+ }
+};
static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
static const fltSemantics semBFloat = {127, -126, 8, 16};
@@ -101,8 +128,16 @@ namespace llvm {
static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
- static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
- fltNonfiniteBehavior::NanOnly};
+ static const fltSemantics semFloat8E5M2FNUZ = {15,
+ -15,
+ 3,
+ 8,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::NegativeZero};
+ static const fltSemantics semFloat8E4M3FN = {
+ 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
+ static const fltSemantics semFloat8E4M3FNUZ = {
+ 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static const fltSemantics semBogus = {0, 0, 0, 0};
@@ -160,8 +195,12 @@ namespace llvm {
return PPCDoubleDouble();
case S_Float8E5M2:
return Float8E5M2();
+ case S_Float8E5M2FNUZ:
+ return Float8E5M2FNUZ();
case S_Float8E4M3FN:
return Float8E4M3FN();
+ case S_Float8E4M3FNUZ:
+ return Float8E4M3FNUZ();
case S_x87DoubleExtended:
return x87DoubleExtended();
}
@@ -184,8 +223,12 @@ namespace llvm {
return S_PPCDoubleDouble;
else if (&Sem == &llvm::APFloat::Float8E5M2())
return S_Float8E5M2;
+ else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
+ return S_Float8E5M2FNUZ;
else if (&Sem == &llvm::APFloat::Float8E4M3FN())
return S_Float8E4M3FN;
+ else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
+ return S_Float8E4M3FNUZ;
else if (&Sem == &llvm::APFloat::x87DoubleExtended())
return S_x87DoubleExtended;
else
@@ -209,7 +252,13 @@ namespace llvm {
return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
+ const fltSemantics &APFloatBase::Float8E5M2FNUZ() {
+ return semFloat8E5M2FNUZ;
+ }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
+ const fltSemantics &APFloatBase::Float8E4M3FNUZ() {
+ return semFloat8E4M3FNUZ;
+ }
const fltSemantics &APFloatBase::x87DoubleExtended() {
return semX87DoubleExtended;
}
@@ -808,10 +857,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
APInt fill_storage;
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
- // The only NaN representation is where the mantissa is all 1s, which is
- // non-signalling.
+ // Finite-only types do not distinguish signalling and quiet NaN, so
+ // make them all quiet.
SNaN = false;
- fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ sign = true;
+ fill_storage = APInt::getZero(semantics->precision - 1);
+ } else {
+ fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ }
fill = &fill_storage;
}
@@ -842,6 +896,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// conventionally, this is the next bit down from the QNaN bit.
if (APInt::tcIsZero(significand, numParts))
APInt::tcSetBit(significand, QNaNBit - 1);
+ } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // The only NaN is a quiet NaN, and it has no bits set in the significand.
+ // Do nothing.
} else {
// We always have to set the QNaN bit to make it a QNaN.
APInt::tcSetBit(significand, QNaNBit);
@@ -986,7 +1043,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
}
bool IEEEFloat::isLargest() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes) {
// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones except
// the LSB.
@@ -1428,7 +1486,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
exponent = semantics->maxExponent;
tcSetLeastSignificantBits(significandParts(), partCount(),
semantics->precision);
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
APInt::tcClearBit(significandParts(), 0);
return opInexact;
@@ -1529,7 +1588,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
}
}
+ // The all-ones value is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
@@ -1540,8 +1602,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
underflow for exact results. */
if (lost_fraction == lfExactlyZero) {
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return opOK;
}
@@ -1559,18 +1624,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
/* Renormalize by incrementing the exponent and shifting our
significand right one. However if we already have the
maximum exponent we overflow to infinity. */
- if (exponent == semantics->maxExponent) {
- category = fcInfinity;
-
- return (opStatus) (opOverflow | opInexact);
- }
+ if (exponent == semantics->maxExponent)
+ // Invoke overflow handling with a rounding mode that will guarantee
+ // that the result gets turned into the correct infinity representation.
+ // This is needed instead of just setting the category to infinity to
+ // account for 8-bit floating point types that have no inf, only NaN.
+ return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
shiftSignificandRight(1);
return opInexact;
}
+ // The all-ones value is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
}
@@ -1584,8 +1653,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
assert(omsb < semantics->precision);
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
/* The fcZero case is a denormal that underflowed to zero. */
return (opStatus) (opUnderflow | opInexact);
@@ -1887,6 +1959,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
/* Change sign. */
void IEEEFloat::changeSign() {
+ // With NaN-as-negative-zero, neither NaN nor zero can change
+ // its sign.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
+ (isZero() || isNaN()))
+ return;
/* Look mummy, this one's easy. */
sign = !sign;
}
@@ -1916,6 +1993,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
if (category == fcZero) {
if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
sign = (rounding_mode == rmTowardNegative);
+ // NaN-in-negative-zero means zeros need to be normalized to +0.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
}
return fs;
@@ -1941,6 +2021,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = multiplySpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = multiplySignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -1959,6 +2041,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = divideSpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = divideSignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -2067,8 +2151,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
}
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // IEEE754 requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ // But some 8-bit floats only have positive 0.
+ sign = false;
+ }
+
else
sign ^= origSign;
return fs;
@@ -2093,8 +2182,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // fmod requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return fs;
}
@@ -2122,8 +2214,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */
- if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
+ if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
sign = (rounding_mode == rmTowardNegative);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
} else {
fs = multiplySpecials(multiplicand);
@@ -2399,6 +2494,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
return is_signaling ? opInvalidOp : opOK;
}
+ // If NaN is negative zero, we need to create a new NaN to avoid converting
+ // NaN to -Inf.
+ if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
+ semantics->nanEncoding != fltNanEncoding::NegativeZero)
+ makeNaN(false, false);
+
*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// For x87 extended precision, we want to make a NaN, not a special NaN if
@@ -2420,6 +2521,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
makeNaN(false, sign);
*losesInfo = true;
fs = opInexact;
+ } else if (category == fcZero &&
+ semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Negative zero loses info, but positive zero doesn't.
+ *losesInfo =
+ fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
+ fs = *losesInfo ? opInexact : opOK;
+ // NaN is negative zero means -0 -> +0, which can lose information
+ sign = false;
} else {
*losesInfo = false;
fs = opOK;
@@ -2887,9 +2996,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;
fs = opOK;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
- /* Check whether the normalized exponent is high enough to overflow
- max during the log-rebasing in the max-exponent check below. */
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
fs = handleOverflow(rounding_mode);
@@ -3517,6 +3628,33 @@ APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
(mysignificand & 0x3)));
}
+APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ);
+ assert(partCount() == 1);
+ // Packs this value into 8 bits: 1 sign bit, 5 exponent bits, 2 mantissa bits.
+ uint32_t myexponent, mysignificand;
+ // Choose the raw exponent and mantissa fields according to the category.
+ if (isFiniteNonZero()) {
+ myexponent = exponent + 16; // bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x4))
+ myexponent = 0; // denormal: integer bit (0x4) is clear at the minimum exponent
+ } else if (category == fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category == fcInfinity) {
+ myexponent = 0; // infinity is written with the same zero fields as fcZero
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0; // NaN is written with a zero exponent field
+ mysignificand = (uint32_t)*significandParts();
+ }
+ // Bit 7 holds the sign, bits 6..2 the exponent, bits 1..0 the mantissa.
+ return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
+ (mysignificand & 0x3)));
+}
+
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
assert(partCount() == 1);
@@ -3544,6 +3682,33 @@ APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
(mysignificand & 0x7)));
}
+APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ);
+ assert(partCount() == 1);
+ // Packs this value into 8 bits: 1 sign bit, 4 exponent bits, 3 mantissa bits.
+ uint32_t myexponent, mysignificand;
+ // Choose the raw exponent and mantissa fields according to the category.
+ if (isFiniteNonZero()) {
+ myexponent = exponent + 8; // bias
+ mysignificand = (uint32_t)*significandParts();
+ if (myexponent == 1 && !(mysignificand & 0x8))
+ myexponent = 0; // denormal: integer bit (0x8) is clear at the minimum exponent
+ } else if (category == fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category == fcInfinity) {
+ myexponent = 0; // infinity is written with the same zero fields as fcZero
+ mysignificand = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category!");
+ myexponent = 0; // NaN is written with a zero exponent field
+ mysignificand = (uint32_t)*significandParts();
+ }
+ // Bit 7 holds the sign, bits 6..3 the exponent, bits 2..0 the mantissa.
+ return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
+ (mysignificand & 0x7)));
+}
+
// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory. It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
@@ -3570,9 +3735,15 @@ APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ return convertFloat8E5M2FNUZAPFloatToAPInt();
+
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ return convertFloat8E4M3FNUZAPFloatToAPInt();
+
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
@@ -3828,6 +3999,32 @@ void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
}
}
+void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 2) & 0x1f; // 5-bit exponent field, bits 6..2
+ uint32_t mysignificand = i & 0x3; // 2-bit mantissa field, bits 1..0
+ // Rebuilds this value from the bit pattern in api using Float8E5M2FNUZ semantics.
+ initialize(&semFloat8E5M2FNUZ);
+ assert(partCount() == 1);
+ // Zero fields with a clear sign bit mean zero; with a set sign bit they mean NaN.
+ sign = i >> 7;
+ if (myexponent == 0 && mysignificand == 0 && sign == 0) {
+ makeZero(sign);
+ } else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
+ category = fcNaN;
+ exponent = exponentNaN();
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16; // bias
+ *significandParts() = mysignificand;
+ if (myexponent == 0) // denormal
+ exponent = -15; // denormals take the minimum exponent and get no integer bit
+ else
+ *significandParts() |= 0x4; // integer bit
+ }
+}
+
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 3) & 0xf;
@@ -3854,6 +4051,32 @@ void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
}
}
+void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 3) & 0xf; // 4-bit exponent field, bits 6..3
+ uint32_t mysignificand = i & 0x7; // 3-bit mantissa field, bits 2..0
+ // Rebuilds this value from the bit pattern in api using Float8E4M3FNUZ semantics.
+ initialize(&semFloat8E4M3FNUZ);
+ assert(partCount() == 1);
+ // Zero fields with a clear sign bit mean zero; with a set sign bit they mean NaN.
+ sign = i >> 7;
+ if (myexponent == 0 && mysignificand == 0 && sign == 0) {
+ makeZero(sign);
+ } else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
+ category = fcNaN;
+ exponent = exponentNaN();
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 8; // bias
+ *significandParts() = mysignificand;
+ if (myexponent == 0) // denormal
+ exponent = -7; // denormals take the minimum exponent and get no integer bit
+ else
+ *significandParts() |= 0x8; // integer bit
+ }
+}
+
/// Treat api as containing the bits of a floating point number.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
assert(api.getBitWidth() == Sem->sizeInBits);
@@ -3873,8 +4096,12 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
return initFromPPCDoubleDoubleAPInt(api);
if (Sem == &semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
+ if (Sem == &semFloat8E5M2FNUZ)
+ return initFromFloat8E5M2FNUZAPInt(api);
if (Sem == &semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
+ if (Sem == &semFloat8E4M3FNUZ)
+ return initFromFloat8E4M3FNUZAPInt(api);
llvm_unreachable(nullptr);
}
@@ -3903,7 +4130,8 @@ void IEEEFloat::makeLargest(bool Negative) {
? (~integerPart(0) >> NumUnusedHighBits)
: 0;
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
significand[0] &= ~integerPart(1);
}
@@ -4331,6 +4559,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
APInt::tcSet(significandParts(), 0, partCount());
category = fcZero;
exponent = 0;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
break;
}
@@ -4417,8 +4647,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
}
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ return semantics->minExponent;
return semantics->maxExponent;
+ }
return semantics->maxExponent + 1;
}
@@ -4445,6 +4678,10 @@ void IEEEFloat::makeInf(bool Negative) {
void IEEEFloat::makeZero(bool Negative) {
category = fcZero;
sign = Negative;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Merge negative zero to positive because 0b10000...000 is used for NaN
+ sign = false;
+ }
exponent = exponentZero();
APInt::tcSet(significandParts(), 0, partCount());
}