1 files changed, 295 insertions, 58 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index e9998d7..2e7926b 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
@@ -51,49 +52,75 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi
 
 namespace llvm {
 
-  // How the nonfinite values Inf and NaN are represented.
-  enum class fltNonfiniteBehavior {
-    // Represents standard IEEE 754 behavior. A value is nonfinite if the
-    // exponent field is all 1s. In such cases, a value is Inf if the
-    // significand bits are all zero, and NaN otherwise
-    IEEE754,
-
-    // Only the Float8E5M2 has this behavior. There is no Inf representation. A
-    // value is NaN if the exponent field and the mantissa field are all 1s.
-    // This behavior matches the FP8 E4M3 type described in
-    // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
-    // as non-signalling, although the paper does not state whether the NaN
-    // values are signalling or not.
-    NanOnly,
-  };
+// How the nonfinite values Inf and NaN are represented.
+enum class fltNonfiniteBehavior {
+  // Represents standard IEEE 754 behavior. A value is nonfinite if the
+  // exponent field is all 1s. In such cases, a value is Inf if the
+  // significand bits are all zero, and NaN otherwise
+  IEEE754,
+
+  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
+  // Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
+  // and operations that would ordinarily produce Inf produce NaN instead.
+  // The details of the NaN representation(s) in this form are determined by the
+  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
+  // encodings do not distinguish between signalling and quiet NaN.
+  NanOnly,
+};
 
-  /* Represents floating point arithmetic semantics.  */
-  struct fltSemantics {
-    /* The largest E such that 2^E is representable; this matches the
-       definition of IEEE 754.  */
-    APFloatBase::ExponentType maxExponent;
+// How NaN values are represented. This is curently only used in combination
+// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
+// while having IEEE non-finite behavior is liable to lead to unexpected
+// results.
+enum class fltNanEncoding {
+  // Represents the standard IEEE behavior where a value is NaN if its
+  // exponent is all 1s and the significand is non-zero.
+  IEEE,
+
+  // Represents the behavior in the Float8E4M3 floating point type where NaN is
+  // represented by having the exponent and mantissa set to all 1s.
+  // This behavior matches the FP8 E4M3 type described in
+  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
+  // as non-signalling, although the paper does not state whether the NaN
+  // values are signalling or not.
+  AllOnes,
+
+  // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
+  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
+  // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
+  // there is only one NaN value, it is treated as quiet NaN. This matches the
+  // behavior described in https://arxiv.org/abs/2206.02915 .
+  NegativeZero,
+};
 
-    /* The smallest E such that 2^E is a normalized number; this
-       matches the definition of IEEE 754.  */
-    APFloatBase::ExponentType minExponent;
+/* Represents floating point arithmetic semantics.  */
+struct fltSemantics {
+  /* The largest E such that 2^E is representable; this matches the
+     definition of IEEE 754.  */
+  APFloatBase::ExponentType maxExponent;
 
-    /* Number of bits in the significand.  This includes the integer
-       bit.  */
-    unsigned int precision;
+  /* The smallest E such that 2^E is a normalized number; this
+     matches the definition of IEEE 754.  */
+  APFloatBase::ExponentType minExponent;
 
-    /* Number of bits actually used in the semantics. */
-    unsigned int sizeInBits;
+  /* Number of bits in the significand.  This includes the integer
+     bit.  */
+  unsigned int precision;
 
-    fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+  /* Number of bits actually used in the semantics. */
+  unsigned int sizeInBits;
 
-    // Returns true if any number described by this semantics can be precisely
-    // represented by the specified semantics. Does not take into account
-    // the value of fltNonfiniteBehavior.
-    bool isRepresentableBy(const fltSemantics &S) const {
-      return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
-             precision <= S.precision;
-    }
-  };
+  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+
+  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
+  // Returns true if any number described by this semantics can be precisely
+  // represented by the specified semantics. Does not take into account
+  // the value of fltNonfiniteBehavior.
+  bool isRepresentableBy(const fltSemantics &S) const {
+    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
+           precision <= S.precision;
+  }
+};
 
   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
   static const fltSemantics semBFloat = {127, -126, 8, 16};
@@ -101,8 +128,16 @@ namespace llvm {
   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
   static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
-  static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
-                                               fltNonfiniteBehavior::NanOnly};
+  static const fltSemantics semFloat8E5M2FNUZ = {15,
+                                                 -15,
+                                                 3,
+                                                 8,
+                                                 fltNonfiniteBehavior::NanOnly,
+                                                 fltNanEncoding::NegativeZero};
+  static const fltSemantics semFloat8E4M3FN = {
+      8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
+  static const fltSemantics semFloat8E4M3FNUZ = {
+      7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
   static const fltSemantics semBogus = {0, 0, 0, 0};
 
@@ -160,8 +195,12 @@ namespace llvm {
       return PPCDoubleDouble();
     case S_Float8E5M2:
       return Float8E5M2();
+    case S_Float8E5M2FNUZ:
+      return Float8E5M2FNUZ();
     case S_Float8E4M3FN:
       return Float8E4M3FN();
+    case S_Float8E4M3FNUZ:
+      return Float8E4M3FNUZ();
     case S_x87DoubleExtended:
       return x87DoubleExtended();
     }
@@ -184,8 +223,12 @@ namespace llvm {
       return S_PPCDoubleDouble;
     else if (&Sem == &llvm::APFloat::Float8E5M2())
       return S_Float8E5M2;
+    else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
+      return S_Float8E5M2FNUZ;
     else if (&Sem == &llvm::APFloat::Float8E4M3FN())
       return S_Float8E4M3FN;
+    else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
+      return S_Float8E4M3FNUZ;
     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
       return S_x87DoubleExtended;
     else
@@ -209,7 +252,13 @@ namespace llvm {
     return semPPCDoubleDouble;
   }
   const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
+  const fltSemantics &APFloatBase::Float8E5M2FNUZ() {
+    return semFloat8E5M2FNUZ;
+  }
   const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
+  const fltSemantics &APFloatBase::Float8E4M3FNUZ() {
+    return semFloat8E4M3FNUZ;
+  }
   const fltSemantics &APFloatBase::x87DoubleExtended() {
     return semX87DoubleExtended;
   }
@@ -808,10 +857,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
 
   APInt fill_storage;
   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
-    // The only NaN representation is where the mantissa is all 1s, which is
-    // non-signalling.
+    // Finite-only types do not distinguish signalling and quiet NaN, so
+    // make them all signalling.
     SNaN = false;
-    fill_storage = APInt::getAllOnes(semantics->precision - 1);
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+      sign = true;
+      fill_storage = APInt::getZero(semantics->precision - 1);
+    } else {
+      fill_storage = APInt::getAllOnes(semantics->precision - 1);
+    }
     fill = &fill_storage;
   }
 
@@ -842,6 +896,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
     // conventionally, this is the next bit down from the QNaN bit.
     if (APInt::tcIsZero(significand, numParts))
       APInt::tcSetBit(significand, QNaNBit - 1);
+  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+    // The only NaN is a quiet NaN, and it has no bits sets in the significand.
+    // Do nothing.
   } else {
     // We always have to set the QNaN bit to make it a QNaN.
     APInt::tcSetBit(significand, QNaNBit);
@@ -986,7 +1043,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
 }
 
 bool IEEEFloat::isLargest() const {
-  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+      semantics->nanEncoding == fltNanEncoding::AllOnes) {
     // The largest number by magnitude in our format will be the floating point
     // number with maximum exponent and with significand that is all ones except
     // the LSB.
@@ -1428,7 +1486,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
   exponent = semantics->maxExponent;
   tcSetLeastSignificantBits(significandParts(), partCount(),
                             semantics->precision);
-  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+      semantics->nanEncoding == fltNanEncoding::AllOnes)
     APInt::tcClearBit(significandParts(), 0);
 
   return opInexact;
@@ -1529,7 +1588,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
     }
   }
 
+  // The all-ones values is an overflow if NaN is all ones. If NaN is
+  // represented by negative zero, then it is a valid finite value.
   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+      semantics->nanEncoding == fltNanEncoding::AllOnes &&
       exponent == semantics->maxExponent && isSignificandAllOnes())
     return handleOverflow(rounding_mode);
 
@@ -1540,8 +1602,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
      underflow for exact results.  */
   if (lost_fraction == lfExactlyZero) {
     /* Canonicalize zeroes.  */
-    if (omsb == 0)
+    if (omsb == 0) {
       category = fcZero;
+      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+        sign = false;
+    }
 
     return opOK;
   }
@@ -1559,18 +1624,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
       /* Renormalize by incrementing the exponent and shifting our
          significand right one.  However if we already have the
          maximum exponent we overflow to infinity.  */
-      if (exponent == semantics->maxExponent) {
-        category = fcInfinity;
-
-        return (opStatus) (opOverflow | opInexact);
-      }
+      if (exponent == semantics->maxExponent)
+        // Invoke overflow handling with a rounding mode that will guarantee
+        // that the result gets turned into the correct infinity representation.
+        // This is needed instead of just setting the category to infinity to
+        // account for 8-bit floating point types that have no inf, only NaN.
+        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
 
       shiftSignificandRight(1);
 
       return opInexact;
     }
 
+    // The all-ones values is an overflow if NaN is all ones. If NaN is
+    // represented by negative zero, then it is a valid finite value.
     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+        semantics->nanEncoding == fltNanEncoding::AllOnes &&
         exponent == semantics->maxExponent && isSignificandAllOnes())
       return handleOverflow(rounding_mode);
   }
@@ -1584,8 +1653,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
   assert(omsb < semantics->precision);
 
   /* Canonicalize zeroes.  */
-  if (omsb == 0)
+  if (omsb == 0) {
     category = fcZero;
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      sign = false;
+  }
 
   /* The fcZero case is a denormal that underflowed to zero.  */
   return (opStatus) (opUnderflow | opInexact);
@@ -1887,6 +1959,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
 
 /* Change sign.  */
 void IEEEFloat::changeSign() {
+  // With NaN-as-negative-zero, neither NaN or negative zero can change
+  // their signs.
+  if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
+      (isZero() || isNaN()))
+    return;
   /* Look mummy, this one's easy.  */
   sign = !sign;
 }
@@ -1916,6 +1993,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
   if (category == fcZero) {
     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
       sign = (rounding_mode == rmTowardNegative);
+    // NaN-in-negative-zero means zeros need to be normalized to +0.
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      sign = false;
   }
 
   return fs;
@@ -1941,6 +2021,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
   sign ^= rhs.sign;
   fs = multiplySpecials(rhs);
 
+  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+    sign = false;
   if (isFiniteNonZero()) {
     lostFraction lost_fraction = multiplySignificand(rhs);
     fs = normalize(rounding_mode, lost_fraction);
@@ -1959,6 +2041,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
   sign ^= rhs.sign;
   fs = divideSpecials(rhs);
 
+  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+    sign = false;
   if (isFiniteNonZero()) {
     lostFraction lost_fraction = divideSignificand(rhs);
     fs = normalize(rounding_mode, lost_fraction);
@@ -2067,8 +2151,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
     }
   }
 
-  if (isZero())
+  if (isZero()) {
     sign = origSign;    // IEEE754 requires this
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      // But some 8-bit floats only have positive 0.
+      sign = false;
+  }
+
   else
     sign ^= origSign;
   return fs;
@@ -2093,8 +2182,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
     fs = subtract(V, rmNearestTiesToEven);
     assert(fs==opOK);
   }
-  if (isZero())
+  if (isZero()) {
     sign = origSign; // fmod requires this
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      sign = false;
+  }
   return fs;
 }
 
@@ -2122,8 +2214,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
        positive zero unless rounding to minus infinity, except that
        adding two like-signed zeroes gives that zero.  */
-    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
+    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
       sign = (rounding_mode == rmTowardNegative);
+      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+        sign = false;
+    }
   } else {
     fs = multiplySpecials(multiplicand);
 
@@ -2399,6 +2494,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
       return is_signaling ? opInvalidOp : opOK;
     }
 
+    // If NaN is negative zero, we need to create a new NaN to avoid converting
+    // NaN to -Inf.
+    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
+        semantics->nanEncoding != fltNanEncoding::NegativeZero)
+      makeNaN(false, false);
+
     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
 
     // For x87 extended precision, we want to make a NaN, not a special NaN if
@@ -2420,6 +2521,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
     makeNaN(false, sign);
     *losesInfo = true;
     fs = opInexact;
+  } else if (category == fcZero &&
+             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+    // Negative zero loses info, but positive zero doesn't.
+    *losesInfo =
+        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
+    fs = *losesInfo ? opInexact : opOK;
+    // NaN is negative zero means -0 -> +0, which can lose information
+    sign = false;
   } else {
     *losesInfo = false;
     fs = opOK;
@@ -2887,9 +2996,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
     category = fcZero;
     fs = opOK;
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      sign = false;
 
-  /* Check whether the normalized exponent is high enough to overflow
-     max during the log-rebasing in the max-exponent check below. */
+    /* Check whether the normalized exponent is high enough to overflow
+       max during the log-rebasing in the max-exponent check below. */
   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
     fs = handleOverflow(rounding_mode);
 
@@ -3517,6 +3628,33 @@ APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
                    (mysignificand & 0x3)));
 }
 
+APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
+  assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ);
+  assert(partCount() == 1);
+
+  uint32_t myexponent, mysignificand;
+
+  if (isFiniteNonZero()) {
+    myexponent = exponent + 16; // bias
+    mysignificand = (uint32_t)*significandParts();
+    if (myexponent == 1 && !(mysignificand & 0x4))
+      myexponent = 0; // denormal
+  } else if (category == fcZero) {
+    myexponent = 0;
+    mysignificand = 0;
+  } else if (category == fcInfinity) {
+    myexponent = 0;
+    mysignificand = 0;
+  } else {
+    assert(category == fcNaN && "Unknown category!");
+    myexponent = 0;
+    mysignificand = (uint32_t)*significandParts();
+  }
+
+  return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
+                   (mysignificand & 0x3)));
+}
+
 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
   assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
   assert(partCount() == 1);
@@ -3544,6 +3682,33 @@ APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
                    (mysignificand & 0x7)));
 }
 
+APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
+  assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ);
+  assert(partCount() == 1);
+
+  uint32_t myexponent, mysignificand;
+
+  if (isFiniteNonZero()) {
+    myexponent = exponent + 8; // bias
+    mysignificand = (uint32_t)*significandParts();
+    if (myexponent == 1 && !(mysignificand & 0x8))
+      myexponent = 0; // denormal
+  } else if (category == fcZero) {
+    myexponent = 0;
+    mysignificand = 0;
+  } else if (category == fcInfinity) {
+    myexponent = 0;
+    mysignificand = 0;
+  } else {
+    assert(category == fcNaN && "Unknown category!");
+    myexponent = 0;
+    mysignificand = (uint32_t)*significandParts();
+  }
+
+  return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
+                   (mysignificand & 0x7)));
+}
+
 // This function creates an APInt that is just a bit map of the floating
 // point constant as it would appear in memory.  It is not a conversion,
 // and treating the result as a normal integer is unlikely to be useful.
@@ -3570,9 +3735,15 @@ APInt IEEEFloat::bitcastToAPInt() const {
   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
     return convertFloat8E5M2APFloatToAPInt();
 
+  if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+    return convertFloat8E5M2FNUZAPFloatToAPInt();
+
   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
     return convertFloat8E4M3FNAPFloatToAPInt();
 
+  if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+    return convertFloat8E4M3FNUZAPFloatToAPInt();
+
   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
          "unknown format!");
   return convertF80LongDoubleAPFloatToAPInt();
@@ -3828,6 +3999,32 @@ void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
   }
 }
 
+void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
+  uint32_t i = (uint32_t)*api.getRawData();
+  uint32_t myexponent = (i >> 2) & 0x1f;
+  uint32_t mysignificand = i & 0x3;
+
+  initialize(&semFloat8E5M2FNUZ);
+  assert(partCount() == 1);
+
+  sign = i >> 7;
+  if (myexponent == 0 && mysignificand == 0 && sign == 0) {
+    makeZero(sign);
+  } else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
+    category = fcNaN;
+    exponent = exponentNaN();
+    *significandParts() = mysignificand;
+  } else {
+    category = fcNormal;
+    exponent = myexponent - 16; // bias
+    *significandParts() = mysignificand;
+    if (myexponent == 0) // denormal
+      exponent = -15;
+    else
+      *significandParts() |= 0x4; // integer bit
+  }
+}
+
 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
   uint32_t i = (uint32_t)*api.getRawData();
   uint32_t myexponent = (i >> 3) & 0xf;
@@ -3854,6 +4051,32 @@ void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
   }
 }
 
+void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
+  uint32_t i = (uint32_t)*api.getRawData();
+  uint32_t myexponent = (i >> 3) & 0xf;
+  uint32_t mysignificand = i & 0x7;
+
+  initialize(&semFloat8E4M3FNUZ);
+  assert(partCount() == 1);
+
+  sign = i >> 7;
+  if (myexponent == 0 && mysignificand == 0 && sign == 0) {
+    makeZero(sign);
+  } else if (myexponent == 0 && mysignificand == 0 && sign == 1) {
+    category = fcNaN;
+    exponent = exponentNaN();
+    *significandParts() = mysignificand;
+  } else {
+    category = fcNormal;
+    exponent = myexponent - 8; // bias
+    *significandParts() = mysignificand;
+    if (myexponent == 0) // denormal
+      exponent = -7;
+    else
+      *significandParts() |= 0x8; // integer bit
+  }
+}
+
 /// Treat api as containing the bits of a floating point number.
 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
   assert(api.getBitWidth() == Sem->sizeInBits);
@@ -3873,8 +4096,12 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
     return initFromPPCDoubleDoubleAPInt(api);
   if (Sem == &semFloat8E5M2)
     return initFromFloat8E5M2APInt(api);
+  if (Sem == &semFloat8E5M2FNUZ)
+    return initFromFloat8E5M2FNUZAPInt(api);
   if (Sem == &semFloat8E4M3FN)
     return initFromFloat8E4M3FNAPInt(api);
+  if (Sem == &semFloat8E4M3FNUZ)
+    return initFromFloat8E4M3FNUZAPInt(api);
 
   llvm_unreachable(nullptr);
 }
@@ -3903,7 +4130,8 @@ void IEEEFloat::makeLargest(bool Negative) {
                                    ? (~integerPart(0) >> NumUnusedHighBits)
                                    : 0;
 
-  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+      semantics->nanEncoding == fltNanEncoding::AllOnes)
     significand[0] &= ~integerPart(1);
 }
 
@@ -4331,6 +4559,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
       APInt::tcSet(significandParts(), 0, partCount());
       category = fcZero;
       exponent = 0;
+      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+        sign = false;
       break;
     }
 
@@ -4417,8 +4647,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
 }
 
 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
-  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+      return semantics->minExponent;
     return semantics->maxExponent;
+  }
   return semantics->maxExponent + 1;
 }
 
@@ -4445,6 +4678,10 @@ void IEEEFloat::makeInf(bool Negative) {
 void IEEEFloat::makeZero(bool Negative) {
   category = fcZero;
   sign = Negative;
+  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+    // Merge negative zero to positive because 0b10000...000 is used for NaN
+    sign = false;
+  }
   exponent = exponentZero();
   APInt::tcSet(significandParts(), 0, partCount());
 }