about | summary | refs | log | tree | commit | diff
path: root/llvm/unittests/ADT/APFloatTest.cpp
diff options
context:
space:
mode:
author: Durgadoss R <durgadossr@nvidia.com>, 2024-06-11 13:16:51 +0530
committer: GitHub <noreply@github.com>, 2024-06-11 13:16:51 +0530
commit: b1fe03f0840a2c488b1f07a669bfea3cc986ce3b (patch)
tree: 94dc61f58ba757dffafbc48e2e6f5f4d13aa54ee /llvm/unittests/ADT/APFloatTest.cpp
parent: bd9a525efdab2a83cb24773d95ce8c4a2e9cce68 (diff)
download: llvm-b1fe03f0840a2c488b1f07a669bfea3cc986ce3b.zip
          llvm-b1fe03f0840a2c488b1f07a669bfea3cc986ce3b.tar.gz
          llvm-b1fe03f0840a2c488b1f07a669bfea3cc986ce3b.tar.bz2
[APFloat] Add APFloat support for FP6 data types (#94735)
This patch adds APFloat type support for two FP6 data types, E2M3 and E3M2.
The definitions for the two formats are detailed in section 5.3.2 of the OCP
specification, which can be accessed here:
https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
Diffstat (limited to 'llvm/unittests/ADT/APFloatTest.cpp')
-rw-r--r-- llvm/unittests/ADT/APFloatTest.cpp | 484
1 file changed, 462 insertions, 22 deletions
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 6e4dda8..7007d94 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -723,11 +723,13 @@ TEST(APFloatTest, IsSmallestNormalized) {
EXPECT_FALSE(APFloat::getZero(Semantics, false).isSmallestNormalized());
EXPECT_FALSE(APFloat::getZero(Semantics, true).isSmallestNormalized());
- EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized());
- EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized());
+ if (APFloat::hasNanOrInf(Semantics)) {
+ EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized());
+ EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized());
- EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized());
- EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized());
+ EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized());
+ EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized());
+ }
EXPECT_FALSE(APFloat::getLargest(Semantics).isSmallestNormalized());
EXPECT_FALSE(APFloat::getLargest(Semantics, true).isSmallestNormalized());
@@ -1823,6 +1825,9 @@ TEST(APFloatTest, getLargest) {
30, APFloat::getLargest(APFloat::Float8E4M3B11FNUZ()).convertToDouble());
EXPECT_EQ(3.40116213421e+38f,
APFloat::getLargest(APFloat::FloatTF32()).convertToFloat());
+ EXPECT_EQ(28, APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble());
+ EXPECT_EQ(7.5,
+ APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble());
}
TEST(APFloatTest, getSmallest) {
@@ -1881,6 +1886,20 @@ TEST(APFloatTest, getSmallest) {
EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_TRUE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
+ expected = APFloat(APFloat::Float6E3M2FN(), "0x0.1p0");
+ EXPECT_FALSE(test.isNegative());
+ EXPECT_TRUE(test.isFiniteNonZero());
+ EXPECT_TRUE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ test = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
+ expected = APFloat(APFloat::Float6E2M3FN(), "0x0.2p0");
+ EXPECT_FALSE(test.isNegative());
+ EXPECT_TRUE(test.isFiniteNonZero());
+ EXPECT_TRUE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
}
TEST(APFloatTest, getSmallestNormalized) {
@@ -1963,6 +1982,21 @@ TEST(APFloatTest, getSmallestNormalized) {
EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
EXPECT_TRUE(test.isSmallestNormalized());
+ test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
+ expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2");
+ EXPECT_FALSE(test.isNegative());
+ EXPECT_TRUE(test.isFiniteNonZero());
+ EXPECT_FALSE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+ EXPECT_TRUE(test.isSmallestNormalized());
+
+ test = APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
+ expected = APFloat(APFloat::Float6E2M3FN(), "0x1p0");
+ EXPECT_FALSE(test.isNegative());
+ EXPECT_TRUE(test.isFiniteNonZero());
+ EXPECT_FALSE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+ EXPECT_TRUE(test.isSmallestNormalized());
}
TEST(APFloatTest, getZero) {
@@ -1996,7 +2030,11 @@ TEST(APFloatTest, getZero) {
{&APFloat::Float8E4M3B11FNUZ(), false, false, {0, 0}, 1},
{&APFloat::Float8E4M3B11FNUZ(), true, false, {0, 0}, 1},
{&APFloat::FloatTF32(), false, true, {0, 0}, 1},
- {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}};
+ {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1},
+ {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1},
+ {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1},
+ {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1},
+ {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}};
const unsigned NumGetZeroTests = std::size(GetZeroTest);
for (unsigned i = 0; i < NumGetZeroTests; ++i) {
APFloat test = APFloat::getZero(*GetZeroTest[i].semantics,
@@ -5161,6 +5199,90 @@ TEST(APFloatTest, Float8ExhaustivePair) {
}
}
+TEST(APFloatTest, Float6ExhaustivePair) {
+ // Test each pair of 6-bit floats with non-standard semantics
+ for (APFloat::Semantics Sem :
+ {APFloat::S_Float6E3M2FN, APFloat::S_Float6E2M3FN}) {
+ const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
+ for (int i = 1; i < 64; i++) {
+ for (int j = 1; j < 64; j++) {
+ SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
+ ",j=" + std::to_string(j));
+ APFloat x(S, APInt(6, i));
+ APFloat y(S, APInt(6, j));
+
+ bool losesInfo;
+ APFloat x16 = x;
+ x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_FALSE(losesInfo);
+ APFloat y16 = y;
+ y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_FALSE(losesInfo);
+
+ // Add
+ APFloat z = x;
+ z.add(y, APFloat::rmNearestTiesToEven);
+ APFloat z16 = x16;
+ z16.add(y16, APFloat::rmNearestTiesToEven);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+
+ // Subtract
+ z = x;
+ z.subtract(y, APFloat::rmNearestTiesToEven);
+ z16 = x16;
+ z16.subtract(y16, APFloat::rmNearestTiesToEven);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+
+ // Multiply
+ z = x;
+ z.multiply(y, APFloat::rmNearestTiesToEven);
+ z16 = x16;
+ z16.multiply(y16, APFloat::rmNearestTiesToEven);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+
+ // Skip divide by 0
+ if (j == 0 || j == 32)
+ continue;
+
+ // Divide
+ z = x;
+ z.divide(y, APFloat::rmNearestTiesToEven);
+ z16 = x16;
+ z16.divide(y16, APFloat::rmNearestTiesToEven);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+
+ // Mod
+ z = x;
+ z.mod(y);
+ z16 = x16;
+ z16.mod(y16);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+
+ // Remainder
+ z = x;
+ z.remainder(y);
+ z16 = x16;
+ z16.remainder(y16);
+ z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(z.bitwiseIsEqual(z16))
+ << "sem=" << Sem << ", i=" << i << ", j=" << j;
+ }
+ }
+ }
+}
+
TEST(APFloatTest, ConvertE4M3FNToE5M2) {
bool losesInfo;
APFloat test(APFloat::Float8E4M3FN(), "1.0");
@@ -6620,28 +6742,39 @@ TEST(APFloatTest, getExactLog2) {
EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2());
EXPECT_EQ(INT_MIN, APFloat(Semantics, "3.0").getExactLog2Abs());
EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2Abs());
- EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
- EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2());
- EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
- EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
- EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2());
- EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs());
- EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
- EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs());
+
+ if (I == APFloat::S_Float6E2M3FN) {
+ EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2());
+ EXPECT_EQ(INT_MIN, APFloat(Semantics, "-4.0").getExactLog2());
+ EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs());
+ EXPECT_EQ(2, APFloat(Semantics, "-4.0").getExactLog2Abs());
+ } else {
+ EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
+ EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2());
+ EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
+ EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
+ EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2());
+ EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs());
+ EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
+ EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs());
+ }
EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2());
EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2());
- EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2());
- EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2());
- EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2());
- EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2());
-
EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2Abs());
EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2Abs());
- EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs());
- EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs());
- EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs());
- EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs());
+
+ if (APFloat::hasNanOrInf(Semantics)) {
+ EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2());
+ EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2());
+ EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2());
+ EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2());
+
+ EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs());
+ EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs());
+ EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs());
+ EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs());
+ }
EXPECT_EQ(INT_MIN,
scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven)
@@ -6660,4 +6793,311 @@ TEST(APFloatTest, getExactLog2) {
}
}
+TEST(APFloatTest, Float6E3M2FNFromString) {
+ // Exactly representable
+ EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "28").convertToDouble());
+ // Round down to maximum value
+ EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "32").convertToDouble());
+
+#ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
+ EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "inf"),
+ "This floating point format does not support Inf");
+ EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "nan"),
+ "This floating point format does not support NaN");
+#endif
+#endif
+
+ EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "0").isPosZero());
+ EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "-0").isNegZero());
+}
+
+TEST(APFloatTest, Float6E2M3FNFromString) {
+ // Exactly representable
+ EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "7.5").convertToDouble());
+ // Round down to maximum value
+ EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "32").convertToDouble());
+
+#ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
+ EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "inf"),
+ "This floating point format does not support Inf");
+ EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "nan"),
+ "This floating point format does not support NaN");
+#endif
+#endif
+
+ EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "0").isPosZero());
+ EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "-0").isNegZero());
+}
+
+TEST(APFloatTest, ConvertE3M2FToE2M3F) {
+ bool losesInfo;
+ APFloat test(APFloat::Float6E3M2FN(), "1.0");
+ APFloat::opStatus status = test.convert(
+ APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+ EXPECT_EQ(status, APFloat::opOK);
+
+ test = APFloat(APFloat::Float6E3M2FN(), "0.0");
+ status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+ EXPECT_EQ(status, APFloat::opOK);
+
+ // Test overflow
+ losesInfo = false;
+ test = APFloat(APFloat::Float6E3M2FN(), "28");
+ status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(7.5f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(status, APFloat::opInexact);
+
+ // Test underflow
+ test = APFloat(APFloat::Float6E3M2FN(), ".0625");
+ status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(0., test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
+
+ // Testing inexact rounding to denormal number
+ losesInfo = false;
+ test = APFloat(APFloat::Float6E3M2FN(), "0.1875");
+ status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(0.25, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
+}
+
+TEST(APFloatTest, ConvertE2M3FToE3M2F) {
+ bool losesInfo;
+ APFloat test(APFloat::Float6E2M3FN(), "1.0");
+ APFloat::opStatus status = test.convert(
+ APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+ EXPECT_EQ(status, APFloat::opOK);
+
+ test = APFloat(APFloat::Float6E2M3FN(), "0.0");
+ status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+ EXPECT_EQ(status, APFloat::opOK);
+
+ test = APFloat(APFloat::Float6E2M3FN(), ".125");
+ status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(.125, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+ EXPECT_EQ(status, APFloat::opOK);
+
+ // Test inexact rounding
+ losesInfo = false;
+ test = APFloat(APFloat::Float6E2M3FN(), "7.5");
+ status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_EQ(8, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(status, APFloat::opInexact);
+}
+
+TEST(APFloatTest, Float6E3M2FNNext) {
+ APFloat test(APFloat::Float6E3M2FN(), APFloat::uninitialized);
+ APFloat expected(APFloat::Float6E3M2FN(), APFloat::uninitialized);
+
+ // 1. NextUp of largest bit pattern is the same
+ test = APFloat::getLargest(APFloat::Float6E3M2FN());
+ expected = APFloat::getLargest(APFloat::Float6E3M2FN());
+ EXPECT_EQ(test.next(false), APFloat::opOK);
+ EXPECT_FALSE(test.isInfinity());
+ EXPECT_FALSE(test.isZero());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 2. NextUp of smallest negative denormal is -0
+ test = APFloat::getSmallest(APFloat::Float6E3M2FN(), true);
+ expected = APFloat::getZero(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(test.next(false), APFloat::opOK);
+ EXPECT_TRUE(test.isNegZero());
+ EXPECT_FALSE(test.isPosZero());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 3. nextDown of negative of largest value is the same
+ test = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
+ expected = test;
+ EXPECT_EQ(test.next(true), APFloat::opOK);
+ EXPECT_FALSE(test.isInfinity());
+ EXPECT_FALSE(test.isZero());
+ EXPECT_FALSE(test.isNaN());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 4. nextDown of +0 is smallest negative denormal
+ test = APFloat::getZero(APFloat::Float6E3M2FN(), false);
+ expected = APFloat::getSmallest(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(test.next(true), APFloat::opOK);
+ EXPECT_FALSE(test.isZero());
+ EXPECT_TRUE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+}
+
+TEST(APFloatTest, Float6E2M3FNNext) {
+ APFloat test(APFloat::Float6E2M3FN(), APFloat::uninitialized);
+ APFloat expected(APFloat::Float6E2M3FN(), APFloat::uninitialized);
+
+ // 1. NextUp of largest bit pattern is the same
+ test = APFloat::getLargest(APFloat::Float6E2M3FN());
+ expected = APFloat::getLargest(APFloat::Float6E2M3FN());
+ EXPECT_EQ(test.next(false), APFloat::opOK);
+ EXPECT_FALSE(test.isInfinity());
+ EXPECT_FALSE(test.isZero());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 2. NextUp of smallest negative denormal is -0
+ test = APFloat::getSmallest(APFloat::Float6E2M3FN(), true);
+ expected = APFloat::getZero(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(test.next(false), APFloat::opOK);
+ EXPECT_TRUE(test.isNegZero());
+ EXPECT_FALSE(test.isPosZero());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 3. nextDown of negative of largest value is the same
+ test = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
+ expected = test;
+ EXPECT_EQ(test.next(true), APFloat::opOK);
+ EXPECT_FALSE(test.isInfinity());
+ EXPECT_FALSE(test.isZero());
+ EXPECT_FALSE(test.isNaN());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ // 4. nextDown of +0 is smallest negative denormal
+ test = APFloat::getZero(APFloat::Float6E2M3FN(), false);
+ expected = APFloat::getSmallest(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(test.next(true), APFloat::opOK);
+ EXPECT_FALSE(test.isZero());
+ EXPECT_TRUE(test.isDenormal());
+ EXPECT_TRUE(test.bitwiseIsEqual(expected));
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
+TEST(APFloatTest, Float6E3M2FNGetInfNaN) {
+ EXPECT_DEATH(APFloat::getInf(APFloat::Float6E3M2FN()),
+ "This floating point format does not support Inf");
+ EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E3M2FN()),
+ "This floating point format does not support NaN");
+}
+
+TEST(APFloatTest, Float6E2M3FNGetInfNaN) {
+ EXPECT_DEATH(APFloat::getInf(APFloat::Float6E2M3FN()),
+ "This floating point format does not support Inf");
+ EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E2M3FN()),
+ "This floating point format does not support NaN");
+}
+#endif
+#endif
+
+TEST(APFloatTest, Float6E3M2FNToDouble) {
+ APFloat One(APFloat::Float6E3M2FN(), "1.0");
+ EXPECT_EQ(1.0, One.convertToDouble());
+ APFloat Two(APFloat::Float6E3M2FN(), "2.0");
+ EXPECT_EQ(2.0, Two.convertToDouble());
+ APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false);
+ EXPECT_EQ(28., PosLargest.convertToDouble());
+ APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(-28., NegLargest.convertToDouble());
+ APFloat PosSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
+ EXPECT_EQ(0x1p-2, PosSmallest.convertToDouble());
+ APFloat NegSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(-0x1p-2, NegSmallest.convertToDouble());
+
+ APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
+ EXPECT_TRUE(SmallestDenorm.isDenormal());
+ EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToDouble());
+}
+
+TEST(APFloatTest, Float6E2M3FNToDouble) {
+ APFloat One(APFloat::Float6E2M3FN(), "1.0");
+ EXPECT_EQ(1.0, One.convertToDouble());
+ APFloat Two(APFloat::Float6E2M3FN(), "2.0");
+ EXPECT_EQ(2.0, Two.convertToDouble());
+ APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false);
+ EXPECT_EQ(7.5, PosLargest.convertToDouble());
+ APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(-7.5, NegLargest.convertToDouble());
+ APFloat PosSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
+ EXPECT_EQ(0x1p0, PosSmallest.convertToDouble());
+ APFloat NegSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble());
+
+ APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
+ EXPECT_TRUE(SmallestDenorm.isDenormal());
+ EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToDouble());
+}
+
+TEST(APFloatTest, Float6E3M2FNToFloat) {
+ APFloat PosZero = APFloat::getZero(APFloat::Float6E3M2FN());
+ APFloat PosZeroToFloat(PosZero.convertToFloat());
+ EXPECT_TRUE(PosZeroToFloat.isPosZero());
+ APFloat NegZero = APFloat::getZero(APFloat::Float6E3M2FN(), true);
+ APFloat NegZeroToFloat(NegZero.convertToFloat());
+ EXPECT_TRUE(NegZeroToFloat.isNegZero());
+
+ APFloat One(APFloat::Float6E3M2FN(), "1.0");
+ EXPECT_EQ(1.0F, One.convertToFloat());
+ APFloat Two(APFloat::Float6E3M2FN(), "2.0");
+ EXPECT_EQ(2.0F, Two.convertToFloat());
+
+ APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false);
+ EXPECT_EQ(28., PosLargest.convertToFloat());
+ APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(-28, NegLargest.convertToFloat());
+ APFloat PosSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
+ EXPECT_EQ(0x1p-2, PosSmallest.convertToFloat());
+ APFloat NegSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true);
+ EXPECT_EQ(-0x1p-2, NegSmallest.convertToFloat());
+
+ APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
+ EXPECT_TRUE(SmallestDenorm.isDenormal());
+ EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToFloat());
+}
+
+TEST(APFloatTest, Float6E2M3FNToFloat) {
+ APFloat PosZero = APFloat::getZero(APFloat::Float6E2M3FN());
+ APFloat PosZeroToFloat(PosZero.convertToFloat());
+ EXPECT_TRUE(PosZeroToFloat.isPosZero());
+ APFloat NegZero = APFloat::getZero(APFloat::Float6E2M3FN(), true);
+ APFloat NegZeroToFloat(NegZero.convertToFloat());
+ EXPECT_TRUE(NegZeroToFloat.isNegZero());
+
+ APFloat One(APFloat::Float6E2M3FN(), "1.0");
+ EXPECT_EQ(1.0F, One.convertToFloat());
+ APFloat Two(APFloat::Float6E2M3FN(), "2.0");
+ EXPECT_EQ(2.0F, Two.convertToFloat());
+
+ APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false);
+ EXPECT_EQ(7.5, PosLargest.convertToFloat());
+ APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(-7.5, NegLargest.convertToFloat());
+ APFloat PosSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
+ EXPECT_EQ(0x1p0, PosSmallest.convertToFloat());
+ APFloat NegSmallest =
+ APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true);
+ EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat());
+
+ APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
+ EXPECT_TRUE(SmallestDenorm.isDenormal());
+ EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToFloat());
+}
} // namespace