about | summary | refs | log | tree | commit | diff
path: root/llvm/unittests/ADT/APFloatTest.cpp
diff options
context:
space:
mode:
author: Sergey Kozub <skozub@nvidia.com> 2024-09-10 10:41:05 +0200
committer: GitHub <noreply@github.com> 2024-09-10 10:41:05 +0200
commit: 918222ba43f6e56208ad347ed10a3e0025d8ed38 (patch)
tree: 0a77246c5906419df9151544223ba3d7fac0a0e2 /llvm/unittests/ADT/APFloatTest.cpp
parent: 7e07c1df678825c6a57d5fb1d36cdfc1584de9dc (diff)
download: llvm-918222ba43f6e56208ad347ed10a3e0025d8ed38.zip
          llvm-918222ba43f6e56208ad347ed10a3e0025d8ed38.tar.gz
          llvm-918222ba43f6e56208ad347ed10a3e0025d8ed38.tar.bz2
[MLIR] Add f6E3M2FN type (#105573)
This PR adds the `f6E3M2FN` type to MLIR. The `f6E3M2FN` type is proposed in the [OpenCompute MX Specification](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf). It defines a 6-bit floating-point number with bit layout S1E3M2. Unlike IEEE-754 types, there are no infinity or NaN values.

```c
f6E3M2FN
- Exponent bias: 3
- Maximum stored exponent value: 7 (binary 111)
- Maximum unbiased exponent value: 7 - 3 = 4
- Minimum stored exponent value: 1 (binary 001)
- Minimum unbiased exponent value: 1 - 3 = -2
- Has Positive and Negative zero
- Doesn't have infinity
- Doesn't have NaNs

Additional details:
- Zeros (+/-): S.000.00
- Max normal number: S.111.11 = ±2^(4) x (1 + 0.75) = ±28
- Min normal number: S.001.00 = ±2^(-2) = ±0.25
- Max subnormal number: S.000.11 = ±2^(-2) x 0.75 = ±0.1875
- Min subnormal number: S.000.01 = ±2^(-2) x 0.25 = ±0.0625
```

Related PRs:
- [PR-94735](https://github.com/llvm/llvm-project/pull/94735) [APFloat] Add APFloat support for FP6 data types
- [PR-97118](https://github.com/llvm/llvm-project/pull/97118) [MLIR] Add f8E4M3 type - was used as a template for this PR
Diffstat (limited to 'llvm/unittests/ADT/APFloatTest.cpp')
-rw-r--r-- llvm/unittests/ADT/APFloatTest.cpp | 7
1 files changed, 4 insertions, 3 deletions
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index be675bb..6c49d78e 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -2084,16 +2084,17 @@ TEST(APFloatTest, getSmallestNormalized) {
EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
EXPECT_TRUE(test.isSmallestNormalized());
+
test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2");
-
- test = APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false);
- expected = APFloat(APFloat::Float4E2M1FN(), "0x1p0");
EXPECT_FALSE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
EXPECT_TRUE(test.isSmallestNormalized());
+
+ test = APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false);
+ expected = APFloat(APFloat::Float4E2M1FN(), "0x1p0");
EXPECT_FALSE(test.isNegative());
EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_FALSE(test.isDenormal());