aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Support/APFloat.cpp
diff options
context:
space:
mode:
authorAlexander Pivovarov <pivovaa@amazon.com>2024-07-30 00:11:10 -0700
committerGitHub <noreply@github.com>2024-07-30 00:11:10 -0700
commitabc2fe31fc622c4eab3766d739576110eb6f16c3 (patch)
treed101961650708fab7e30ccfbb34c4219b551abc9 /llvm/lib/Support/APFloat.cpp
parentfcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535 (diff)
downloadllvm-abc2fe31fc622c4eab3766d739576110eb6f16c3.zip
llvm-abc2fe31fc622c4eab3766d739576110eb6f16c3.tar.gz
llvm-abc2fe31fc622c4eab3766d739576110eb6f16c3.tar.bz2
[APFloat] Add support for f8E3M4 IEEE 754 type (#99698)
This PR adds `f8E4M3` type to APFloat. `f8E3M4` type follows IEEE 754 convention ```c f8E3M4 (IEEE 754) - Exponent bias: 3 - Maximum stored exponent value: 6 (binary 110) - Maximum unbiased exponent value: 6 - 3 = 3 - Minimum stored exponent value: 1 (binary 001) - Minimum unbiased exponent value: 1 − 3 = −2 - Precision specifies the total number of bits used for the significand (mantissa), including implicit leading integer bit = 4 + 1 = 5 - Follows IEEE 754 conventions for representation of special values - Has Positive and Negative zero - Has Positive and Negative infinity - Has NaNs Additional details: - Max exp (unbiased): 3 - Min exp (unbiased): -2 - Infinities (+/-): S.111.0000 - Zeros (+/-): S.000.0000 - NaNs: S.111.{0,1}⁴ except S.111.0000 - Max normal number: S.110.1111 = +/-2^(6-3) x (1 + 15/16) = +/-2^3 x 31 x 2^(-4) = +/-15.5 - Min normal number: S.001.0000 = +/-2^(1-3) x (1 + 0) = +/-2^(-2) - Max subnormal number: S.000.1111 = +/-2^(-2) x 15/16 = +/-2^(-2) x 15 x 2^(-4) = +/-15 x 2^(-6) - Min subnormal number: S.000.0001 = +/-2^(-2) x 1/16 = +/-2^(-2) x 2^(-4) = +/-2^(-6) ``` Related PRs: - [PR-97179](https://github.com/llvm/llvm-project/pull/97179) [APFloat] Add support for f8E4M3 IEEE 754 type
Diffstat (limited to 'llvm/lib/Support/APFloat.cpp')
-rw-r--r--llvm/lib/Support/APFloat.cpp20
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 26b4f8e..7f68c5a 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -143,6 +143,7 @@ static constexpr fltSemantics semFloat8E4M3FNUZ = {
7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat6E3M2FN = {
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
@@ -217,6 +218,8 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
return Float8E4M3FNUZ();
case S_Float8E4M3B11FNUZ:
return Float8E4M3B11FNUZ();
+ case S_Float8E3M4:
+ return Float8E3M4();
case S_FloatTF32:
return FloatTF32();
case S_Float6E3M2FN:
@@ -257,6 +260,8 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
return S_Float8E4M3FNUZ;
else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
return S_Float8E4M3B11FNUZ;
+ else if (&Sem == &llvm::APFloat::Float8E3M4())
+ return S_Float8E3M4;
else if (&Sem == &llvm::APFloat::FloatTF32())
return S_FloatTF32;
else if (&Sem == &llvm::APFloat::Float6E3M2FN())
@@ -287,6 +292,7 @@ const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
return semFloat8E4M3B11FNUZ;
}
+const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
@@ -3643,6 +3649,11 @@ APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}
+APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E3M4>();
+}
+
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
assert(partCount() == 1);
return convertIEEEFloatToAPInt<semFloatTF32>();
@@ -3704,6 +3715,9 @@ APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
return convertFloat8E4M3B11FNUZAPFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
+ return convertFloat8E3M4APFloatToAPInt();
+
if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
return convertFloatTF32APFloatToAPInt();
@@ -3932,6 +3946,10 @@ void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}
+void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E3M4>(api);
+}
+
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
initFromIEEEAPInt<semFloatTF32>(api);
}
@@ -3977,6 +3995,8 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
return initFromFloat8E4M3FNUZAPInt(api);
if (Sem == &semFloat8E4M3B11FNUZ)
return initFromFloat8E4M3B11FNUZAPInt(api);
+ if (Sem == &semFloat8E3M4)
+ return initFromFloat8E3M4APInt(api);
if (Sem == &semFloatTF32)
return initFromFloatTF32APInt(api);
if (Sem == &semFloat6E3M2FN)