diff options
author | Nicolas Brunie <nibrunie@gmail.com> | 2023-08-12 08:00:12 +0200 |
---|---|---|
committer | Nicolas Brunie <nibrunie@gmail.com> | 2023-08-12 08:00:12 +0200 |
commit | fab20258f0ba207b30753031a33d84f6c2e7d724 (patch) | |
tree | 726d84ae272cb7e3e76146b0f0f72aa316c1a2b3 /source | |
parent | ac8688db85f554b7e165e0ee25a45bb50a2c50c7 (diff) | |
download | berkeley-softfloat-3-fab20258f0ba207b30753031a33d84f6c2e7d724.zip berkeley-softfloat-3-fab20258f0ba207b30753031a33d84f6c2e7d724.tar.gz berkeley-softfloat-3-fab20258f0ba207b30753031a33d84f6c2e7d724.tar.bz2 |
Continuing f32_to_bf16 development
Diffstat (limited to 'source')
-rw-r--r-- | source/RISCV/specialize.h | 2 | ||||
-rw-r--r-- | source/f32_to_bf16.c | 3 | ||||
-rw-r--r-- | source/s_roundPackToBF16.c | 21 |
3 files changed, 14 insertions, 12 deletions
```diff
diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h
index 4137bd7..ec9eda2 100644
--- a/source/RISCV/specialize.h
+++ b/source/RISCV/specialize.h
@@ -120,7 +120,7 @@ uint_fast16_t
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit BF16 floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF16UI 0x7FC0
+#define defaultNaNBF16UI 0x7FC0
 
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
diff --git a/source/f32_to_bf16.c b/source/f32_to_bf16.c
index 82d9b16..059e05a 100644
--- a/source/f32_to_bf16.c
+++ b/source/f32_to_bf16.c
@@ -75,7 +75,8 @@ bfloat16_t f32_to_bf16( float32_t a )
     }
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    frac16 = frac>>16 | ((frac & 0xFFFF) != 0);
+    // frac is a 24-bit mantissa, right shifted by
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
     if ( ! (exp | frac16) ) {
         uiZ = packToBF16UI( sign, 0, 0 );
         goto uiZ;
diff --git a/source/s_roundPackToBF16.c b/source/s_roundPackToBF16.c
index 66e19be..57a7ae4 100644
--- a/source/s_roundPackToBF16.c
+++ b/source/s_roundPackToBF16.c
@@ -40,6 +40,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "internals.h"
 #include "softfloat.h"
 
+/** sig last significant bit is sig[7], the 7 LSBs will be used for rounding */
 bfloat16_t
  softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
 {
@@ -54,18 +55,18 @@ bfloat16_t
     *------------------------------------------------------------------------*/
     roundingMode = softfloat_roundingMode;
     roundNearEven = (roundingMode == softfloat_round_near_even);
-    roundIncrement = 0x8;
+    roundIncrement = 0x40;
     if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
         roundIncrement =
             (roundingMode
                  == (sign ? softfloat_round_min : softfloat_round_max))
-                ? 0xF
+                ? 0x7F
                 : 0;
     }
-    roundBits = sig & 0xF;
+    roundBits = sig & 0x7F;
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    if ( 0x1D <= (unsigned int) exp ) {
+    if ( 0xFD <= (unsigned int) exp ) {
         if ( exp < 0 ) {
             /*----------------------------------------------------------------
             *----------------------------------------------------------------*/
@@ -74,22 +75,22 @@ bfloat16_t
                     || (exp < -1) || (sig + roundIncrement < 0x8000);
             sig = softfloat_shiftRightJam32( sig, -exp );
             exp = 0;
-            roundBits = sig & 0xF;
+            roundBits = sig & 0x7F;
             if ( isTiny && roundBits ) {
                 softfloat_raiseFlags( softfloat_flag_underflow );
             }
-        } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) {
+        } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
             /*----------------------------------------------------------------
             *----------------------------------------------------------------*/
             softfloat_raiseFlags(
                 softfloat_flag_overflow | softfloat_flag_inexact );
-            uiZ = packToF16UI( sign, 0x1F, 0 ) - ! roundIncrement;
+            uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
             goto uiZ;
         }
     }
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    sig = (sig + roundIncrement)>>4;
+    sig = (sig + roundIncrement)>>7;
     if ( roundBits ) {
         softfloat_exceptionFlags |= softfloat_flag_inexact;
 #ifdef SOFTFLOAT_ROUND_ODD
@@ -99,12 +100,12 @@ bfloat16_t
         }
 #endif
     }
-    sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven);
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
     if ( ! sig ) exp = 0;
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
  packReturn:
-    uiZ = packToF16UI( sign, exp, sig );
+    uiZ = packToBF16UI( sign, exp, sig );
  uiZ:
     uZ.ui = uiZ;
     return uZ.f;
```