Fixing f32_to_bf16 conversions

author: Nicolas Brunie <nibrunie@gmail.com> 2023-09-17 17:00:08 -0700
committer: Nicolas Brunie <nibrunie@gmail.com> 2023-09-17 17:00:08 -0700
commit: aac867e874775f14d9de4516e45de9ae0cd0cfc0 (patch)
tree: 6b90a7fbb9c37c9237b0fed5d1d5a7823e35bc52 /source/f32_to_bf16.c
parent: 54da0e1be7a9c90b4225ac29cf5b5c8b4ec8304c (diff)
download: berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.zip
berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.tar.gz
berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.tar.bz2
1 file changed, 15 insertions, 2 deletions
diff --git a/source/f32_to_bf16.c b/source/f32_to_bf16.c
index 059e05a..6f81493 100644
--- a/source/f32_to_bf16.c
+++ b/source/f32_to_bf16.c
@@ -41,6 +41,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "specialize.h"
 #include "softfloat.h"
 
+#include <inttypes.h>
+#include <stdio.h>
+
 bfloat16_t f32_to_bf16( float32_t a )
 {
     union ui32_f32 uA;
@@ -75,7 +78,8 @@ bfloat16_t f32_to_bf16( float32_t a )
     }
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    // frac is a 24-bit mantissa, right shifted by 
+    // frac is a 24-bit mantissa, right shifted by 9
+    // In the normal case, (24-9) = 15 bits remain
     frac16 = frac>>9 | ((frac & 0x1FF) != 0);
     if ( ! (exp | frac16) ) {
         uiZ = packToBF16UI( sign, 0, 0 );
@@ -83,7 +87,16 @@ bfloat16_t f32_to_bf16( float32_t a )
     }
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
-    return softfloat_roundPackToBF16( sign, exp, frac16 | 0x4000 );
+    // The exponent argument (2nd argument) of softfloat_roundPackToBF16
+    // must correspond to the exponent of bit 13 of fracIn
+    // (fracIn is the 3rd and last argument).
+    uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number
+    // The exponents of the lowest normal and the largest subnormal should be
+    // equal, but they are not in the IEEE encoding, so the mantissa must be
+    // partially normalized (by one bit) for subnormal numbers, such that
+    // (exp - 1) corresponds to the exponent of frac16[13].
+    frac16 = frac16 << (exp ? 0 : 1);
+    return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask );
  uiZ:
     uZ.ui = uiZ;
     return uZ.f;
author	Nicolas Brunie <nibrunie@gmail.com>	2023-09-17 17:00:08 -0700
committer	Nicolas Brunie <nibrunie@gmail.com>	2023-09-17 17:00:08 -0700
commit	aac867e874775f14d9de4516e45de9ae0cd0cfc0 (patch)
tree	6b90a7fbb9c37c9237b0fed5d1d5a7823e35bc52 /source/f32_to_bf16.c
parent	54da0e1be7a9c90b4225ac29cf5b5c8b4ec8304c (diff)
download	berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.zip berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.tar.gz berkeley-softfloat-3-aac867e874775f14d9de4516e45de9ae0cd0cfc0.tar.bz2