From aac867e874775f14d9de4516e45de9ae0cd0cfc0 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 17 Sep 2023 17:00:08 -0700 Subject: Fixing bf32_to_bf16 conversions --- source/bf16_to_f32.c | 2 +- source/f32_to_bf16.c | 17 ++++++++++++-- source/s_normSubnormalBF16Sig.c | 52 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 source/s_normSubnormalBF16Sig.c diff --git a/source/bf16_to_f32.c b/source/bf16_to_f32.c index a30006a..a76632e 100644 --- a/source/bf16_to_f32.c +++ b/source/bf16_to_f32.c @@ -85,7 +85,7 @@ float32_t bf16_to_f32( bfloat16_t a ) } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ - uiZ = packToF32UI( sign, exp, (uint_fast32_t) frac<<16 ); + uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 ); uiZ: uZ.ui = uiZ; return uZ.f; diff --git a/source/f32_to_bf16.c b/source/f32_to_bf16.c index 059e05a..6f81493 100644 --- a/source/f32_to_bf16.c +++ b/source/f32_to_bf16.c @@ -41,6 +41,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "specialize.h" #include "softfloat.h" +#include +#include + bfloat16_t f32_to_bf16( float32_t a ) { union ui32_f32 uA; @@ -75,7 +78,8 @@ bfloat16_t f32_to_bf16( float32_t a ) } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ - // frac is a 24-bit mantissa, right shifted by + // frac is a 24-bit mantissa, right shifted by 9 + // In the normal case, (24-9) = 15 are set frac16 = frac>>9 | ((frac & 0x1FF) != 0); if ( ! 
(exp | frac16) ) { uiZ = packToBF16UI( sign, 0, 0 ); @@ -83,7 +87,16 @@ bfloat16_t f32_to_bf16( float32_t a ) } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ - return softfloat_roundPackToBF16( sign, exp, frac16 | 0x4000 ); + // softfloat_roundPackToBF16 exponent argument (2nd argument) + // must correspond to the exponent of fracIn[13] bits + // (fracIn is the 3rd and last argument) + uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number + // exponent for the lowest normal and largest subnormal should be equal + // but is not in IEEE encoding so mantissa must be partially normalized + // (by one bit) for subnormal numbers. Such that (exp - 1) corresponds + // to the exponent of frac16[13] + frac16 = frac16 << (exp ? 0 : 1); + return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask ); uiZ: uZ.ui = uiZ; return uZ.f; diff --git a/source/s_normSubnormalBF16Sig.c b/source/s_normSubnormalBF16Sig.c new file mode 100644 index 0000000..b81baa9 --- /dev/null +++ b/source/s_normSubnormalBF16Sig.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "internals.h" + +struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig ) +{ + int_fast8_t shiftDist; + struct exp8_sig16 z; + + shiftDist = softfloat_countLeadingZeros16( sig ) - 8; + z.exp = 1 - shiftDist; + z.sig = sig<