diff options
author | Jerry Zhao <jerryz123@berkeley.edu> | 2023-10-13 15:23:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-13 15:23:50 -0700 |
commit | eb498c55bab0f49c3903f69524e2674abe71c26b (patch) | |
tree | be448123945308a8bc01e831adb0c83b640aa979 | |
parent | 5c06db33fc1e2130f67c045327b0ec949032df1d (diff) | |
parent | bef5a34e1b2ef862e2c5cc0ed4f1ab6e92d01af1 (diff) | |
download | berkeley-softfloat-3-eb498c55bab0f49c3903f69524e2674abe71c26b.zip berkeley-softfloat-3-eb498c55bab0f49c3903f69524e2674abe71c26b.tar.gz berkeley-softfloat-3-eb498c55bab0f49c3903f69524e2674abe71c26b.tar.bz2 |
Merge pull request #21 from nibrunieAtSi5/bf16-support
[#18] Adding minimal BFloat16 support
-rw-r--r-- | build/Linux-x86_64-GCC/Makefile | 7 | ||||
-rw-r--r-- | build/template-FAST_INT64/Makefile | 5 | ||||
-rw-r--r-- | source/8086-SSE/s_bf16UIToCommonNaN.c | 59 | ||||
-rw-r--r-- | source/8086-SSE/s_commonNaNToBF16UI.c | 51 | ||||
-rw-r--r-- | source/8086-SSE/specialize.h | 21 | ||||
-rw-r--r-- | source/RISCV/s_bf16UIToCommonNaN.c | 5 | ||||
-rw-r--r-- | source/RISCV/s_commonNaNToBF16UI.c | 5 | ||||
-rw-r--r-- | source/RISCV/specialize.h | 26 | ||||
-rw-r--r-- | source/bf16_isSignalingNaN.c | 51 | ||||
-rw-r--r-- | source/bf16_to_f32.c | 90 | ||||
-rw-r--r-- | source/f32_to_bf16.c | 105 | ||||
-rw-r--r-- | source/f32_to_f16.c | 3 | ||||
-rw-r--r-- | source/include/internals.h | 13 | ||||
-rw-r--r-- | source/include/softfloat.h | 7 | ||||
-rw-r--r-- | source/include/softfloat_types.h | 1 | ||||
-rw-r--r-- | source/s_normSubnormalBF16Sig.c | 52 | ||||
-rw-r--r-- | source/s_roundPackToBF16.c | 114 |
17 files changed, 615 insertions, 0 deletions
diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index 2ee5dad..72f251a 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -94,6 +94,8 @@ OBJS_SPECIALIZE = \ s_f16UIToCommonNaN$(OBJ) \ s_commonNaNToF16UI$(OBJ) \ s_propagateNaNF16UI$(OBJ) \ + s_bf16UIToCommonNaN$(OBJ) \ + s_commonNaNToBF16UI$(OBJ) \ s_f32UIToCommonNaN$(OBJ) \ s_commonNaNToF32UI$(OBJ) \ s_propagateNaNF32UI$(OBJ) \ @@ -114,6 +116,8 @@ OBJS_OTHERS = \ s_roundToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundToI64$(OBJ) \ + s_normSubnormalBF16Sig$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -172,6 +176,8 @@ OBJS_OTHERS = \ i64_to_extF80M$(OBJ) \ i64_to_f128$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -209,6 +215,7 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_extF80$(OBJ) \ diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile index 78e7ff5..e04c216 100644 --- a/build/template-FAST_INT64/Makefile +++ b/build/template-FAST_INT64/Makefile @@ -115,6 +115,8 @@ OBJS_OTHERS = \ s_roundToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundToI64$(OBJ) \ + s_normSubnormalBF16Sig$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -173,6 +175,8 @@ OBJS_OTHERS = \ i64_to_extF80M$(OBJ) \ i64_to_f128$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -210,6 +214,7 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_extF80$(OBJ) \ diff --git a/source/8086-SSE/s_bf16UIToCommonNaN.c b/source/8086-SSE/s_bf16UIToCommonNaN.c new file mode 100644 index 0000000..c1c774d --- /dev/null +++ b/source/8086-SSE/s_bf16UIToCommonNaN.c @@ -0,0 +1,59 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "specialize.h" +#include "softfloat.h" + +/*---------------------------------------------------------------------------- +| Assuming `uiA' has the bit pattern of a BF16 NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr ) +{ + + if ( softfloat_isSigNaNBF16UI( uiA ) ) { + softfloat_raiseFlags( softfloat_flag_invalid ); + } + zPtr->sign = uiA>>15; + zPtr->v64 = (uint_fast64_t) uiA<<56; + zPtr->v0 = 0; + +} + diff --git a/source/8086-SSE/s_commonNaNToBF16UI.c b/source/8086-SSE/s_commonNaNToBF16UI.c new file mode 100644 index 0000000..d81cf51 --- /dev/null +++ b/source/8086-SSE/s_commonNaNToBF16UI.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "specialize.h" + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by `aPtr' into a BF16 NaN, and +| returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr ) +{ + + return (uint_fast16_t) aPtr->sign<<15 | 0x7FC0 | aPtr->v64>>56; + +} + diff --git a/source/8086-SSE/specialize.h b/source/8086-SSE/specialize.h index a9166e1..8ed2e75 100644 --- a/source/8086-SSE/specialize.h +++ b/source/8086-SSE/specialize.h @@ -118,6 +118,27 @@ uint_fast16_t softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB ); /*---------------------------------------------------------------------------- +| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a +| 16-bit brain floating-point (BF16) signaling NaN. +| Note: This macro evaluates its argument more than once. +*----------------------------------------------------------------------------*/ +#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F)) + +/*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr ); + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr ); + +/*---------------------------------------------------------------------------- | The bit pattern for a default generated 32-bit floating-point NaN. *----------------------------------------------------------------------------*/ #define defaultNaNF32UI 0xFFC00000 diff --git a/source/RISCV/s_bf16UIToCommonNaN.c b/source/RISCV/s_bf16UIToCommonNaN.c new file mode 100644 index 0000000..861b269 --- /dev/null +++ b/source/RISCV/s_bf16UIToCommonNaN.c @@ -0,0 +1,5 @@ + +/*---------------------------------------------------------------------------- +| This file intentionally contains no code. +*----------------------------------------------------------------------------*/ + diff --git a/source/RISCV/s_commonNaNToBF16UI.c b/source/RISCV/s_commonNaNToBF16UI.c new file mode 100644 index 0000000..861b269 --- /dev/null +++ b/source/RISCV/s_commonNaNToBF16UI.c @@ -0,0 +1,5 @@ + +/*---------------------------------------------------------------------------- +| This file intentionally contains no code. +*----------------------------------------------------------------------------*/ + diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h index c638264..cb95900 100644 --- a/source/RISCV/specialize.h +++ b/source/RISCV/specialize.h @@ -88,6 +88,13 @@ struct commonNaN { char _unused; }; #define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF)) /*---------------------------------------------------------------------------- +| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a +| 16-bit brain floating-point (BF16) signaling NaN. +| Note: This macro evaluates its argument more than once. +*----------------------------------------------------------------------------*/ +#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F)) + +/*---------------------------------------------------------------------------- | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts | this NaN to the common NaN form, and stores the resulting common NaN at the | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid @@ -96,6 +103,14 @@ struct commonNaN { char _unused; }; #define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0040) ) softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. *----------------------------------------------------------------------------*/ @@ -111,6 +126,17 @@ uint_fast16_t softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB ); /*---------------------------------------------------------------------------- +| The bit pattern for a default generated 16-bit BF16 floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + +/*---------------------------------------------------------------------------- | The bit pattern for a default generated 32-bit floating-point NaN. *----------------------------------------------------------------------------*/ #define defaultNaNF32UI 0x7FC00000 diff --git a/source/bf16_isSignalingNaN.c b/source/bf16_isSignalingNaN.c new file mode 100644 index 0000000..79ca87f --- /dev/null +++ b/source/bf16_isSignalingNaN.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +bool bf16_isSignalingNaN( bfloat16_t a ) +{ + union ui16_bf16 uA; + + uA.f = a; + return softfloat_isSigNaNBF16UI( uA.ui ); + +} + diff --git a/source/bf16_to_f32.c b/source/bf16_to_f32.c new file mode 100644 index 0000000..b86482c --- /dev/null +++ b/source/bf16_to_f32.c @@ -0,0 +1,90 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +float32_t bf16_to_f32( bfloat16_t a ) +{ + union ui16_bf16 uA; + uint_fast16_t uiA; + bool sign; + int_fast16_t exp; + uint_fast16_t frac; + struct commonNaN commonNaN; + uint_fast32_t uiZ; + struct exp8_sig16 normExpSig; + union ui32_f32 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signBF16UI( uiA ); + exp = expBF16UI( uiA ); + frac = fracBF16UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + // NaN or Inf + if ( exp == 0xFF ) { + if ( frac ) { + softfloat_bf16UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToF32UI( &commonNaN ); + } else { + uiZ = packToF32UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + // packToF32UI simply packs bitfields without any numerical change + // which means it can be used directly for any BF16 to f32 conversions which + // does not require bits manipulation + // (that is everything where the 16-bit are just padded right with 16 zeros, including + // subnormal numbers) + uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + + + diff --git a/source/f32_to_bf16.c b/source/f32_to_bf16.c new file mode 100644 index 0000000..6f81493 --- /dev/null +++ b/source/f32_to_bf16.c @@ -0,0 +1,105 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +#include <inttypes.h> +#include <stdio.h> + +bfloat16_t f32_to_bf16( float32_t a ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + struct commonNaN commonNaN; + uint_fast16_t uiZ, frac16; + union ui16_bf16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + uA.f = a; + uiA = uA.ui; + sign = signF32UI( uiA ); + exp = expF32UI( uiA ); + frac = fracF32UI( uiA ); + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + // infinity or NaN cases + if ( exp == 0xFF ) { + if ( frac ) { + // NaN case + softfloat_f32UIToCommonNaN( uiA, &commonNaN ); + uiZ = softfloat_commonNaNToBF16UI( &commonNaN ); + } else { + // infinity case + uiZ = packToBF16UI( sign, 0xFF, 0 ); + } + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + // frac is a 24-bit mantissa, right shifted by 9 + // In the normal case, (24-9) = 15 are set + frac16 = frac>>9 | ((frac & 0x1FF) != 0); + if ( ! (exp | frac16) ) { + uiZ = packToBF16UI( sign, 0, 0 ); + goto uiZ; + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + // softfloat_roundPackToBF16 exponent argument (2nd argument) + // must correspond to the exponent of fracIn[13] bits + // (fracIn is the 3rd and last argument) + uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number + // exponent for the lowest normal and largest subnormal should be equal + // but is not in IEEE encoding so mantissa must be partially normalized + // (by one bit) for subnormal numbers. Such that (exp - 1) corresponds + // to the exponent of frac16[13] + frac16 = frac16 << (exp ? 0 : 1); + return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + diff --git a/source/f32_to_f16.c b/source/f32_to_f16.c index a00b2e8..45005fe 100644 --- a/source/f32_to_f16.c +++ b/source/f32_to_f16.c @@ -72,6 +72,9 @@ float16_t f32_to_f16( float32_t a ) } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ + // frac is a 24-bit significand, the bottom 9 bits LSB are extracted and OR-red + // into a sticky flag, the top 15 MSBs are extracted, the LSB of this top slice + // is OR-red with the sticky frac16 = frac>>9 | ((frac & 0x1FF) != 0); if ( ! (exp | frac16) ) { uiZ = packToF16UI( sign, 0, 0 ); diff --git a/source/include/internals.h b/source/include/internals.h index f8eac05..5e6d4bd 100644 --- a/source/include/internals.h +++ b/source/include/internals.h @@ -43,6 +43,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "softfloat_types.h" union ui16_f16 { uint16_t ui; float16_t f; }; +union ui16_bf16 { uint16_t ui; bfloat16_t f; }; union ui32_f32 { uint32_t ui; float32_t f; }; union ui64_f64 { uint64_t ui; float64_t f; }; @@ -101,6 +102,18 @@ float16_t /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ +#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15)) +#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF) +#define fracBF16UI( a ) ((a) & 0x07F) +#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig)) + +#define isNaNBF16UI( a ) (((~(a) & 0x7FC0) == 0) && ((a) & 0x07F)) + +bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t ); +struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t ); + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ #define signF32UI( a ) ((bool) ((uint32_t) (a)>>31)) #define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF) #define fracF32UI( a ) ((a) & 0x007FFFFF) diff --git a/source/include/softfloat.h b/source/include/softfloat.h index 9ed17c1..c1757b8 100644 --- a/source/include/softfloat.h +++ b/source/include/softfloat.h @@ -170,6 +170,13 @@ bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); /*---------------------------------------------------------------------------- +| 16-bit (brain float 16) floating-point operations. +*----------------------------------------------------------------------------*/ +float32_t bf16_to_f32( bfloat16_t ); +bfloat16_t f32_to_bf16( float32_t ); +bool bf16_isSignalingNaN( bfloat16_t ); + +/*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. *----------------------------------------------------------------------------*/ uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); diff --git a/source/include/softfloat_types.h b/source/include/softfloat_types.h index b92d246..25039b5 100644 --- a/source/include/softfloat_types.h +++ b/source/include/softfloat_types.h @@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | (typically 'float' and 'double', and possibly 'long double'). *----------------------------------------------------------------------------*/ typedef struct { uint16_t v; } float16_t; +typedef struct { uint16_t v; } bfloat16_t; typedef struct { uint32_t v; } float32_t; typedef struct { uint64_t v; } float64_t; typedef struct { uint64_t v[2]; } float128_t; diff --git a/source/s_normSubnormalBF16Sig.c b/source/s_normSubnormalBF16Sig.c new file mode 100644 index 0000000..b81baa9 --- /dev/null +++ b/source/s_normSubnormalBF16Sig.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> +#include "platform.h" +#include "internals.h" + +struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig ) +{ + int_fast8_t shiftDist; + struct exp8_sig16 z; + + shiftDist = softfloat_countLeadingZeros16( sig ) - 8; + z.exp = 1 - shiftDist; + z.sig = sig<<shiftDist; + return z; + +} + diff --git a/source/s_roundPackToBF16.c b/source/s_roundPackToBF16.c new file mode 100644 index 0000000..57a7ae4 --- /dev/null +++ b/source/s_roundPackToBF16.c @@ -0,0 +1,114 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdbool.h> +#include <stdint.h> +#include "platform.h" +#include "internals.h" +#include "softfloat.h" + +/** sig last significant bit is sig[7], the 7 LSBs will be used for rounding */ +bfloat16_t + softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig ) +{ + uint_fast8_t roundingMode; + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + bool isTiny; + uint_fast16_t uiZ; + union ui16_bf16 uZ; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundingMode = softfloat_roundingMode; + roundNearEven = (roundingMode == softfloat_round_near_even); + roundIncrement = 0x40; + if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) { + roundIncrement = + (roundingMode + == (sign ? softfloat_round_min : softfloat_round_max)) + ? 0x7F + : 0; + } + roundBits = sig & 0x7F; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0xFD <= (unsigned int) exp ) { + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + isTiny = + (softfloat_detectTininess == softfloat_tininess_beforeRounding) + || (exp < -1) || (sig + roundIncrement < 0x8000); + sig = softfloat_shiftRightJam32( sig, -exp ); + exp = 0; + roundBits = sig & 0x7F; + if ( isTiny && roundBits ) { + softfloat_raiseFlags( softfloat_flag_underflow ); + } + } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + softfloat_raiseFlags( + softfloat_flag_overflow | softfloat_flag_inexact ); + uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement; + goto uiZ; + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>7; + if ( roundBits ) { + softfloat_exceptionFlags |= softfloat_flag_inexact; +#ifdef SOFTFLOAT_ROUND_ODD + if ( roundingMode == softfloat_round_odd ) { + sig |= 1; + goto packReturn; + } +#endif + } + sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven); + if ( ! sig ) exp = 0; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + packReturn: + uiZ = packToBF16UI( sign, exp, sig ); + uiZ: + uZ.ui = uiZ; + return uZ.f; + +} + |