5 files changed, 255 insertions, 159 deletions
diff --git a/source/include/internals.h b/source/include/internals.h
index cf57bdc..9045f6c 100644
--- a/source/include/internals.h
+++ b/source/include/internals.h
@@ -2,10 +2,10 @@
 /*============================================================================
 
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3a, by John R. Hauser.
+Package, Release 3b, by John R. Hauser.
 
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -42,6 +42,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "primitives.h"
 #include "softfloat_types.h"
 
+union ui16_f16 { uint16_t ui; float16_t f; };
 union ui32_f32 { uint32_t ui; float32_t f; };
 union ui64_f64 { uint64_t ui; float64_t f; };
 
@@ -82,12 +83,33 @@ int_fast64_t softfloat_roundPackMToI64( bool, uint32_t *, uint_fast8_t, bool );
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
+#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))  /* bit 15 of the 16-bit pattern */
+#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)  /* the 5 bits 14..10 */
+#define fracF16UI( a ) ((a) & 0x03FF)  /* the 10 bits 9..0 */
+#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))  /* shifted fields are added, not OR'ed */
+
+#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))  /* bits 14..10 all 1 and bits 9..0 not all 0 */
+
+struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };  /* result type of softfloat_normSubnormalF16Sig */
+struct exp8_sig16 softfloat_normSubnormalF16Sig( uint_fast16_t );
+
+float16_t softfloat_roundPackToF16( bool, int_fast16_t, uint_fast16_t );
+float16_t softfloat_normRoundPackToF16( bool, int_fast16_t, uint_fast16_t );
+
+float16_t softfloat_addMagsF16( uint_fast16_t, uint_fast16_t );
+float16_t softfloat_subMagsF16( uint_fast16_t, uint_fast16_t );
+float16_t
+ softfloat_mulAddF16(
+     uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
 #define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
 #define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
 #define fracF32UI( a ) ((a) & 0x007FFFFF)
 #define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
 
-#define isNaNF32UI( a ) ((((a) & 0x7F800000) == 0x7F800000) && ((a) & 0x007FFFFF))
+#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
 
 struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
 struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
@@ -95,8 +117,8 @@ struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
 float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
 float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
 
-float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t, bool );
-float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t, bool );
+float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
+float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
 float32_t
  softfloat_mulAddF32(
      uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );
@@ -108,7 +130,7 @@ float32_t
 #define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
 #define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
 
-#define isNaNF64UI( a ) ((((a) & UINT64_C( 0x7FF0000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
+#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
 
 struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
 struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
@@ -159,7 +181,7 @@ extFloat80_t
 #define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
 #define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))
 
-#define isNaNF128UI( a64, a0 ) ((((a64) & UINT64_C( 0x7FFF000000000000 )) == UINT64_C( 0x7FFF000000000000 )) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
+#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
 
 struct exp32_sig128 { int_fast32_t exp; struct uint128 sig; };
 struct exp32_sig128
diff --git a/source/include/primitiveTypes.h b/source/include/primitiveTypes.h
index 16aca67..4790a3c 100644
--- a/source/include/primitiveTypes.h
+++ b/source/include/primitiveTypes.h
@@ -2,7 +2,7 @@
 /*============================================================================
 
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3a, by John R. Hauser.
+Package, Release 3b, by John R. Hauser.
 
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
diff --git a/source/include/primitives.h b/source/include/primitives.h
index 9995556..279e5ea 100644
--- a/source/include/primitives.h
+++ b/source/include/primitives.h
@@ -2,10 +2,10 @@
 /*============================================================================
 
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3a, by John R. Hauser.
+Package, Release 3b, by John R. Hauser.
 
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -43,61 +43,59 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #ifndef softfloat_shortShiftRightJam64
 /*----------------------------------------------------------------------------
-| Shifts `a' right by the number of bits given in `count', which must be in
+| Shifts `a' right by the number of bits given in `dist', which must be in
 | the range 1 to 63.  If any nonzero bits are shifted off, they are "jammed"
 | into the least-significant bit of the shifted value by setting the least-
 | significant bit to 1.  This shifted-and-jammed value is returned.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t count )
-    { return a>>count | ((a & (((uint_fast64_t) 1<<count) - 1)) != 0); }
+uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
+    { return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
 #else
-uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t count );
+uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shiftRightJam32
 /*----------------------------------------------------------------------------
-| Shifts `a' right by the number of bits given in `count', which must not
+| Shifts `a' right by the number of bits given in `dist', which must not
 | be zero.  If any nonzero bits are shifted off, they are "jammed" into the
 | least-significant bit of the shifted value by setting the least-significant
 | bit to 1.  This shifted-and-jammed value is returned.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
-| is greater than 32, the result will be either 0 or 1, depending on whether
-| `a' is zero or nonzero.
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist' is
+| greater than 32, the result will be either 0 or 1, depending on whether `a'
+| is zero or nonzero.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
-INLINE uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t count )
+INLINE uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
 {
     return
-        (count < 31) ? a>>count | ((uint32_t) (a<<(-count & 31)) != 0)
-            : (a != 0);
+        (dist < 31) ? a>>dist | ((uint32_t) (a<<(-dist & 31)) != 0) : (a != 0);
 }
 #else
-uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t count );
+uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shiftRightJam64
 /*----------------------------------------------------------------------------
-| Shifts `a' right by the number of bits given in `count', which must not
+| Shifts `a' right by the number of bits given in `dist', which must not
 | be zero.  If any nonzero bits are shifted off, they are "jammed" into the
 | least-significant bit of the shifted value by setting the least-significant
 | bit to 1.  This shifted-and-jammed value is returned.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
-| is greater than 64, the result will be either 0 or 1, depending on whether
-| `a' is zero or nonzero.
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist' is
+| greater than 64, the result will be either 0 or 1, depending on whether `a'
+| is zero or nonzero.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
-INLINE uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t count )
+INLINE uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
 {
     return
-        (count < 63) ? a>>count | ((uint64_t) (a<<(-count & 63)) != 0)
-            : (a != 0);
+        (dist < 63) ? a>>dist | ((uint64_t) (a<<(-dist & 63)) != 0) : (a != 0);
 }
 #else
-uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t count );
+uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist );
 #endif
 #endif
 
@@ -108,6 +106,27 @@ uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t count );
 *----------------------------------------------------------------------------*/
 extern const uint_least8_t softfloat_countLeadingZeros8[256];
 
+#ifndef softfloat_countLeadingZeros16
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 16 is returned.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+INLINE uint_fast8_t softfloat_countLeadingZeros16( uint16_t a )
+{
+    uint_fast8_t count = 8;    /* tentative: the high byte of `a' is all zeros */
+    if ( 0x100 <= a ) {        /* some bit of the high byte is set */
+        count = 0;
+        a >>= 8;               /* look at the high byte instead of the low one */
+    }
+    count += softfloat_countLeadingZeros8[a];    /* `a' is at most 0xFF here */
+    return count;
+}
+#else
+uint_fast8_t softfloat_countLeadingZeros16( uint16_t a );
+#endif
+#endif
+
 #ifndef softfloat_countLeadingZeros32
 /*----------------------------------------------------------------------------
 | Returns the number of leading 0 bits before the most-significant 1 bit of
@@ -141,6 +160,9 @@ uint_fast8_t softfloat_countLeadingZeros32( uint32_t a );
 uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
 #endif
 
+extern const uint16_t softfloat_approxRecip_1k0s[16];
+extern const uint16_t softfloat_approxRecip_1k1s[16];
+
 #ifndef softfloat_approxRecip32_1
 /*----------------------------------------------------------------------------
 | Returns an approximation to the reciprocal of the number represented by `a',
@@ -160,6 +182,9 @@ uint32_t softfloat_approxRecip32_1( uint32_t a );
 #endif
 #endif
 
+extern const uint16_t softfloat_approxRecipSqrt_1k0s[16];
+extern const uint16_t softfloat_approxRecipSqrt_1k1s[16];
+
 #ifndef softfloat_approxRecipSqrt32_1
 /*----------------------------------------------------------------------------
 | Returns an approximation to the reciprocal of the square root of the number
@@ -197,7 +222,7 @@ uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (1 <= INLINE_LEVEL)
 INLINE
- bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
     { return (a64 == b64) && (a0 == b0); }
 #else
 bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
@@ -212,7 +237,7 @@ bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
     { return (a64 < b64) || ((a64 == b64) && (a0 <= b0)); }
 #else
 bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
@@ -227,7 +252,7 @@ bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
     { return (a64 < b64) || ((a64 == b64) && (a0 < b0)); }
 #else
 bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
@@ -237,126 +262,126 @@ bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 #ifndef softfloat_shortShiftLeft128
 /*----------------------------------------------------------------------------
 | Shifts the 128 bits formed by concatenating `a64' and `a0' left by the
-| number of bits given in `count', which must be in the range 1 to 63.
+| number of bits given in `dist', which must be in the range 1 to 63.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- struct uint128
-  softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t count )
+struct uint128
+ softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
 {
     struct uint128 z;
-    z.v64 = a64<<count | a0>>(-count & 63);
-    z.v0 = a0<<count;
+    z.v64 = a64<<dist | a0>>(-dist & 63);
+    z.v0 = a0<<dist;
     return z;
 }
 #else
 struct uint128
- softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t count );
+ softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shortShiftRight128
 /*----------------------------------------------------------------------------
 | Shifts the 128 bits formed by concatenating `a64' and `a0' right by the
-| number of bits given in `count', which must be in the range 1 to 63.
+| number of bits given in `dist', which must be in the range 1 to 63.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- struct uint128
-  softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t count )
+struct uint128
+ softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
 {
     struct uint128 z;
-    z.v64 = a64>>count;
-    z.v0 = a64<<(-count & 63) | a0>>count;
+    z.v64 = a64>>dist;
+    z.v0 = a64<<(-dist & 63) | a0>>dist;
     return z;
 }
 #else
 struct uint128
- softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t count );
+ softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shortShiftRightJam64Extra
 /*----------------------------------------------------------------------------
 | This function is the same as `softfloat_shiftRightJam64Extra' (below),
-| except that `count' must be in the range 1 to 63.
+| except that `dist' must be in the range 1 to 63.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- struct uint64_extra
-  softfloat_shortShiftRightJam64Extra(
-      uint64_t a, uint64_t extra, uint_fast8_t count )
+struct uint64_extra
+ softfloat_shortShiftRightJam64Extra(
+     uint64_t a, uint64_t extra, uint_fast8_t dist )
 {
     struct uint64_extra z;
-    z.v = a>>count;
-    z.extra = a<<(-count & 63) | (extra != 0);
+    z.v = a>>dist;
+    z.extra = a<<(-dist & 63) | (extra != 0);
     return z;
 }
 #else
 struct uint64_extra
  softfloat_shortShiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast8_t count );
+     uint64_t a, uint64_t extra, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shortShiftRightJam128
 /*----------------------------------------------------------------------------
 | Shifts the 128 bits formed by concatenating `a64' and `a0' right by the
-| number of bits given in `count', which must be in the range 1 to 63.  If any
+| number of bits given in `dist', which must be in the range 1 to 63.  If any
 | nonzero bits are shifted off, they are "jammed" into the least-significant
 | bit of the shifted value by setting the least-significant bit to 1.  This
 | shifted-and-jammed value is returned.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
 INLINE
- struct uint128
-  softfloat_shortShiftRightJam128(
-      uint64_t a64, uint64_t a0, uint_fast8_t count )
+struct uint128
+ softfloat_shortShiftRightJam128(
+     uint64_t a64, uint64_t a0, uint_fast8_t dist )
 {
-    uint_fast8_t negCount = -count;
+    uint_fast8_t negDist = -dist;
     struct uint128 z;
-    z.v64 = a64>>count;
+    z.v64 = a64>>dist;
     z.v0 =
-        a64<<(negCount & 63) | a0>>count
-            | ((uint64_t) (a0<<(negCount & 63)) != 0);
+        a64<<(negDist & 63) | a0>>dist
+            | ((uint64_t) (a0<<(negDist & 63)) != 0);
     return z;
 }
 #else
 struct uint128
  softfloat_shortShiftRightJam128(
-     uint64_t a64, uint64_t a0, uint_fast8_t count );
+     uint64_t a64, uint64_t a0, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shortShiftRightJam128Extra
 /*----------------------------------------------------------------------------
 | This function is the same as `softfloat_shiftRightJam128Extra' (below),
-| except that `count' must be in the range 1 to 63.
+| except that `dist' must be in the range 1 to 63.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
 INLINE
- struct uint128_extra
-  softfloat_shortShiftRightJam128Extra(
-      uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t count )
+struct uint128_extra
+ softfloat_shortShiftRightJam128Extra(
+     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
 {
-    uint_fast8_t negCount = -count;
+    uint_fast8_t negDist = -dist;
     struct uint128_extra z;
-    z.v.v64 = a64>>count;
-    z.v.v0 = a64<<(negCount & 63) | a0>>count;
-    z.extra = a0<<(negCount & 63) | (extra != 0);
+    z.v.v64 = a64>>dist;
+    z.v.v0 = a64<<(negDist & 63) | a0>>dist;
+    z.extra = a0<<(negDist & 63) | (extra != 0);
     return z;
 }
 #else
 struct uint128_extra
  softfloat_shortShiftRightJam128Extra(
-     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t count );
+     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shiftRightJam64Extra
 /*----------------------------------------------------------------------------
 | Shifts the 128 bits formed by concatenating `a' and `extra' right by 64
-| _plus_ the number of bits given in `count', which must not be zero.  This
+| _plus_ the number of bits given in `dist', which must not be zero.  This
 | shifted value is at most 64 nonzero bits and is returned in the `v' field
 | of the `struct uint64_extra' result.  The 64-bit `extra' field of the result
 | contains a value formed as follows from the bits that were shifted off:  The
@@ -366,23 +391,23 @@ struct uint128_extra
 |   (This function makes more sense if `a' and `extra' are considered to form
 | an unsigned fixed-point number with binary point between `a' and `extra'.
 | This fixed-point value is shifted right by the number of bits given in
-| `count', and the integer part of this shifted value is returned in the `v'
+| `dist', and the integer part of this shifted value is returned in the `v'
 | field of the result.  The fractional part of the shifted value is modified
 | as described above and returned in the `extra' field of the result.)
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
 INLINE
- struct uint64_extra
-  softfloat_shiftRightJam64Extra(
-      uint64_t a, uint64_t extra, uint_fast32_t count )
+struct uint64_extra
+ softfloat_shiftRightJam64Extra(
+     uint64_t a, uint64_t extra, uint_fast32_t dist )
 {
     struct uint64_extra z;
-    if ( count < 64 ) {
-        z.v = a>>count;
-        z.extra = a<<(-count & 63);
+    if ( dist < 64 ) {
+        z.v = a>>dist;
+        z.extra = a<<(-dist & 63);
     } else {
         z.v = 0;
-        z.extra = (count == 64) ? a : (a != 0);
+        z.extra = (dist == 64) ? a : (a != 0);
     }
     z.extra |= (extra != 0);
     return z;
@@ -390,29 +415,29 @@ INLINE
 #else
 struct uint64_extra
  softfloat_shiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast32_t count );
+     uint64_t a, uint64_t extra, uint_fast32_t dist );
 #endif
 #endif
 
 #ifndef softfloat_shiftRightJam128
 /*----------------------------------------------------------------------------
 | Shifts the 128 bits formed by concatenating `a64' and `a0' right by the
-| number of bits given in `count', which must not be zero.  If any nonzero
-| bits are shifted off, they are "jammed" into the least-significant bit of
-| the shifted value by setting the least-significant bit to 1.  This shifted-
-| and-jammed value is returned.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
-| is greater than 128, the result will be either 0 or 1, depending on whether
-| the original 128 bits are all zeros.
+| number of bits given in `dist', which must not be zero.  If any nonzero bits
+| are shifted off, they are "jammed" into the least-significant bit of the
+| shifted value by setting the least-significant bit to 1.  This shifted-and-
+| jammed value is returned.
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist' is
+| greater than 128, the result will be either 0 or 1, depending on whether the
+| original 128 bits are all zeros.
 *----------------------------------------------------------------------------*/
 struct uint128
- softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t count );
+ softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
 #endif
 
 #ifndef softfloat_shiftRightJam128Extra
 /*----------------------------------------------------------------------------
 | Shifts the 192 bits formed by concatenating `a64', `a0', and `extra' right
-| by 64 _plus_ the number of bits given in `count', which must not be zero.
+| by 64 _plus_ the number of bits given in `dist', which must not be zero.
 | This shifted value is at most 128 nonzero bits and is returned in the `v'
 | field of the `struct uint128_extra' result.  The 64-bit `extra' field of the
 | result contains a value formed as follows from the bits that were shifted
@@ -422,32 +447,32 @@ struct uint128
 |   (This function makes more sense if `a64', `a0', and `extra' are considered
 | to form an unsigned fixed-point number with binary point between `a0' and
 | `extra'.  This fixed-point value is shifted right by the number of bits
-| given in `count', and the integer part of this shifted value is returned
+| given in `dist', and the integer part of this shifted value is returned
 | in the `v' field of the result.  The fractional part of the shifted value
 | is modified as described above and returned in the `extra' field of the
 | result.)
 *----------------------------------------------------------------------------*/
 struct uint128_extra
  softfloat_shiftRightJam128Extra(
-     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t count );
+     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist );
 #endif
 
 #ifndef softfloat_shiftRightJam256M
 /*----------------------------------------------------------------------------
 | Shifts the 256-bit unsigned integer pointed to by `aPtr' right by the number
-| of bits given in `count', which must not be zero.  If any nonzero bits are
+| of bits given in `dist', which must not be zero.  If any nonzero bits are
 | shifted off, they are "jammed" into the least-significant bit of the shifted
 | value by setting the least-significant bit to 1.  This shifted-and-jammed
 | value is stored at the location pointed to by `zPtr'.  Each of `aPtr' and
 | `zPtr' points to an array of four 64-bit elements that concatenate in the
 | platform's normal endian order to form a 256-bit integer.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist'
 | is greater than 256, the stored result will be either 0 or 1, depending on
 | whether the original 256 bits are all zeros.
 *----------------------------------------------------------------------------*/
 void
  softfloat_shiftRightJam256M(
-     const uint64_t *aPtr, uint_fast32_t count, uint64_t *zPtr );
+     const uint64_t *aPtr, uint_fast32_t dist, uint64_t *zPtr );
 #endif
 
 #ifndef softfloat_add128
@@ -458,8 +483,8 @@ void
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- struct uint128
-  softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+struct uint128
+ softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
 {
     struct uint128 z;
     z.v0 = a0 + b0;
@@ -493,8 +518,8 @@ void
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- struct uint128
-  softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+struct uint128
+ softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
 {
     struct uint128 z;
     z.v0 = a0 - b0;
@@ -556,7 +581,7 @@ struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
 INLINE
- struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
+struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
 {
     struct uint128 z;
     uint_fast64_t mid;
@@ -620,33 +645,33 @@ int_fast8_t
 #ifndef softfloat_shortShiftLeft64To96M
 /*----------------------------------------------------------------------------
 | Extends `a' to 96 bits and shifts the value left by the number of bits given
-| in `count', which must be in the range 1 to 31.  The result is stored at the
+| in `dist', which must be in the range 1 to 31.  The result is stored at the
 | location pointed to by `zPtr'.  Argument `zPtr' points to an array of three
 | 32-bit elements that concatenate in the platform's normal endian order to
 | form a 96-bit integer.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
- void
-  softfloat_shortShiftLeft64To96M(
-      uint64_t a, uint_fast8_t count, uint32_t *zPtr )
+void
+ softfloat_shortShiftLeft64To96M(
+     uint64_t a, uint_fast8_t dist, uint32_t *zPtr )
 {
-    zPtr[indexWord( 3, 0 )] = (uint32_t) a<<count;
-    a >>= 32 - count;
+    zPtr[indexWord( 3, 0 )] = (uint32_t) a<<dist;
+    a >>= 32 - dist;
     zPtr[indexWord( 3, 2 )] = a>>32;
     zPtr[indexWord( 3, 1 )] = a;
 }
 #else
 void
  softfloat_shortShiftLeft64To96M(
-     uint64_t a, uint_fast8_t count, uint32_t *zPtr );
+     uint64_t a, uint_fast8_t dist, uint32_t *zPtr );
 #endif
 #endif
 
 #ifndef softfloat_shortShiftLeftM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' left by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
 | shifted N-bit result is stored at the location pointed to by `zPtr'.  Each
 | of `aPtr' and `zPtr' points to a `size_words'-long array of 32-bit elements
@@ -657,7 +682,7 @@ void
  softfloat_shortShiftLeftM(
      uint_fast8_t size_words,
      const uint32_t *aPtr,
-     uint_fast8_t count,
+     uint_fast8_t dist,
      uint32_t *zPtr
  );
 #endif
@@ -667,7 +692,7 @@ void
 | This function or macro is the same as `softfloat_shortShiftLeftM' with
 | `size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft96M( aPtr, count, zPtr ) softfloat_shortShiftLeftM( 3, aPtr, count, zPtr )
+#define softfloat_shortShiftLeft96M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 3, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shortShiftLeft128M
@@ -675,7 +700,7 @@ void
 | This function or macro is the same as `softfloat_shortShiftLeftM' with
 | `size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft128M( aPtr, count, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, count, zPtr )
+#define softfloat_shortShiftLeft128M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shortShiftLeft160M
@@ -683,25 +708,25 @@ void
 | This function or macro is the same as `softfloat_shortShiftLeftM' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft160M( aPtr, count, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, count, zPtr )
+#define softfloat_shortShiftLeft160M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftLeftM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' left by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must not be zero.  Any nonzero bits shifted off are lost.  The shifted
 | N-bit result is stored at the location pointed to by `zPtr'.  Each of `aPtr'
 | and `zPtr' points to a `size_words'-long array of 32-bit elements that
 | concatenate in the platform's normal endian order to form an N-bit integer.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
-| is greater than N, the stored result will be 0.
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist' is
+| greater than N, the stored result will be 0.
 *----------------------------------------------------------------------------*/
 void
  softfloat_shiftLeftM(
      uint_fast8_t size_words,
      const uint32_t *aPtr,
-     uint32_t count,
+     uint32_t dist,
      uint32_t *zPtr
  );
 #endif
@@ -711,7 +736,7 @@ void
 | This function or macro is the same as `softfloat_shiftLeftM' with
 | `size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft96M( aPtr, count, zPtr ) softfloat_shiftLeftM( 3, aPtr, count, zPtr )
+#define softfloat_shiftLeft96M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 3, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftLeft128M
@@ -719,7 +744,7 @@ void
 | This function or macro is the same as `softfloat_shiftLeftM' with
 | `size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft128M( aPtr, count, zPtr ) softfloat_shiftLeftM( 4, aPtr, count, zPtr )
+#define softfloat_shiftLeft128M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 4, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftLeft160M
@@ -727,13 +752,13 @@ void
 | This function or macro is the same as `softfloat_shiftLeftM' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft160M( aPtr, count, zPtr ) softfloat_shiftLeftM( 5, aPtr, count, zPtr )
+#define softfloat_shiftLeft160M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 5, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shortShiftRightM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' right by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
 | shifted N-bit result is stored at the location pointed to by `zPtr'.  Each
 | of `aPtr' and `zPtr' points to a `size_words'-long array of 32-bit elements
@@ -744,7 +769,7 @@ void
  softfloat_shortShiftRightM(
      uint_fast8_t size_words,
      const uint32_t *aPtr,
-     uint_fast8_t count,
+     uint_fast8_t dist,
      uint32_t *zPtr
  );
 #endif
@@ -754,7 +779,7 @@ void
 | This function or macro is the same as `softfloat_shortShiftRightM' with
 | `size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRight128M( aPtr, count, zPtr ) softfloat_shortShiftRightM( 4, aPtr, count, zPtr )
+#define softfloat_shortShiftRight128M( aPtr, dist, zPtr ) softfloat_shortShiftRightM( 4, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shortShiftRight160M
@@ -762,13 +787,13 @@ void
 | This function or macro is the same as `softfloat_shortShiftRightM' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRight160M( aPtr, count, zPtr ) softfloat_shortShiftRightM( 5, aPtr, count, zPtr )
+#define softfloat_shortShiftRight160M( aPtr, dist, zPtr ) softfloat_shortShiftRightM( 5, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shortShiftRightJamM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' right by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must be in the range 1 to 31.  If any nonzero bits are shifted off, they are
 | "jammed" into the least-significant bit of the shifted value by setting the
 | least-significant bit to 1.  This shifted-and-jammed N-bit result is stored
@@ -786,25 +811,25 @@ void
 | This function or macro is the same as `softfloat_shortShiftRightJamM' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRightJam160M( aPtr, count, zPtr ) softfloat_shortShiftRightJamM( 5, aPtr, count, zPtr )
+#define softfloat_shortShiftRightJam160M( aPtr, dist, zPtr ) softfloat_shortShiftRightJamM( 5, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftRightM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' right by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must not be zero.  Any nonzero bits shifted off are lost.  The shifted
 | N-bit result is stored at the location pointed to by `zPtr'.  Each of `aPtr'
 | and `zPtr' points to a `size_words'-long array of 32-bit elements that
 | concatenate in the platform's normal endian order to form an N-bit integer.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
-| is greater than N, the stored result will be 0.
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist' is
+| greater than N, the stored result will be 0.
 *----------------------------------------------------------------------------*/
 void
  softfloat_shiftRightM(
      uint_fast8_t size_words,
      const uint32_t *aPtr,
-     uint32_t count,
+     uint32_t dist,
      uint32_t *zPtr
  );
 #endif
@@ -814,20 +839,20 @@ void
 | This function or macro is the same as `softfloat_shiftRightM' with
 | `size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRight96M( aPtr, count, zPtr ) softfloat_shiftRightM( 3, aPtr, count, zPtr )
+#define softfloat_shiftRight96M( aPtr, dist, zPtr ) softfloat_shiftRightM( 3, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftRightJamM
 /*----------------------------------------------------------------------------
 | Shifts the N-bit unsigned integer pointed to by `aPtr' right by the number
-| of bits given in `count', where N = `size_words' * 32.  The value of `count'
+| of bits given in `dist', where N = `size_words' * 32.  The value of `dist'
 | must not be zero.  If any nonzero bits are shifted off, they are "jammed"
 | into the least-significant bit of the shifted value by setting the least-
 | significant bit to 1.  This shifted-and-jammed N-bit result is stored
 | at the location pointed to by `zPtr'.  Each of `aPtr' and `zPtr' points
 | to a `size_words'-long array of 32-bit elements that concatenate in the
 | platform's normal endian order to form an N-bit integer.
-|   The value of `count' can be arbitrarily large.  In particular, if `count'
+|   The value of `dist' can be arbitrarily large.  In particular, if `dist'
 | is greater than N, the stored result will be either 0 or 1, depending on
 | whether the original N bits are all zeros.
 *----------------------------------------------------------------------------*/
@@ -835,7 +860,7 @@ void
  softfloat_shiftRightJamM(
      uint_fast8_t size_words,
      const uint32_t *aPtr,
-     uint32_t count,
+     uint32_t dist,
      uint32_t *zPtr
  );
 #endif
@@ -845,7 +870,7 @@ void
 | This function or macro is the same as `softfloat_shiftRightJamM' with
 | `size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam96M( aPtr, count, zPtr ) softfloat_shiftRightJamM( 3, aPtr, count, zPtr )
+#define softfloat_shiftRightJam96M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 3, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftRightJam128M
@@ -853,7 +878,7 @@ void
 | This function or macro is the same as `softfloat_shiftRightJamM' with
 | `size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam128M( aPtr, count, zPtr ) softfloat_shiftRightJamM( 4, aPtr, count, zPtr )
+#define softfloat_shiftRightJam128M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 4, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_shiftRightJam160M
@@ -861,7 +886,7 @@ void
 | This function or macro is the same as `softfloat_shiftRightJamM' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam160M( aPtr, count, zPtr ) softfloat_shiftRightJamM( 5, aPtr, count, zPtr )
+#define softfloat_shiftRightJam160M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 5, aPtr, dist, zPtr )
 #endif
 
 #ifndef softfloat_addM
@@ -1088,7 +1113,7 @@ void
 /*----------------------------------------------------------------------------
 | Performs a "remainder reduction step" as follows:  Arguments `remPtr' and
 | `bPtr' both point to N-bit unsigned integers, where N = `size_words' * 32.
-| Defining R and B as the values of those integers, the expression (R<<`count')
+| Defining R and B as the values of those integers, the expression (R<<`dist')
 | - B * q is computed modulo 2^N, and the N-bit result is stored at the
 | location pointed to by `zPtr'.  Each of `remPtr', `bPtr', and `zPtr' points
 | to a `size_words'-long array of 32-bit elements that concatenate in the
@@ -1098,7 +1123,7 @@ void
  softfloat_remStepMBy32(
      uint_fast8_t size_words,
      const uint32_t *remPtr,
-     uint_fast8_t count,
+     uint_fast8_t dist,
      const uint32_t *bPtr,
      uint32_t q,
      uint32_t *zPtr
@@ -1110,7 +1135,7 @@ void
 | This function or macro is the same as `softfloat_remStepMBy32' with
 | `size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep96MBy32( remPtr, count, bPtr, q, zPtr ) softfloat_remStepMBy32( 3, remPtr, count, bPtr, q, zPtr )
+#define softfloat_remStep96MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 3, remPtr, dist, bPtr, q, zPtr )
 #endif
 
 #ifndef softfloat_remStep128MBy32
@@ -1118,7 +1143,7 @@ void
 | This function or macro is the same as `softfloat_remStepMBy32' with
 | `size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep128MBy32( remPtr, count, bPtr, q, zPtr ) softfloat_remStepMBy32( 4, remPtr, count, bPtr, q, zPtr )
+#define softfloat_remStep128MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 4, remPtr, dist, bPtr, q, zPtr )
 #endif
 
 #ifndef softfloat_remStep160MBy32
@@ -1126,7 +1151,7 @@ void
 | This function or macro is the same as `softfloat_remStepMBy32' with
 | `size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep160MBy32( remPtr, count, bPtr, q, zPtr ) softfloat_remStepMBy32( 5, remPtr, count, bPtr, q, zPtr )
+#define softfloat_remStep160MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 5, remPtr, dist, bPtr, q, zPtr )
 #endif
 
 #endif
diff --git a/source/include/softfloat.h b/source/include/softfloat.h
index 3f424fc..8f82288 100644
--- a/source/include/softfloat.h
+++ b/source/include/softfloat.h
@@ -2,10 +2,10 @@
 /*============================================================================
 
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3a, by John R. Hauser.
+Package, Release 3b, by John R. Hauser.
 
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -48,13 +48,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "softfloat_types.h"
 
+#ifndef THREAD_LOCAL
+#define THREAD_LOCAL
+#endif
+
 /*----------------------------------------------------------------------------
 | Software floating-point underflow tininess-detection mode.
 *----------------------------------------------------------------------------*/
-extern uint_fast8_t softfloat_detectTininess;
+extern THREAD_LOCAL uint_fast8_t softfloat_detectTininess;
 enum {
     softfloat_tininess_beforeRounding = 0,
     softfloat_tininess_afterRounding  = 1
@@ -63,7 +66,7 @@ enum {
 /*----------------------------------------------------------------------------
 | Software floating-point rounding mode.
 *----------------------------------------------------------------------------*/
-extern uint_fast8_t softfloat_roundingMode;
+extern THREAD_LOCAL uint_fast8_t softfloat_roundingMode;
 enum {
     softfloat_round_near_even   = 0,
     softfloat_round_minMag      = 1,
@@ -75,7 +78,7 @@ enum {
 /*----------------------------------------------------------------------------
 | Software floating-point exception flags.
 *----------------------------------------------------------------------------*/
-extern uint_fast8_t softfloat_exceptionFlags;
+extern THREAD_LOCAL uint_fast8_t softfloat_exceptionFlags;
 enum {
     softfloat_flag_inexact   =  1,
     softfloat_flag_underflow =  2,
@@ -92,6 +95,7 @@ void softfloat_raiseFlags( uint_fast8_t );
 /*----------------------------------------------------------------------------
 | Integer-to-floating-point conversion routines.
 *----------------------------------------------------------------------------*/
+float16_t ui32_to_f16( uint32_t );
 float32_t ui32_to_f32( uint32_t );
 float64_t ui32_to_f64( uint32_t );
 #ifdef SOFTFLOAT_FAST_INT64
@@ -100,6 +104,7 @@ float128_t ui32_to_f128( uint32_t );
 #endif
 void ui32_to_extF80M( uint32_t, extFloat80_t * );
 void ui32_to_f128M( uint32_t, float128_t * );
+float16_t ui64_to_f16( uint64_t );
 float32_t ui64_to_f32( uint64_t );
 float64_t ui64_to_f64( uint64_t );
 #ifdef SOFTFLOAT_FAST_INT64
@@ -108,6 +113,7 @@ float128_t ui64_to_f128( uint64_t );
 #endif
 void ui64_to_extF80M( uint64_t, extFloat80_t * );
 void ui64_to_f128M( uint64_t, float128_t * );
+float16_t i32_to_f16( int32_t );
 float32_t i32_to_f32( int32_t );
 float64_t i32_to_f64( int32_t );
 #ifdef SOFTFLOAT_FAST_INT64
@@ -116,6 +122,7 @@ float128_t i32_to_f128( int32_t );
 #endif
 void i32_to_extF80M( int32_t, extFloat80_t * );
 void i32_to_f128M( int32_t, float128_t * );
+float16_t i64_to_f16( int64_t );
 float32_t i64_to_f32( int64_t );
 float64_t i64_to_f64( int64_t );
 #ifdef SOFTFLOAT_FAST_INT64
@@ -126,6 +133,41 @@ void i64_to_extF80M( int64_t, extFloat80_t * );
 void i64_to_f128M( int64_t, float128_t * );
 
 /*----------------------------------------------------------------------------
+| 16-bit (half-precision) floating-point operations.
+*----------------------------------------------------------------------------*/
+uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
+uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
+int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
+int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
+uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
+uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
+int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
+int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
+float32_t f16_to_f32( float16_t );
+float64_t f16_to_f64( float16_t );
+#ifdef SOFTFLOAT_FAST_INT64
+extFloat80_t f16_to_extF80( float16_t );
+float128_t f16_to_f128( float16_t );
+#endif
+void f16_to_extF80M( float16_t, extFloat80_t * );
+void f16_to_f128M( float16_t, float128_t * );
+float16_t f16_roundToInt( float16_t, uint_fast8_t, bool );
+float16_t f16_add( float16_t, float16_t );
+float16_t f16_sub( float16_t, float16_t );
+float16_t f16_mul( float16_t, float16_t );
+float16_t f16_mulAdd( float16_t, float16_t, float16_t );
+float16_t f16_div( float16_t, float16_t );
+float16_t f16_rem( float16_t, float16_t );
+float16_t f16_sqrt( float16_t );
+bool f16_eq( float16_t, float16_t );
+bool f16_le( float16_t, float16_t );
+bool f16_lt( float16_t, float16_t );
+bool f16_eq_signaling( float16_t, float16_t );
+bool f16_le_quiet( float16_t, float16_t );
+bool f16_lt_quiet( float16_t, float16_t );
+bool f16_isSignalingNaN( float16_t );
+
+/*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
 uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
@@ -136,6 +178,7 @@ uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
 uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
 int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
 int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
+float16_t f32_to_f16( float32_t );
 float64_t f32_to_f64( float32_t );
 #ifdef SOFTFLOAT_FAST_INT64
 extFloat80_t f32_to_extF80( float32_t );
@@ -170,6 +213,7 @@ uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
 uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
 int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
 int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
+float16_t f64_to_f16( float64_t );
 float32_t f64_to_f32( float64_t );
 #ifdef SOFTFLOAT_FAST_INT64
 extFloat80_t f64_to_extF80( float64_t );
@@ -197,7 +241,7 @@ bool f64_isSignalingNaN( float64_t );
 | Rounding precision for 80-bit extended double-precision floating-point.
 | Valid values are 32, 64, and 80.
 *----------------------------------------------------------------------------*/
-extern uint_fast8_t extF80_roundingPrecision;
+extern THREAD_LOCAL uint_fast8_t extF80_roundingPrecision;
 
 /*----------------------------------------------------------------------------
 | 80-bit extended double-precision floating-point operations.
@@ -211,6 +255,7 @@ uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t, bool );
 uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t, bool );
 int_fast32_t extF80_to_i32_r_minMag( extFloat80_t, bool );
 int_fast64_t extF80_to_i64_r_minMag( extFloat80_t, bool );
+float16_t extF80_to_f16( extFloat80_t );
 float32_t extF80_to_f32( extFloat80_t );
 float64_t extF80_to_f64( extFloat80_t );
 float128_t extF80_to_f128( extFloat80_t );
@@ -237,6 +282,7 @@ uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *, bool );
 uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *, bool );
 int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *, bool );
 int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *, bool );
+float16_t extF80M_to_f16( const extFloat80_t * );
 float32_t extF80M_to_f32( const extFloat80_t * );
 float64_t extF80M_to_f64( const extFloat80_t * );
 void extF80M_to_f128M( const extFloat80_t *, float128_t * );
@@ -269,6 +315,7 @@ uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );
 uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );
 int_fast32_t f128_to_i32_r_minMag( float128_t, bool );
 int_fast64_t f128_to_i64_r_minMag( float128_t, bool );
+float16_t f128_to_f16( float128_t );
 float32_t f128_to_f32( float128_t );
 float64_t f128_to_f64( float128_t );
 extFloat80_t f128_to_extF80( float128_t );
@@ -296,6 +343,7 @@ uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *, bool );
 uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *, bool );
 int_fast32_t f128M_to_i32_r_minMag( const float128_t *, bool );
 int_fast64_t f128M_to_i64_r_minMag( const float128_t *, bool );
+float16_t f128M_to_f16( const float128_t * );
 float32_t f128M_to_f32( const float128_t * );
 float64_t f128M_to_f64( const float128_t * );
 void f128M_to_extF80M( const float128_t *, extFloat80_t * );
diff --git a/source/include/softfloat_types.h b/source/include/softfloat_types.h
index c7b67ae..e882515 100644
--- a/source/include/softfloat_types.h
+++ b/source/include/softfloat_types.h
@@ -2,10 +2,10 @@
 /*============================================================================
 
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3a, by John R. Hauser.
+Package, Release 3b, by John R. Hauser.
 
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -40,13 +40,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <stdint.h>
 
 /*----------------------------------------------------------------------------
-| Types used to pass 32-bit, 64-bit, and 128-bit floating-point arguments and
-| results to/from functions.  These types must be exactly 32 bits, 64 bits,
-| and 128 bits in size, respectively.  Where a platform has "native" support
-| for IEEE-Standard floating-point formats, the types below may, if desired,
-| be defined as aliases for the native types (typically `float' and `double',
-| and possibly `long double').
+| Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point
+| arguments and results to/from functions.  These types must be exactly
+| 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively.  Where a
+| platform has "native" support for IEEE-Standard floating-point formats,
+| the types below may, if desired, be defined as aliases for the native types
+| (typically `float' and `double', and possibly `long double').
 *----------------------------------------------------------------------------*/
+typedef struct { uint16_t v; } float16_t;
 typedef struct { uint32_t v; } float32_t;
 typedef struct { uint64_t v; } float64_t;
 typedef struct { uint64_t v[2]; } float128_t;