diff options
Diffstat (limited to 'fpu/softfloat.c')
-rw-r--r-- | fpu/softfloat.c | 726 |
1 files changed, 259 insertions, 467 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index bc0f52f..8cd2400 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -96,16 +96,6 @@ this code that are retained. #include "fpu/softfloat-macros.h" /*---------------------------------------------------------------------------- -| Functions and definitions to determine: (1) whether tininess for underflow -| is detected before or after rounding by default, (2) what (if anything) -| happens when exceptions are raised, (3) how signaling NaNs are distinguished -| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs -| are propagated from function inputs to output. These details are target- -| specific. -*----------------------------------------------------------------------------*/ -#include "softfloat-specialize.h" - -/*---------------------------------------------------------------------------- | Returns the fraction bits of the half-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -124,15 +114,6 @@ static inline int extractFloat16Exp(float16 a) } /*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloat16Sign(float16 a) -{ - return float16_val(a)>>15; -} - -/*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -198,10 +179,24 @@ typedef enum __attribute__ ((__packed__)) { float_class_inf, float_class_qnan, /* all NaNs from here */ float_class_snan, - float_class_dnan, - float_class_msnan, /* maybe silenced */ } FloatClass; +/* Simple helpers for checking if, or what kind of, NaN we have */ +static inline __attribute__((unused)) bool is_nan(FloatClass c) +{ + return unlikely(c >= float_class_qnan); +} + +static inline __attribute__((unused)) bool is_snan(FloatClass c) +{ + return c == float_class_snan; +} + +static inline __attribute__((unused)) bool is_qnan(FloatClass c) +{ + return c == float_class_qnan; +} + /* * Structure holding all of the decomposed parts of a float. The * exponent is unbiased and the fraction is normalized. All @@ -232,8 +227,10 @@ typedef struct { * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT * The following are computed based the size of fraction * frac_lsb: least significant bit of fraction - * fram_lsbm1: the bit bellow the least significant bit (for rounding) + * frac_lsbm1: the bit below the least significant bit (for rounding) * round_mask/roundeven_mask: masks used for rounding + * The following optional modifiers are available: + * arm_althp: handle ARM Alternative Half Precision */ typedef struct { int exp_size; @@ -245,6 +242,7 @@ typedef struct { uint64_t frac_lsbm1; uint64_t round_mask; uint64_t roundeven_mask; + bool arm_althp; } FloatFmt; /* Expand fields based on the size of exponent and fraction */ @@ -263,6 +261,11 @@ static const FloatFmt float16_params = { FLOAT_PARAMS(5, 10) }; +static const FloatFmt float16_params_ahp = { + FLOAT_PARAMS(5, 10), + .arm_althp = true +}; + static const FloatFmt float32_params = { FLOAT_PARAMS(8, 23) }; @@ -322,24 +325,27 @@ static inline float64 float64_pack_raw(FloatParts p) return make_float64(pack_raw(float64_params, p)); } +/*---------------------------------------------------------------------------- +| Functions and definitions to determine: (1) whether tininess for underflow +| is detected before or after rounding by default, (2) what (if anything) +| happens when exceptions are raised, (3) how signaling NaNs are distinguished +| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +| are propagated from function inputs to output. These details are target- +| specific. +*----------------------------------------------------------------------------*/ +#include "softfloat-specialize.h" + /* Canonicalize EXP and FRAC, setting CLS. */ static FloatParts canonicalize(FloatParts part, const FloatFmt *parm, float_status *status) { - if (part.exp == parm->exp_max) { + if (part.exp == parm->exp_max && !parm->arm_althp) { if (part.frac == 0) { part.cls = float_class_inf; } else { -#ifdef NO_SIGNALING_NANS - part.cls = float_class_qnan; -#else - int64_t msb = part.frac << (parm->frac_shift + 2); - if ((msb < 0) == status->snan_bit_is_one) { - part.cls = float_class_snan; - } else { - part.cls = float_class_qnan; - } -#endif + part.frac <<= parm->frac_shift; + part.cls = (parts_is_snan_frac(part.frac, status) + ? float_class_snan : float_class_qnan); } } else if (part.exp == 0) { if (likely(part.frac == 0)) { @@ -422,7 +428,15 @@ static FloatParts round_canonical(FloatParts p, float_status *s, } frac >>= frac_shift; - if (unlikely(exp >= exp_max)) { + if (parm->arm_althp) { + /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ + if (unlikely(exp > exp_max)) { + /* Overflow. Return the maximum normal. */ + flags = float_flag_invalid; + exp = exp_max; + frac = -1; + } + } else if (unlikely(exp >= exp_max)) { flags |= float_flag_overflow | float_flag_inexact; if (overflow_norm) { exp = exp_max - 1; @@ -473,13 +487,16 @@ static FloatParts round_canonical(FloatParts p, float_status *s, case float_class_inf: do_inf: + assert(!parm->arm_althp); exp = exp_max; frac = 0; break; case float_class_qnan: case float_class_snan: + assert(!parm->arm_althp); exp = exp_max; + frac >>= parm->frac_shift; break; default: @@ -492,22 +509,27 @@ static FloatParts round_canonical(FloatParts p, float_status *s, return p; } +/* Explicit FloatFmt version */ +static FloatParts float16a_unpack_canonical(float16 f, float_status *s, + const FloatFmt *params) +{ + return canonicalize(float16_unpack_raw(f), params, s); +} + static FloatParts float16_unpack_canonical(float16 f, float_status *s) { - return canonicalize(float16_unpack_raw(f), &float16_params, s); + return float16a_unpack_canonical(f, s, &float16_params); +} + +static float16 float16a_round_pack_canonical(FloatParts p, float_status *s, + const FloatFmt *params) +{ + return float16_pack_raw(round_canonical(p, s, params)); } static float16 float16_round_pack_canonical(FloatParts p, float_status *s) { - switch (p.cls) { - case float_class_dnan: - return float16_default_nan(s); - case float_class_msnan: - return float16_maybe_silence_nan(float16_pack_raw(p), s); - default: - p = round_canonical(p, s, &float16_params); - return float16_pack_raw(p); - } + return float16a_round_pack_canonical(p, s, &float16_params); } static FloatParts float32_unpack_canonical(float32 f, float_status *s) @@ -517,15 +539,7 @@ static FloatParts float32_unpack_canonical(float32 f, float_status *s) static float32 float32_round_pack_canonical(FloatParts p, float_status *s) { - switch (p.cls) { - case float_class_dnan: - return float32_default_nan(s); - case float_class_msnan: - return float32_maybe_silence_nan(float32_pack_raw(p), s); - default: - p = round_canonical(p, s, &float32_params); - return float32_pack_raw(p); - } + return float32_pack_raw(round_canonical(p, s, &float32_params)); } static FloatParts float64_unpack_canonical(float64 f, float_status *s) @@ -535,29 +549,7 @@ static FloatParts float64_unpack_canonical(float64 f, float_status *s) static float64 float64_round_pack_canonical(FloatParts p, float_status *s) { - switch (p.cls) { - case float_class_dnan: - return float64_default_nan(s); - case float_class_msnan: - return float64_maybe_silence_nan(float64_pack_raw(p), s); - default: - p = round_canonical(p, s, &float64_params); - return float64_pack_raw(p); - } -} - -/* Simple helpers for checking if what NaN we have */ -static bool is_nan(FloatClass c) -{ - return unlikely(c >= float_class_qnan); -} -static bool is_snan(FloatClass c) -{ - return c == float_class_snan; -} -static bool is_qnan(FloatClass c) -{ - return c == float_class_qnan; + return float64_pack_raw(round_canonical(p, s, &float64_params)); } static FloatParts return_nan(FloatParts a, float_status *s) @@ -565,11 +557,11 @@ static FloatParts return_nan(FloatParts a, float_status *s) switch (a.cls) { case float_class_snan: s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_msnan; + a = parts_silence_nan(a, s); /* fall through */ case float_class_qnan: if (s->default_nan_mode) { - a.cls = float_class_dnan; + return parts_default_nan(s); } break; @@ -586,15 +578,16 @@ static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s) } if (s->default_nan_mode) { - a.cls = float_class_dnan; + return parts_default_nan(s); } else { - if (pickNaN(is_qnan(a.cls), is_snan(a.cls), - is_qnan(b.cls), is_snan(b.cls), + if (pickNaN(a.cls, b.cls, a.frac > b.frac || (a.frac == b.frac && a.sign < b.sign))) { a = b; } - a.cls = float_class_msnan; + if (is_snan(a.cls)) { + return parts_silence_nan(a, s); + } } return a; } @@ -608,17 +601,13 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, s->float_exception_flags |= float_flag_invalid; } - which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls), - is_qnan(b.cls), is_snan(b.cls), - is_qnan(c.cls), is_snan(c.cls), - inf_zero, s); + which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s); if (s->default_nan_mode) { /* Note that this check is after pickNaNMulAdd so that function * has an opportunity to set the Invalid flag. */ - a.cls = float_class_dnan; - return a; + which = 3; } switch (which) { @@ -631,13 +620,14 @@ static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, a = c; break; case 3: - a.cls = float_class_dnan; - return a; + return parts_default_nan(s); default: g_assert_not_reached(); } - a.cls = float_class_msnan; + if (is_snan(a.cls)) { + return parts_silence_nan(a, s); + } return a; } @@ -685,7 +675,7 @@ static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract, if (a.cls == float_class_inf) { if (b.cls == float_class_inf) { float_raise(float_flag_invalid, s); - a.cls = float_class_dnan; + return parts_default_nan(s); } return a; } @@ -831,9 +821,7 @@ static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s) if ((a.cls == float_class_inf && b.cls == float_class_zero) || (a.cls == float_class_zero && b.cls == float_class_inf)) { s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_dnan; - a.sign = sign; - return a; + return parts_default_nan(s); } /* Multiply by 0 or Inf */ if (a.cls == float_class_inf || a.cls == float_class_zero) { @@ -911,8 +899,7 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c, if (inf_zero) { s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_dnan; - return a; + return parts_default_nan(s); } if (flags & float_muladd_negate_c) { @@ -936,12 +923,12 @@ static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c, if (c.cls == float_class_inf) { if (p_class == float_class_inf && p_sign != c.sign) { s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_dnan; + return parts_default_nan(s); } else { a.cls = float_class_inf; a.sign = c.sign ^ sign_flip; + return a; } - return a; } if (p_class == float_class_inf) { @@ -1151,8 +1138,7 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s) && (a.cls == float_class_inf || a.cls == float_class_zero)) { s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_dnan; - return a; + return parts_default_nan(s); } /* Inf / x or 0 / x */ if (a.cls == float_class_inf || a.cls == float_class_zero) { @@ -1203,6 +1189,104 @@ float64 float64_div(float64 a, float64 b, float_status *status) } /* + * Float to Float conversions + * + * Returns the result of converting one float format to another. The + * conversion is performed according to the IEC/IEEE Standard for + * Binary Floating-Point Arithmetic. + * + * The float_to_float helper only needs to take care of raising + * invalid exceptions and handling the conversion on NaNs. + */ + +static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf, + float_status *s) +{ + if (dstf->arm_althp) { + switch (a.cls) { + case float_class_qnan: + case float_class_snan: + /* There is no NaN in the destination format. Raise Invalid + * and return a zero with the sign of the input NaN. + */ + s->float_exception_flags |= float_flag_invalid; + a.cls = float_class_zero; + a.frac = 0; + a.exp = 0; + break; + + case float_class_inf: + /* There is no Inf in the destination format. Raise Invalid + * and return the maximum normal with the correct sign. + */ + s->float_exception_flags |= float_flag_invalid; + a.cls = float_class_normal; + a.exp = dstf->exp_max; + a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift; + break; + + default: + break; + } + } else if (is_nan(a.cls)) { + if (is_snan(a.cls)) { + s->float_exception_flags |= float_flag_invalid; + a = parts_silence_nan(a, s); + } + if (s->default_nan_mode) { + return parts_default_nan(s); + } + } + return a; +} + +float32 float16_to_float32(float16 a, bool ieee, float_status *s) +{ + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts p = float16a_unpack_canonical(a, s, fmt16); + FloatParts pr = float_to_float(p, &float32_params, s); + return float32_round_pack_canonical(pr, s); +} + +float64 float16_to_float64(float16 a, bool ieee, float_status *s) +{ + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts p = float16a_unpack_canonical(a, s, fmt16); + FloatParts pr = float_to_float(p, &float64_params, s); + return float64_round_pack_canonical(pr, s); +} + +float16 float32_to_float16(float32 a, bool ieee, float_status *s) +{ + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts p = float32_unpack_canonical(a, s); + FloatParts pr = float_to_float(p, fmt16, s); + return float16a_round_pack_canonical(pr, s, fmt16); +} + +float64 float32_to_float64(float32 a, float_status *s) +{ + FloatParts p = float32_unpack_canonical(a, s); + FloatParts pr = float_to_float(p, &float64_params, s); + return float64_round_pack_canonical(pr, s); +} + +float16 float64_to_float16(float64 a, bool ieee, float_status *s) +{ + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts p = float64_unpack_canonical(a, s); + FloatParts pr = float_to_float(p, fmt16, s); + return float16a_round_pack_canonical(pr, s, fmt16); +} + +float32 float64_to_float32(float64 a, float_status *s) +{ + FloatParts p = float64_unpack_canonical(a, s); + FloatParts pr = float_to_float(p, &float32_params, s); + return float32_round_pack_canonical(pr, s); +} + +/* * Rounds the floating-point value `a' to an integer, and returns the * result as a floating-point value. The operation is performed * according to the IEC/IEEE Standard for Binary Floating-Point @@ -1350,8 +1434,6 @@ static int64_t round_to_int_and_pack(FloatParts in, int rmode, switch (p.cls) { case float_class_snan: case float_class_qnan: - case float_class_dnan: - case float_class_msnan: s->float_exception_flags = orig_flags | float_flag_invalid; return max; case float_class_inf: @@ -1442,8 +1524,6 @@ static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max, switch (p.cls) { case float_class_snan: case float_class_qnan: - case float_class_dnan: - case float_class_msnan: s->float_exception_flags = orig_flags | float_flag_invalid; return max; case float_class_inf: @@ -1943,8 +2023,7 @@ static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p) } if (a.sign) { s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_dnan; - return a; + return parts_default_nan(s); } if (a.cls == float_class_inf) { return a; /* sqrt(+inf) = +inf */ @@ -2013,6 +2092,78 @@ float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status) return float64_round_pack_canonical(pr, status); } +/*---------------------------------------------------------------------------- +| The pattern for a default generated NaN. +*----------------------------------------------------------------------------*/ + +float16 float16_default_nan(float_status *status) +{ + FloatParts p = parts_default_nan(status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(p); +} + +float32 float32_default_nan(float_status *status) +{ + FloatParts p = parts_default_nan(status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(p); +} + +float64 float64_default_nan(float_status *status) +{ + FloatParts p = parts_default_nan(status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(p); +} + +float128 float128_default_nan(float_status *status) +{ + FloatParts p = parts_default_nan(status); + float128 r; + + /* Extrapolate from the choices made by parts_default_nan to fill + * in the quad-floating format. If the low bit is set, assume we + * want to set all non-snan bits. + */ + r.low = -(p.frac & 1); + r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48); + r.high |= LIT64(0x7FFF000000000000); + r.high |= (uint64_t)p.sign << 63; + + return r; +} + +/*---------------------------------------------------------------------------- +| Returns a quiet NaN from a signalling NaN for the floating point value `a'. +*----------------------------------------------------------------------------*/ + +float16 float16_silence_nan(float16 a, float_status *status) +{ + FloatParts p = float16_unpack_raw(a); + p.frac <<= float16_params.frac_shift; + p = parts_silence_nan(p, status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(p); +} + +float32 float32_silence_nan(float32 a, float_status *status) +{ + FloatParts p = float32_unpack_raw(a); + p.frac <<= float32_params.frac_shift; + p = parts_silence_nan(p, status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(p); +} + +float64 float64_silence_nan(float64 a, float_status *status) +{ + FloatParts p = float64_unpack_raw(a); + p.frac <<= float64_params.frac_shift; + p = parts_silence_nan(p, status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(p); +} /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 @@ -3147,42 +3298,7 @@ float128 uint64_to_float128(uint64_t a, float_status *status) if (a == 0) { return float128_zero; } - return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status); -} - - - - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float32_to_float64(float32 a, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - a = float32_squash_input_denormal(a, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - return commonNaNToFloat64(float32ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 ); - + return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status); } /*---------------------------------------------------------------------------- @@ -3703,173 +3819,6 @@ int float32_unordered_quiet(float32 a, float32 b, float_status *status) return 0; } - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the single-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float64_to_float32(float64 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig; - uint32_t zSig; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat32(float64ToCommonNaN(a, status), status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 22, &aSig ); - zSig = aSig; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x381; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - -} - - -/*---------------------------------------------------------------------------- -| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a -| half-precision floating-point value, returning the result. After being -| shifted into the proper positions, the three fields are simply added -| together to form the result. This means that any integer portion of `zSig' -| will be added into the exponent. Since a properly normalized significand -| will have an integer portion equal to 1, the `zExp' input should be 1 less -| than the desired result exponent whenever `zSig' is a complete, normalized -| significand. -*----------------------------------------------------------------------------*/ -static float16 packFloat16(flag zSign, int zExp, uint16_t zSig) -{ - return make_float16( - (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper half-precision floating- -| point value corresponding to the abstract input. Ordinarily, the abstract -| value is simply rounded and packed into the half-precision format, with -| the inexact exception raised if the abstract input cannot be represented -| exactly. However, if the abstract value is too large, the overflow and -| inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded to -| a subnormal number, and the underflow and inexact exceptions are raised if -| the abstract input cannot be represented exactly as a subnormal half- -| precision floating-point number. -| The `ieee' flag indicates whether to use IEEE standard half precision, or -| ARM-style "alternative representation", which omits the NaN and Inf -| encodings in order to raise the maximum representable exponent by one. -| The input significand `zSig' has its binary point between bits 22 -| and 23, which is 13 bits to the left of the usual location. This shifted -| significand must be normalized or smaller. If `zSig' is not normalized, -| `zExp' must be 0; in that case, the result returned is a subnormal number, -| and it must not require rounding. In the usual case that `zSig' is -| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. -| Note the slightly odd position of the binary point in zSig compared with the -| other roundAndPackFloat functions. This should probably be fixed if we -| need to implement more float16 routines than just conversion. -| The handling of underflow and overflow follows the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float16 roundAndPackFloat16(flag zSign, int zExp, - uint32_t zSig, flag ieee, - float_status *status) -{ - int maxexp = ieee ? 29 : 30; - uint32_t mask; - uint32_t increment; - bool rounding_bumps_exp; - bool is_tiny = false; - - /* Calculate the mask of bits of the mantissa which are not - * representable in half-precision and will be lost. - */ - if (zExp < 1) { - /* Will be denormal in halfprec */ - mask = 0x00ffffff; - if (zExp >= -11) { - mask >>= 11 + zExp; - } - } else { - /* Normal number in halfprec */ - mask = 0x00001fff; - } - - switch (status->float_rounding_mode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((zSig & mask) == increment) { - increment = zSig & (increment << 1); - } - break; - case float_round_ties_away: - increment = (mask + 1) >> 1; - break; - case float_round_up: - increment = zSign ? 0 : mask; - break; - case float_round_down: - increment = zSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - - rounding_bumps_exp = (zSig + increment >= 0x01000000); - - if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { - if (ieee) { - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat16(zSign, 0x1f, 0); - } else { - float_raise(float_flag_invalid, status); - return packFloat16(zSign, 0x1f, 0x3ff); - } - } - - if (zExp < 0) { - /* Note that flush-to-zero does not affect half-precision results */ - is_tiny = - (status->float_detect_tininess == float_tininess_before_rounding) - || (zExp < -1) - || (!rounding_bumps_exp); - } - if (zSig & mask) { - float_raise(float_flag_inexact, status); - if (is_tiny) { - float_raise(float_flag_underflow, status); - } - } - - zSig += increment; - if (rounding_bumps_exp) { - zSig >>= 1; - zExp++; - } - - if (zExp < -10) { - return packFloat16(zSign, 0, 0); - } - if (zExp < 0) { - zSig >>= -zExp; - zExp = 0; - } - return packFloat16(zSign, zExp, zSig >> 13); -} - /*---------------------------------------------------------------------------- | If `a' is denormal and we are in flush-to-zero mode then set the | input-denormal exception and return zero. Otherwise just return the value. @@ -3885,163 +3834,6 @@ float16 float16_squash_input_denormal(float16 a, float_status *status) return a; } -static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr, - uint32_t *zSigPtr) -{ - int8_t shiftCount = countLeadingZeros32(aSig) - 21; - *zSigPtr = aSig << shiftCount; - *zExpPtr = 1 - shiftCount; -} - -/* Half precision floats come in two formats: standard IEEE and "ARM" format. - The latter gains extra exponent range by omitting the NaN/Inf encodings. */ - -float32 float16_to_float32(float16 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - aSign = extractFloat16Sign(a); - aExp = extractFloat16Exp(a); - aSig = extractFloat16Frac(a); - - if (aExp == 0x1f && ieee) { - if (aSig) { - return commonNaNToFloat32(float16ToCommonNaN(a, status), status); - } - return packFloat32(aSign, 0xff, 0); - } - if (aExp == 0) { - if (aSig == 0) { - return packFloat32(aSign, 0, 0); - } - - normalizeFloat16Subnormal(aSig, &aExp, &aSig); - aExp--; - } - return packFloat32( aSign, aExp + 0x70, aSig << 13); -} - -float16 float32_to_float16(float32 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - a = float32_squash_input_denormal(a, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - /* Input is a NaN */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0, 0); - } - return commonNaNToFloat16( - float32ToCommonNaN(a, status), status); - } - /* Infinity */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0x1f, 0x3ff); - } - return packFloat16(aSign, 0x1f, 0); - } - if (aExp == 0 && aSig == 0) { - return packFloat16(aSign, 0, 0); - } - /* Decimal point between bits 22 and 23. Note that we add the 1 bit - * even if the input is denormal; however this is harmless because - * the largest possible single-precision denormal is still smaller - * than the smallest representable half-precision denormal, and so we - * will end up ignoring aSig and returning via the "always return zero" - * codepath. - */ - aSig |= 0x00800000; - aExp -= 0x71; - - return roundAndPackFloat16(aSign, aExp, aSig, ieee, status); -} - -float64 float16_to_float64(float16 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - aSign = extractFloat16Sign(a); - aExp = extractFloat16Exp(a); - aSig = extractFloat16Frac(a); - - if (aExp == 0x1f && ieee) { - if (aSig) { - return commonNaNToFloat64( - float16ToCommonNaN(a, status), status); - } - return packFloat64(aSign, 0x7ff, 0); - } - if (aExp == 0) { - if (aSig == 0) { - return packFloat64(aSign, 0, 0); - } - - normalizeFloat16Subnormal(aSig, &aExp, &aSig); - aExp--; - } - return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); -} - -float16 float64_to_float16(float64 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig; - uint32_t zSig; - - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac(a); - aExp = extractFloat64Exp(a); - aSign = extractFloat64Sign(a); - if (aExp == 0x7FF) { - if (aSig) { - /* Input is a NaN */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0, 0); - } - return commonNaNToFloat16( - float64ToCommonNaN(a, status), status); - } - /* Infinity */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0x1f, 0x3ff); - } - return packFloat16(aSign, 0x1f, 0); - } - shift64RightJamming(aSig, 29, &aSig); - zSig = aSig; - if (aExp == 0 && zSig == 0) { - return packFloat16(aSign, 0, 0); - } - /* Decimal point between bits 22 and 23. Note that we add the 1 bit - * even if the input is denormal; however this is harmless because - * the largest possible single-precision denormal is still smaller - * than the smallest representable half-precision denormal, and so we - * will end up ignoring aSig and returning via the "always return zero" - * codepath. - */ - zSig |= 0x00800000; - aExp -= 0x3F1; - - return roundAndPackFloat16(aSign, aExp, zSig, ieee, status); -} - /*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value | `a' to the extended double-precision floating-point format. The conversion |