/*** UPDATE COMMENTS. ***/ #include "softfloat_types.h" union ui32_f32 { uint32_t ui; float32_t f; }; union ui64_f64 { uint64_t ui; float64_t f; }; #ifdef LITTLEENDIAN union ui128_f128 { uint64_t ui0, ui64; float128_t f; }; #else union ui128_f128 { uint64_t ui64, ui0; float128_t f; }; #endif uint_fast32_t softfloat_roundPackToUI32( bool, uint_fast64_t, int_fast8_t, bool ); uint_fast64_t softfloat_roundPackToUI64( bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool ); /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the | input. If `zSign' is 1, the input is negated before being converted to an | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input | is simply rounded to an integer, with the inexact exception raised if the | input cannot be represented exactly as an integer. However, if the fixed- | point input is too large, the invalid exception is raised and the largest | positive or negative integer is returned. *----------------------------------------------------------------------------*/ int_fast32_t softfloat_roundPackToI32( bool, uint_fast64_t, int_fast8_t, bool ); /*---------------------------------------------------------------------------- | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and | `absZ1', with binary point between bits 63 and 64 (between the input words), | and returns the properly rounded 64-bit integer corresponding to the input. | If `zSign' is 1, the input is negated before being converted to an integer. | Ordinarily, the fixed-point input is simply rounded to an integer, with | the inexact exception raised if the input cannot be represented exactly as | an integer. However, if the fixed-point input is too large, the invalid | exception is raised and the largest positive or negative integer is | returned. *----------------------------------------------------------------------------*/ int_fast64_t softfloat_roundPackToI64( bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool ); /*---------------------------------------------------------------------------- | Returns the sign bit of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define signF32UI( a ) ((bool)((uint32_t)(a)>>31)) /*---------------------------------------------------------------------------- | Returns the exponent bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define expF32UI( a ) ((int_fast16_t)((a)>>23)&0xFF) /*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define fracF32UI( a ) ((a)&0x007FFFFF) /*---------------------------------------------------------------------------- | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a | single-precision floating-point value, returning the result. After being | shifted into the proper positions, the three fields are simply added | together to form the result. This means that any integer portion of `zSig' | will be added into the exponent. Since a properly normalized significand | will have an integer portion equal to 1, the `zExp' input should be 1 less | than the desired result exponent whenever `zSig' is a complete, normalized | significand. *----------------------------------------------------------------------------*/ #define packToF32UI( sign, exp, sig ) (((uint32_t)(sign)<<31)+((uint32_t)(exp)<<23)+(sig)) /*---------------------------------------------------------------------------- | Normalizes the subnormal single-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; }; struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t ); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper single-precision floating- | point value corresponding to the abstract input. Ordinarily, the abstract | value is simply rounded and packed into the single-precision format, with | the inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded to | a subnormal number, and the underflow and inexact exceptions are raised if | the abstract input cannot be represented exactly as a subnormal single- | precision floating-point number. | The input significand `zSig' has its binary point between bits 30 | and 29, which is 7 bits to the left of the usual location. This shifted | significand must be normalized or smaller. If `zSig' is not normalized, | `zExp' must be 0; in that case, the result returned is a subnormal number, | and it must not require rounding. In the usual case that `zSig' is | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. | The handling of underflow and overflow follows the IEC/IEEE Standard for | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t ); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper single-precision floating- | point value corresponding to the abstract input. This routine is just like | `roundAndPackFloat32' except that `zSig' does not have to be normalized. | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' | floating-point exponent. *----------------------------------------------------------------------------*/ float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t ); /*---------------------------------------------------------------------------- | Returns the result of adding the absolute values of the single-precision | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated | before being returned. `zSign' is ignored if the result is a NaN. | The addition is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t, bool ); /*---------------------------------------------------------------------------- | Returns the result of subtracting the absolute values of the single- | precision floating-point values `a' and `b'. If `zSign' is 1, the | difference is negated before being returned. `zSign' is ignored if the | result is a NaN. The subtraction is performed according to the IEC/IEEE | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t, bool ); /*---------------------------------------------------------------------------- | Returns the sign bit of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define signF64UI( a ) ((bool)((uint64_t)(a)>>63)) /*---------------------------------------------------------------------------- | Returns the exponent bits of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define expF64UI( a ) ((int_fast16_t)((a)>>52)&0x7FF) /*---------------------------------------------------------------------------- | Returns the fraction bits of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ #define fracF64UI( a ) ((a)&UINT64_C(0x000FFFFFFFFFFFFF)) /*---------------------------------------------------------------------------- | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a | double-precision floating-point value, returning the result. After being | shifted into the proper positions, the three fields are simply added | together to form the result. This means that any integer portion of `zSig' | will be added into the exponent. Since a properly normalized significand | will have an integer portion equal to 1, the `zExp' input should be 1 less | than the desired result exponent whenever `zSig' is a complete, normalized | significand. *----------------------------------------------------------------------------*/ #define packToF64UI( sign, exp, sig ) (((uint64_t)(sign)<<63)+((uint64_t)(exp)<<52)+(sig)) /*---------------------------------------------------------------------------- | Normalizes the subnormal double-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; }; struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t ); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper double-precision floating- | point value corresponding to the abstract input. Ordinarily, the abstract | value is simply rounded and packed into the double-precision format, with | the inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded | to a subnormal number, and the underflow and inexact exceptions are raised | if the abstract input cannot be represented exactly as a subnormal double- | precision floating-point number. | The input significand `zSig' has its binary point between bits 62 | and 61, which is 10 bits to the left of the usual location. This shifted | significand must be normalized or smaller. If `zSig' is not normalized, | `zExp' must be 0; in that case, the result returned is a subnormal number, | and it must not require rounding. In the usual case that `zSig' is | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. | The handling of underflow and overflow follows the IEC/IEEE Standard for | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t ); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper double-precision floating- | point value corresponding to the abstract input. This routine is just like | `roundAndPackFloat64' except that `zSig' does not have to be normalized. | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' | floating-point exponent. *----------------------------------------------------------------------------*/ float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t ); /*---------------------------------------------------------------------------- | Returns the result of adding the absolute values of the double-precision | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated | before being returned. `zSign' is ignored if the result is a NaN. | The addition is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool ); /*---------------------------------------------------------------------------- | Returns the result of subtracting the absolute values of the double- | precision floating-point values `a' and `b'. If `zSign' is 1, the | difference is negated before being returned. `zSign' is ignored if the | result is a NaN. The subtraction is performed according to the IEC/IEEE | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );