diff options
Diffstat (limited to 'include/fpu')
-rw-r--r-- | include/fpu/softfloat-helpers.h | 89 | ||||
-rw-r--r-- | include/fpu/softfloat-types.h | 225 | ||||
-rw-r--r-- | include/fpu/softfloat.h | 107 |
3 files changed, 359 insertions, 62 deletions
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index 94cbe07..90862f5 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -75,6 +75,36 @@ static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val, status->floatx80_rounding_precision = val; } +static inline void set_floatx80_behaviour(FloatX80Behaviour b, + float_status *status) +{ + status->floatx80_behaviour = b; +} + +static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule, + float_status *status) +{ + status->float_2nan_prop_rule = rule; +} + +static inline void set_float_3nan_prop_rule(Float3NaNPropRule rule, + float_status *status) +{ + status->float_3nan_prop_rule = rule; +} + +static inline void set_float_infzeronan_rule(FloatInfZeroNaNRule rule, + float_status *status) +{ + status->float_infzeronan_rule = rule; +} + +static inline void set_float_default_nan_pattern(uint8_t dnan_pattern, + float_status *status) +{ + status->default_nan_pattern = dnan_pattern; +} + static inline void set_flush_to_zero(bool val, float_status *status) { status->flush_to_zero = val; @@ -85,6 +115,12 @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status) status->flush_inputs_to_zero = val; } +static inline void set_float_ftz_detection(FloatFTZDetection d, + float_status *status) +{ + status->ftz_detection = d; +} + static inline void set_default_nan_mode(bool val, float_status *status) { status->default_nan_mode = val; @@ -95,50 +131,79 @@ static inline void set_snan_bit_is_one(bool val, float_status *status) status->snan_bit_is_one = val; } -static inline void set_use_first_nan(bool val, float_status *status) -{ - status->use_first_nan = val; -} - static inline void set_no_signaling_nans(bool val, float_status *status) { status->no_signaling_nans = val; } -static inline bool get_float_detect_tininess(float_status *status) +static inline bool get_float_detect_tininess(const float_status *status) { return status->tininess_before_rounding; } -static inline FloatRoundMode get_float_rounding_mode(float_status *status) +static inline FloatRoundMode get_float_rounding_mode(const float_status *status) { return status->float_rounding_mode; } -static inline int get_float_exception_flags(float_status *status) +static inline int get_float_exception_flags(const float_status *status) { return status->float_exception_flags; } static inline FloatX80RoundPrec -get_floatx80_rounding_precision(float_status *status) +get_floatx80_rounding_precision(const float_status *status) { return status->floatx80_rounding_precision; } -static inline bool get_flush_to_zero(float_status *status) +static inline FloatX80Behaviour +get_floatx80_behaviour(const float_status *status) +{ + return status->floatx80_behaviour; +} + +static inline Float2NaNPropRule +get_float_2nan_prop_rule(const float_status *status) +{ + return status->float_2nan_prop_rule; +} + +static inline Float3NaNPropRule +get_float_3nan_prop_rule(const float_status *status) +{ + return status->float_3nan_prop_rule; +} + +static inline FloatInfZeroNaNRule +get_float_infzeronan_rule(const float_status *status) +{ + return status->float_infzeronan_rule; +} + +static inline uint8_t get_float_default_nan_pattern(const float_status *status) +{ + return status->default_nan_pattern; +} + +static inline bool get_flush_to_zero(const float_status *status) { return status->flush_to_zero; } -static inline bool get_flush_inputs_to_zero(float_status *status) +static inline bool get_flush_inputs_to_zero(const float_status *status) { return status->flush_inputs_to_zero; } -static inline bool get_default_nan_mode(float_status *status) +static inline bool get_default_nan_mode(const float_status *status) { return status->default_nan_mode; } +static inline FloatFTZDetection get_float_ftz_detection(const float_status *status) +{ + return status->ftz_detection; +} + #endif /* SOFTFLOAT_HELPERS_H */ diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 0884ec4..1af2a0c 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -80,6 +80,8 @@ this code that are retained. #ifndef SOFTFLOAT_TYPES_H #define SOFTFLOAT_TYPES_H +#include "hw/registerfields.h" + /* * Software IEC/IEEE floating-point types. */ @@ -138,6 +140,8 @@ typedef enum __attribute__((__packed__)) { float_round_to_odd = 5, /* Not an IEEE rounding mode: round to closest odd, overflow to inf */ float_round_to_odd_inf = 6, + /* Not an IEEE rounding mode: round to nearest even, overflow to max */ + float_round_nearest_even_max = 7, } FloatRoundMode; /* @@ -150,8 +154,10 @@ enum { float_flag_overflow = 0x0004, float_flag_underflow = 0x0008, float_flag_inexact = 0x0010, - float_flag_input_denormal = 0x0020, - float_flag_output_denormal = 0x0040, + /* We flushed an input denormal to 0 (because of flush_inputs_to_zero) */ + float_flag_input_denormal_flushed = 0x0020, + /* We flushed an output denormal to 0 (because of flush_to_zero) */ + float_flag_output_denormal_flushed = 0x0040, float_flag_invalid_isi = 0x0080, /* inf - inf */ float_flag_invalid_imz = 0x0100, /* inf * 0 */ float_flag_invalid_idi = 0x0200, /* inf / inf */ @@ -159,6 +165,13 @@ enum { float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */ float_flag_invalid_cvti = 0x1000, /* non-nan to integer */ float_flag_invalid_snan = 0x2000, /* any operand was snan */ + /* + * An input was denormal and we used it (without flushing it to zero). + * Not set if we do not actually use the denormal input (e.g. + * because some other input was a NaN, or because the operation + * wasn't actually carried out (divide-by-zero; invalid)) + */ + float_flag_input_denormal_used = 0x4000, }; /* @@ -171,6 +184,193 @@ typedef enum __attribute__((__packed__)) { } FloatX80RoundPrec; /* + * 2-input NaN propagation rule. Individual architectures have + * different rules for which input NaN is propagated to the output + * when there is more than one NaN on the input. + * + * If default_nan_mode is enabled then it is valid not to set a + * NaN propagation rule, because the softfloat code guarantees + * not to try to pick a NaN to propagate in default NaN mode. + * When not in default-NaN mode, it is an error for the target + * not to set the rule in float_status, and we will assert if + * we need to handle an input NaN and no rule was selected. + */ +typedef enum __attribute__((__packed__)) { + /* No propagation rule specified */ + float_2nan_prop_none = 0, + /* Prefer SNaN over QNaN, then operand A over B */ + float_2nan_prop_s_ab, + /* Prefer SNaN over QNaN, then operand B over A */ + float_2nan_prop_s_ba, + /* Prefer A over B regardless of SNaN vs QNaN */ + float_2nan_prop_ab, + /* Prefer B over A regardless of SNaN vs QNaN */ + float_2nan_prop_ba, + /* + * This implements x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + float_2nan_prop_x87, +} Float2NaNPropRule; + +/* + * 3-input NaN propagation rule, for fused multiply-add. Individual + * architectures have different rules for which input NaN is + * propagated to the output when there is more than one NaN on the + * input. + * + * If default_nan_mode is enabled then it is valid not to set a NaN + * propagation rule, because the softfloat code guarantees not to try + * to pick a NaN to propagate in default NaN mode. When not in + * default-NaN mode, it is an error for the target not to set the rule + * in float_status if it uses a muladd, and we will assert if we need + * to handle an input NaN and no rule was selected. + * + * The naming scheme for Float3NaNPropRule values is: + * float_3nan_prop_s_abc: + * = "Prefer SNaN over QNaN, then operand A over B over C" + * float_3nan_prop_abc: + * = "Prefer A over B over C regardless of SNaN vs QNAN" + * + * For QEMU, the multiply-add operation is A * B + C. + */ + +/* + * We set the Float3NaNPropRule enum values up so we can select the + * right value in pickNaNMulAdd in a data driven way. + */ +FIELD(3NAN, 1ST, 0, 2) /* which operand is most preferred ? */ +FIELD(3NAN, 2ND, 2, 2) /* which operand is next most preferred ? */ +FIELD(3NAN, 3RD, 4, 2) /* which operand is least preferred ? */ +FIELD(3NAN, SNAN, 6, 1) /* do we prefer SNaN over QNaN ? */ + +#define PROPRULE(X, Y, Z) \ + ((X << R_3NAN_1ST_SHIFT) | (Y << R_3NAN_2ND_SHIFT) | (Z << R_3NAN_3RD_SHIFT)) + +typedef enum __attribute__((__packed__)) { + float_3nan_prop_none = 0, /* No propagation rule specified */ + float_3nan_prop_abc = PROPRULE(0, 1, 2), + float_3nan_prop_acb = PROPRULE(0, 2, 1), + float_3nan_prop_bac = PROPRULE(1, 0, 2), + float_3nan_prop_bca = PROPRULE(1, 2, 0), + float_3nan_prop_cab = PROPRULE(2, 0, 1), + float_3nan_prop_cba = PROPRULE(2, 1, 0), + float_3nan_prop_s_abc = float_3nan_prop_abc | R_3NAN_SNAN_MASK, + float_3nan_prop_s_acb = float_3nan_prop_acb | R_3NAN_SNAN_MASK, + float_3nan_prop_s_bac = float_3nan_prop_bac | R_3NAN_SNAN_MASK, + float_3nan_prop_s_bca = float_3nan_prop_bca | R_3NAN_SNAN_MASK, + float_3nan_prop_s_cab = float_3nan_prop_cab | R_3NAN_SNAN_MASK, + float_3nan_prop_s_cba = float_3nan_prop_cba | R_3NAN_SNAN_MASK, +} Float3NaNPropRule; + +#undef PROPRULE + +/* + * Rule for result of fused multiply-add 0 * Inf + NaN. + * This must be a NaN, but implementations differ on whether this + * is the input NaN or the default NaN. + * + * You don't need to set this if default_nan_mode is enabled. + * When not in default-NaN mode, it is an error for the target + * not to set the rule in float_status if it uses muladd, and we + * will assert if we need to handle an input NaN and no rule was + * selected. + */ +typedef enum __attribute__((__packed__)) { + /* No propagation rule specified */ + float_infzeronan_none = 0, + /* Result is never the default NaN (so always the input NaN) */ + float_infzeronan_dnan_never = 1, + /* Result is always the default NaN */ + float_infzeronan_dnan_always = 2, + /* Result is the default NaN if the input NaN is quiet */ + float_infzeronan_dnan_if_qnan = 3, + /* + * Don't raise Invalid for 0 * Inf + NaN. Default is to raise. + * IEEE 754-2008 section 7.2 makes it implementation defined whether + * 0 * Inf + QNaN raises Invalid or not. Note that 0 * Inf + SNaN will + * raise the Invalid flag for the SNaN anyway. + * + * This is a flag which can be ORed in with any of the above + * DNaN behaviour options. + */ + float_infzeronan_suppress_invalid = (1 << 7), +} FloatInfZeroNaNRule; + +/* + * When flush_to_zero is set, should we detect denormal results to + * be flushed before or after rounding? For most architectures this + * should be set to match the tininess_before_rounding setting, + * but a few architectures, e.g. MIPS MSA, detect FTZ before + * rounding but tininess after rounding. + * + * This enum is arranged so that the default if the target doesn't + * configure it matches the default for tininess_before_rounding + * (i.e. "after rounding"). + */ +typedef enum __attribute__((__packed__)) { + float_ftz_after_rounding = 0, + float_ftz_before_rounding = 1, +} FloatFTZDetection; + +/* + * floatx80 is primarily used by x86 and m68k, and there are + * differences in the handling, largely related to the explicit + * Integer bit which floatx80 has and the other float formats do not. + * These flag values allow specification of the target's requirements + * and can be ORed together to set floatx80_behaviour. + */ +typedef enum __attribute__((__packed__)) { + /* In the default Infinity value, is the Integer bit 0 ? */ + floatx80_default_inf_int_bit_is_zero = 1, + /* + * Are Pseudo-infinities (Inf with the Integer bit zero) valid? + * If so, floatx80_is_infinity() will return true for them. + * If not, floatx80_invalid_encoding will return false for them, + * and using them as inputs to a float op will raise Invalid. + */ + floatx80_pseudo_inf_valid = 2, + /* + * Are Pseudo-NaNs (NaNs where the Integer bit is zero) valid? + * If not, floatx80_invalid_encoding() will return false for them, + * and using them as inputs to a float op will raise Invalid. + */ + floatx80_pseudo_nan_valid = 4, + /* + * Are Unnormals (0 < exp < 0x7fff, Integer bit zero) valid? + * If not, floatx80_invalid_encoding() will return false for them, + * and using them as inputs to a float op will raise Invalid. + */ + floatx80_unnormal_valid = 8, + + /* + * If the exponent is 0 and the Integer bit is set, Intel call + * this a "pseudo-denormal"; x86 supports that only on input + * (treating them as denormals by ignoring the Integer bit). + * For m68k, the integer bit is considered validly part of the + * input value when the exponent is 0, and may be 0 or 1, + * giving extra range. They may also be generated as outputs. + * (The m68k manual actually calls these values part of the + * normalized number range, not the denormalized number range.) + * + * By default you get the Intel behaviour where the Integer + * bit is ignored; if this is set then the Integer bit value + * is honoured, m68k-style. + * + * Either way, floatx80_invalid_encoding() will always accept + * pseudo-denormals. + */ + floatx80_pseudo_denormal_valid = 16, +} FloatX80Behaviour; + +/* * Floating Point Status. Individual architectures may maintain * several versions of float_status for different functions. The * correct status for the operation is then passed by reference to @@ -181,19 +381,34 @@ typedef struct float_status { uint16_t float_exception_flags; FloatRoundMode float_rounding_mode; FloatX80RoundPrec floatx80_rounding_precision; + FloatX80Behaviour floatx80_behaviour; + Float2NaNPropRule float_2nan_prop_rule; + Float3NaNPropRule float_3nan_prop_rule; + FloatInfZeroNaNRule float_infzeronan_rule; bool tininess_before_rounding; - /* should denormalised results go to zero and set the inexact flag? */ + /* should denormalised results go to zero and set output_denormal_flushed? */ bool flush_to_zero; - /* should denormalised inputs go to zero and set the input_denormal flag? */ + /* do we detect and flush denormal results before or after rounding? */ + FloatFTZDetection ftz_detection; + /* should denormalised inputs go to zero and set input_denormal_flushed? */ bool flush_inputs_to_zero; bool default_nan_mode; /* + * The pattern to use for the default NaN. Here the high bit specifies + * the default NaN's sign bit, and bits 6..0 specify the high bits of the + * fractional part. The low bits of the fractional part are copies of bit 0. + * The exponent of the default NaN is (as for any NaN) always all 1s. + * Note that a value of 0 here is not a valid NaN. The target must set + * this to the correct non-zero value, or we will assert when trying to + * create a default NaN. + */ + uint8_t default_nan_pattern; + /* * The flags below are not used on all specializations and may * constant fold away (see snan_bit_is_one()/no_signalling_nans() in * softfloat-specialize.inc.c) */ bool snan_bit_is_one; - bool use_first_nan; bool no_signaling_nans; /* should overflowed results subtract re_bias to its exponent? */ bool rebias_overflow; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index eb64075..c18ab2c 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -120,14 +120,16 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status); | Using these differs from negating an input or output before calling | the muladd function in that this means that a NaN doesn't have its | sign bit inverted before it is propagated. -| We also support halving the result before rounding, as a special -| case to support the ARM fused-sqrt-step instruction FRSQRTS. +| +| With float_muladd_suppress_add_product_zero, if A or B is zero +| such that the product is a true zero, then return C without addition. +| This preserves the sign of C when C is +/- 0. Used for Hexagon. *----------------------------------------------------------------------------*/ enum { float_muladd_negate_c = 1, float_muladd_negate_product = 2, float_muladd_negate_result = 4, - float_muladd_halve_result = 8, + float_muladd_suppress_add_product_zero = 8, }; /*---------------------------------------------------------------------------- @@ -238,6 +240,8 @@ float16 float16_add(float16, float16, float_status *status); float16 float16_sub(float16, float16, float_status *status); float16 float16_mul(float16, float16, float_status *status); float16 float16_muladd(float16, float16, float16, int, float_status *status); +float16 float16_muladd_scalbn(float16, float16, float16, + int, int, float_status *status); float16 float16_div(float16, float16, float_status *status); float16 float16_scalbn(float16, int, float_status *status); float16 float16_min(float16, float16, float_status *status); @@ -597,6 +601,8 @@ float32 float32_mul(float32, float32, float_status *status); float32 float32_div(float32, float32, float_status *status); float32 float32_rem(float32, float32, float_status *status); float32 float32_muladd(float32, float32, float32, int, float_status *status); +float32 float32_muladd_scalbn(float32, float32, float32, + int, int, float_status *status); float32 float32_sqrt(float32, float_status *status); float32 float32_exp2(float32, float_status *status); float32 float32_log2(float32, float_status *status); @@ -792,6 +798,8 @@ float64 float64_mul(float64, float64, float_status *status); float64 float64_div(float64, float64, float_status *status); float64 float64_rem(float64, float64, float_status *status); float64 float64_muladd(float64, float64, float64, int, float_status *status); +float64 float64_muladd_scalbn(float64, float64, float64, + int, int, float_status *status); float64 float64_sqrt(float64, float_status *status); float64 float64_log2(float64, float_status *status); FloatRelation float64_compare(float64, float64, float_status *status); @@ -952,7 +960,7 @@ float128 floatx80_to_float128(floatx80, float_status *status); /*---------------------------------------------------------------------------- | The pattern for an extended double-precision inf. *----------------------------------------------------------------------------*/ -extern const floatx80 floatx80_infinity; +floatx80 floatx80_default_inf(bool zSign, float_status *status); /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision operations. @@ -987,14 +995,19 @@ static inline floatx80 floatx80_chs(floatx80 a) return a; } -static inline bool floatx80_is_infinity(floatx80 a) +static inline bool floatx80_is_infinity(floatx80 a, float_status *status) { -#if defined(TARGET_M68K) - return (a.high & 0x7fff) == floatx80_infinity.high && !(a.low << 1); -#else - return (a.high & 0x7fff) == floatx80_infinity.high && - a.low == floatx80_infinity.low; -#endif + /* + * It's target-specific whether the Integer bit is permitted + * to be 0 in a valid Infinity value. (x86 says no, m68k says yes). + */ + bool intbit = a.low >> 63; + + if (!intbit && + !(status->floatx80_behaviour & floatx80_pseudo_inf_valid)) { + return false; + } + return (a.high & 0x7fff) == 0x7fff && !(a.low << 1); } static inline bool floatx80_is_neg(floatx80 a) @@ -1060,41 +1073,45 @@ static inline bool floatx80_unordered_quiet(floatx80 a, floatx80 b, /*---------------------------------------------------------------------------- | Return whether the given value is an invalid floatx80 encoding. -| Invalid floatx80 encodings arise when the integer bit is not set, but -| the exponent is not zero. The only times the integer bit is permitted to -| be zero is in subnormal numbers and the value zero. -| This includes what the Intel software developer's manual calls pseudo-NaNs, -| pseudo-infinities and un-normal numbers. It does not include -| pseudo-denormals, which must still be correctly handled as inputs even -| if they are never generated as outputs. +| Invalid floatx80 encodings may arise when the integer bit is not set +| correctly; this is target-specific. In Intel terminology the +| categories are: +| exp == 0, int = 0, mantissa == 0 : zeroes +| exp == 0, int = 0, mantissa != 0 : denormals +| exp == 0, int = 1 : pseudo-denormals +| 0 < exp < 0x7fff, int = 0 : unnormals +| 0 < exp < 0x7fff, int = 1 : normals +| exp == 0x7fff, int = 0, mantissa == 0 : pseudo-infinities +| exp == 0x7fff, int = 1, mantissa == 0 : infinities +| exp == 0x7fff, int = 0, mantissa != 0 : pseudo-NaNs +| exp == 0x7fff, int = 1, mantissa == 0 : NaNs +| +| The usual IEEE cases of zero, denormal, normal, inf and NaN are always valid. +| x87 permits as input also pseudo-denormals. +| m68k permits all those and also pseudo-infinities, pseudo-NaNs and unnormals. +| +| Since we don't have a target that handles floatx80 but prohibits +| pseudo-denormals in input, we don't currently have a floatx80_behaviour +| flag for that case, but instead always accept it. Conveniently this +| means that all cases with either exponent 0 or the integer bit set are +| valid for all targets. *----------------------------------------------------------------------------*/ -static inline bool floatx80_invalid_encoding(floatx80 a) -{ -#if defined(TARGET_M68K) - /*------------------------------------------------------------------------- - | With m68k, the explicit integer bit can be zero in the case of: - | - zeros (exp == 0, mantissa == 0) - | - denormalized numbers (exp == 0, mantissa != 0) - | - unnormalized numbers (exp != 0, exp < 0x7FFF) - | - infinities (exp == 0x7FFF, mantissa == 0) - | - not-a-numbers (exp == 0x7FFF, mantissa != 0) - | - | For infinities and NaNs, the explicit integer bit can be either one or - | zero. - | - | The IEEE 754 standard does not define a zero integer bit. Such a number - | is an unnormalized number. Hardware does not directly support - | denormalized and unnormalized numbers, but implicitly supports them by - | trapping them as unimplemented data types, allowing efficient conversion - | in software. - | - | See "M68000 FAMILY PROGRAMMER’S REFERENCE MANUAL", - | "1.6 FLOATING-POINT DATA TYPES" - *------------------------------------------------------------------------*/ - return false; -#else - return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; -#endif +static inline bool floatx80_invalid_encoding(floatx80 a, float_status *s) +{ + if ((a.low >> 63) || (a.high & 0x7fff) == 0) { + /* Anything with the Integer bit set or the exponent 0 is valid */ + return false; + } + + if ((a.high & 0x7fff) == 0x7fff) { + if (a.low) { + return !(s->floatx80_behaviour & floatx80_pseudo_nan_valid); + } else { + return !(s->floatx80_behaviour & floatx80_pseudo_inf_valid); + } + } else { + return !(s->floatx80_behaviour & floatx80_unnormal_valid); + } } #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) |