aboutsummaryrefslogtreecommitdiff
path: root/target/i386
diff options
context:
space:
mode:
authorJoseph Myers <joseph@codesourcery.com>2020-06-17 23:24:29 +0000
committerPaolo Bonzini <pbonzini@redhat.com>2020-06-26 09:39:39 -0400
commit1f18a1e6ab8368a4eab2d22894d3b2ae75250cd3 (patch)
tree74f4efb45983d3373128b282ddd31c2fb6689391 /target/i386
parent5eebc49d2d0aa5fc7e90eeac97533051bb7b72fa (diff)
downloadqemu-1f18a1e6ab8368a4eab2d22894d3b2ae75250cd3.zip
qemu-1f18a1e6ab8368a4eab2d22894d3b2ae75250cd3.tar.gz
qemu-1f18a1e6ab8368a4eab2d22894d3b2ae75250cd3.tar.bz2
target/i386: reimplement fyl2x using floatx80 operations
The x87 fyl2x emulation is currently based around conversion to double. This is inherently unsuitable for a good emulation of any floatx80 operation. Reimplement using the soft-float operations, building on top of the reimplementation of fyl2xp1 and factoring out code to be shared between the two instructions. The included test assumes that the result in round-to-nearest mode should always be one of the two closest floating-point numbers to the mathematically exact result (including that it should be exact, in the exact cases which cover more cases than for fyl2xp1). Signed-off-by: Joseph Myers <joseph@codesourcery.com> Message-Id: <alpine.DEB.2.21.2006172321530.20587@digraph.polyomino.org.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'target/i386')
-rw-r--r--target/i386/fpu_helper.c373
1 files changed, 262 insertions, 111 deletions
diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index 884e84c..62820bc 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -1223,21 +1223,6 @@ void helper_f2xm1(CPUX86State *env)
merge_exception_flags(env, old_flags);
}
-void helper_fyl2x(CPUX86State *env)
-{
- double fptemp = floatx80_to_double(env, ST0);
-
- if (fptemp > 0.0) {
- fptemp = log(fptemp) / log(2.0); /* log2(ST) */
- fptemp *= floatx80_to_double(env, ST1);
- ST1 = double_to_floatx80(env, fptemp);
- fpop(env);
- } else {
- env->fpus &= ~0x4700;
- env->fpus |= 0x400;
- }
-}
-
void helper_fptan(CPUX86State *env)
{
double fptemp = floatx80_to_double(env, ST0);
@@ -1395,6 +1380,118 @@ void helper_fprem(CPUX86State *env)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
+/*
+ * Compute an approximation of log2(1+arg), where 1+arg is in the
+ * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
+ * function is called, rounding precision is set to 80 and the
+ * round-to-nearest mode is in effect. arg must not be exactly zero,
+ * and must not be so close to zero that underflow might occur.
+ */
+static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
+ uint64_t *sig0, uint64_t *sig1)
+{
+ uint64_t arg0_sig = extractFloatx80Frac(arg);
+ int32_t arg0_exp = extractFloatx80Exp(arg);
+ bool arg0_sign = extractFloatx80Sign(arg);
+ bool asign;
+ int32_t dexp, texp, aexp;
+ uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
+ uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
+ uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
+ floatx80 t2, accum;
+
+ /*
+ * Compute an approximation of arg/(2+arg), with extra precision,
+ * as the argument to a polynomial approximation. The extra
+ * precision is only needed for the first term of the
+ * approximation, with subsequent terms being significantly
+ * smaller; the approximation only uses odd exponents, and the
+ * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
+ */
+ if (arg0_sign) {
+ dexp = 0x3fff;
+ shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
+ sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
+ } else {
+ dexp = 0x4000;
+ shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
+ dsig0 |= 0x8000000000000000ULL;
+ }
+ texp = arg0_exp - dexp + 0x3ffe;
+ rsig0 = arg0_sig;
+ rsig1 = 0;
+ rsig2 = 0;
+ if (dsig0 <= rsig0) {
+ shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
+ ++texp;
+ }
+ tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
+ mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
+ sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
+ &rsig0, &rsig1, &rsig2);
+ while ((int64_t) rsig0 < 0) {
+ --tsig0;
+ add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
+ &rsig0, &rsig1, &rsig2);
+ }
+ tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
+ /*
+ * No need to correct any estimation error in tsig1; even with
+ * such error, it is accurate enough. Now compute the square of
+ * that approximation.
+ */
+ mul128To256(tsig0, tsig1, tsig0, tsig1,
+ &t2sig0, &t2sig1, &t2sig2, &t2sig3);
+ t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
+ t2sig0, t2sig1, &env->fp_status);
+
+ /* Compute the lower parts of the polynomial expansion. */
+ accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
+ accum = floatx80_mul(accum, t2, &env->fp_status);
+ accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
+
+ /*
+ * The full polynomial expansion is fyl2x_coeff_0 + accum (where
+ * accum has much lower magnitude, and so, in particular, carry
+ * out of the addition is not possible), multiplied by t. (This
+ * expansion is only accurate to about 70 bits, not 128 bits.)
+ */
+ aexp = extractFloatx80Exp(fyl2x_coeff_0);
+ asign = extractFloatx80Sign(fyl2x_coeff_0);
+ shift128RightJamming(extractFloatx80Frac(accum), 0,
+ aexp - extractFloatx80Exp(accum),
+ &asig0, &asig1);
+ bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
+ bsig1 = 0;
+ if (asign == extractFloatx80Sign(accum)) {
+ add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
+ } else {
+ sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
+ }
+ /* Multiply by t to compute the required result. */
+ mul128To256(asig0, asig1, tsig0, tsig1,
+ &asig0, &asig1, &asig2, &asig3);
+ aexp += texp - 0x3ffe;
+ *exp = aexp;
+ *sig0 = asig0;
+ *sig1 = asig1;
+}
+
void helper_fyl2xp1(CPUX86State *env)
{
uint8_t old_flags = save_exception_flags(env);
@@ -1462,109 +1559,18 @@ void helper_fyl2xp1(CPUX86State *env)
ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
sig0, sig1, &env->fp_status);
} else {
- bool asign;
- uint32_t dexp, texp, aexp;
- uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
- uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
- uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
- floatx80 t2, accum;
+ int32_t aexp;
+ uint64_t asig0, asig1, asig2;
FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
signed char save_prec = env->fp_status.floatx80_rounding_precision;
env->fp_status.float_rounding_mode = float_round_nearest_even;
env->fp_status.floatx80_rounding_precision = 80;
+ helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
/*
- * Compute an approximation of ST0/(2+ST0), with extra
- * precision, as the argument to a polynomial approximation.
- * The extra precision is only needed for the first term of
- * the approximation, with subsequent terms being
- * significantly smaller; the approximation only uses odd
- * exponents, and the square of ST0/(2+ST0) is at most
- * 17-12*sqrt(2) = 0.029....
+ * Multiply by the second argument to compute the required
+ * result.
*/
- if (arg0_sign) {
- dexp = 0x3fff;
- shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
- sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
- } else {
- dexp = 0x4000;
- shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
- dsig0 |= 0x8000000000000000ULL;
- }
- texp = arg0_exp - dexp + 0x3ffe;
- rsig0 = arg0_sig;
- rsig1 = 0;
- rsig2 = 0;
- if (dsig0 <= rsig0) {
- shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
- ++texp;
- }
- tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
- mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
- sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
- &rsig0, &rsig1, &rsig2);
- while ((int64_t) rsig0 < 0) {
- --tsig0;
- add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
- &rsig0, &rsig1, &rsig2);
- }
- tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
- /*
- * No need to correct any estimation error in tsig1; even with
- * such error, it is accurate enough. Now compute the square
- * of that approximation.
- */
- mul128To256(tsig0, tsig1, tsig0, tsig1,
- &t2sig0, &t2sig1, &t2sig2, &t2sig3);
- t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
- t2sig0, t2sig1, &env->fp_status);
-
- /* Compute the lower parts of the polynomial expansion. */
- accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
- accum = floatx80_mul(accum, t2, &env->fp_status);
- accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
-
- /*
- * The full polynomial expansion is fyl2x_coeff_0 + accum
- * (where accum has much lower magnitude, and so, in
- * particular, carry out of the addition is not possible),
- * multiplied by t. (This expansion is only accurate to about
- * 70 bits, not 128 bits.)
- */
- aexp = extractFloatx80Exp(fyl2x_coeff_0);
- asign = extractFloatx80Sign(fyl2x_coeff_0);
- shift128RightJamming(extractFloatx80Frac(accum), 0,
- aexp - extractFloatx80Exp(accum),
- &asig0, &asig1);
- bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
- bsig1 = 0;
- if (asign == extractFloatx80Sign(accum)) {
- add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
- } else {
- sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
- }
- /*
- * Multiply by t and by the second argument to compute the
- * required result.
- */
- mul128To256(asig0, asig1, tsig0, tsig1,
- &asig0, &asig1, &asig2, &asig3);
- aexp += texp - 0x3ffe;
if (arg1_exp == 0) {
normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
}
@@ -1581,6 +1587,151 @@ void helper_fyl2xp1(CPUX86State *env)
merge_exception_flags(env, old_flags);
}
+void helper_fyl2x(CPUX86State *env)
+{
+ uint8_t old_flags = save_exception_flags(env);
+ uint64_t arg0_sig = extractFloatx80Frac(ST0);
+ int32_t arg0_exp = extractFloatx80Exp(ST0);
+ bool arg0_sign = extractFloatx80Sign(ST0);
+ uint64_t arg1_sig = extractFloatx80Frac(ST1);
+ int32_t arg1_exp = extractFloatx80Exp(ST1);
+ bool arg1_sign = extractFloatx80Sign(ST1);
+
+ if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_silence_nan(ST0, &env->fp_status);
+ } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_silence_nan(ST1, &env->fp_status);
+ } else if (floatx80_invalid_encoding(ST0) ||
+ floatx80_invalid_encoding(ST1)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_default_nan(&env->fp_status);
+ } else if (floatx80_is_any_nan(ST0)) {
+ ST1 = ST0;
+ } else if (floatx80_is_any_nan(ST1)) {
+ /* Pass this NaN through. */
+ } else if (arg0_sign && !floatx80_is_zero(ST0)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_default_nan(&env->fp_status);
+ } else if (floatx80_is_infinity(ST1)) {
+ FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
+ &env->fp_status);
+ switch (cmp) {
+ case float_relation_less:
+ ST1 = floatx80_chs(ST1);
+ break;
+ case float_relation_greater:
+ /* Result is infinity of the same sign as ST1. */
+ break;
+ default:
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_default_nan(&env->fp_status);
+ break;
+ }
+ } else if (floatx80_is_infinity(ST0)) {
+ if (floatx80_is_zero(ST1)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_default_nan(&env->fp_status);
+ } else if (arg1_sign) {
+ ST1 = floatx80_chs(ST0);
+ } else {
+ ST1 = ST0;
+ }
+ } else if (floatx80_is_zero(ST0)) {
+ if (floatx80_is_zero(ST1)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ ST1 = floatx80_default_nan(&env->fp_status);
+ } else {
+ /* Result is infinity with opposite sign to ST1. */
+ float_raise(float_flag_divbyzero, &env->fp_status);
+ ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
+ 0x8000000000000000ULL);
+ }
+ } else if (floatx80_is_zero(ST1)) {
+ if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
+ ST1 = floatx80_chs(ST1);
+ }
+ /* Otherwise, ST1 is already the correct result. */
+ } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
+ if (arg1_sign) {
+ ST1 = floatx80_chs(floatx80_zero);
+ } else {
+ ST1 = floatx80_zero;
+ }
+ } else {
+ int32_t int_exp;
+ floatx80 arg0_m1;
+ FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
+ signed char save_prec = env->fp_status.floatx80_rounding_precision;
+ env->fp_status.float_rounding_mode = float_round_nearest_even;
+ env->fp_status.floatx80_rounding_precision = 80;
+
+ if (arg0_exp == 0) {
+ normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
+ }
+ if (arg1_exp == 0) {
+ normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
+ }
+ int_exp = arg0_exp - 0x3fff;
+ if (arg0_sig > 0xb504f333f9de6484ULL) {
+ ++int_exp;
+ }
+ arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
+ &env->fp_status),
+ floatx80_one, &env->fp_status);
+ if (floatx80_is_zero(arg0_m1)) {
+ /* Exact power of 2; multiply by ST1. */
+ env->fp_status.float_rounding_mode = save_mode;
+ ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
+ ST1, &env->fp_status);
+ } else {
+ bool asign = extractFloatx80Sign(arg0_m1);
+ int32_t aexp;
+ uint64_t asig0, asig1, asig2;
+ helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
+ if (int_exp != 0) {
+ bool isign = (int_exp < 0);
+ int32_t iexp;
+ uint64_t isig;
+ int shift;
+ int_exp = isign ? -int_exp : int_exp;
+ shift = clz32(int_exp) + 32;
+ isig = int_exp;
+ isig <<= shift;
+ iexp = 0x403e - shift;
+ shift128RightJamming(asig0, asig1, iexp - aexp,
+ &asig0, &asig1);
+ if (asign == isign) {
+ add128(isig, 0, asig0, asig1, &asig0, &asig1);
+ } else {
+ sub128(isig, 0, asig0, asig1, &asig0, &asig1);
+ }
+ aexp = iexp;
+ asign = isign;
+ }
+ /*
+ * Multiply by the second argument to compute the required
+ * result.
+ */
+ if (arg1_exp == 0) {
+ normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
+ }
+ mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
+ aexp += arg1_exp - 0x3ffe;
+ /* This result is inexact. */
+ asig1 |= 1;
+ env->fp_status.float_rounding_mode = save_mode;
+ ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
+ asig0, asig1, &env->fp_status);
+ }
+
+ env->fp_status.floatx80_rounding_precision = save_prec;
+ }
+ fpop(env);
+ merge_exception_flags(env, old_flags);
+}
+
void helper_fsqrt(CPUX86State *env)
{
uint8_t old_flags = save_exception_flags(env);