From 4da6db51880289f0bf41b39e05cf9bb1c4769c47 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Mon, 23 Jun 2014 20:12:33 +0000 Subject: Fix pow overflow in non-default rounding modes (bug 16315). This patch fixes bug 16315, bad pow handling of overflow/underflow in non-default rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run all tests in all rounding modes. There are two main issues here. First, various implementations compute a negative result by negating a positive result, but this yields inappropriate overflow / underflow values for directed rounding, so either overflow / underflow results need recomputing in the correct sign, or the relevant overflowing / underflowing operation needs to be made to have a result of the correct sign. Second, the dbl-64 implementation sets FE_TONEAREST internally; in the overflow / underflow case, the result needs recomputing in the original rounding mode. Tested x86_64 and x86 and ulps updated accordingly. [BZ #16315] * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly overflowing or underflowing operations take place with sign of result. * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise. * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. * sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>. (__ieee754_pow): Recompute overflowing and underflowing results in original rounding mode. * sysdeps/x86/fpu/powl_helper.c: Include <math.h>. (__powl_helper): Allow negative argument X and scale negated value as needed. Avoid passing value outside [-1, 1] to f2xm1. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly overflowing or underflowing operations take place with sign of result. * sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: Include <math.h>. * math/auto-libm-test-in: Add more tests of pow. * math/auto-libm-test-out: Regenerated. * math/libm-test.inc (pow_test): Use ALL_RM_TEST. (pow_tonearest_test_data): Remove. (pow_test_tonearest): Likewise. (pow_towardzero_test_data): Likewise. 
(pow_test_towardzero): Likewise. (pow_downward_test_data): Likewise. (pow_test_downward): Likewise. (pow_upward_test_data): Likewise. (pow_test_upward): Likewise. (main): Don't call removed functions. * sysdeps/i386/fpu/libm-test-ulps: Update. * sysdeps/x86_64/fpu/libm-test-ulps: Likewise. --- sysdeps/x86_64/fpu/e_powl.S | 71 +++++++++++------------------------- sysdeps/x86_64/fpu/libm-test-ulps | 14 ++++++- sysdeps/x86_64/fpu/multiarch/e_pow.c | 1 + 3 files changed, 34 insertions(+), 52 deletions(-) (limited to 'sysdeps/x86_64/fpu') diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index 47f129f..a960a1f 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -141,7 +141,7 @@ ENTRY(__ieee754_powl) fabs // |y| : 8 : y : x fcomip %st(1), %st // 8 : y : x fstp %st(0) // y : x - jnc 2f + jnc 3f mov -8(%rsp),%eax mov -4(%rsp),%edx orl $0, %edx @@ -154,12 +154,22 @@ ENTRY(__ieee754_powl) 4: fldl MO(one) // 1 : x fxch + /* If y is even, take the absolute value of x. Otherwise, + ensure all intermediate values that might overflow have the + sign of x. */ + testb $1, %al + jnz 6f + fabs + 6: shrdl $1, %edx, %eax jnc 5f fxch + fabs fmul %st(1) // x : ST*x fxch -5: fmul %st(0), %st // x*x : ST*x +5: fld %st // x : x : ST*x + fabs // |x| : x : ST*x + fmulp // |x|*x : ST*x shrl $1, %edx movl %eax, %ecx orl %edx, %ecx @@ -177,71 +187,32 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -2: // y is a large integer (absolute value at least 8), but - // may be odd unless at least 1L<<64. So it may be necessary - // to adjust the sign of a negative result afterwards. - fxch // x : y - fabs // |x| : y - fxch // y : |x| +2: // y is a large integer (absolute value at least 1L<<63). // If y has absolute value at least 1L<<78, then any finite // nonzero x will result in 0 (underflow), 1 or infinity (overflow). // Saturate y to those bounds to avoid overflow in the calculation // of y*log2(x). 
- fldl MO(p78) // 1L<<78 : y : |x| - fld %st(1) // y : 1L<<78 : y : |x| - fabs // |y| : 1L<<78 : y : |x| - fcomip %st(1), %st // 1L<<78 : y : |x| - fstp %st(0) // y : |x| + fldl MO(p78) // 1L<<78 : y : x + fld %st(1) // y : 1L<<78 : y : x + fabs // |y| : 1L<<78 : y : x + fcomip %st(1), %st // 1L<<78 : y : x + fstp %st(0) // y : x jc 3f fstp %st(0) // pop y - fldl MO(p78) // 1L<<78 : |x| + fldl MO(p78) // 1L<<78 : x testb $2, %dl jz 3f // y > 0 - fchs // -(1L<<78) : |x| + fchs // -(1L<<78) : x .align ALIGNARG(4) 3: /* y is a real number. */ subq $40, %rsp cfi_adjust_cfa_offset (40) fstpt 16(%rsp) // x fstpt (%rsp) // <empty> - mov %edx, 32(%rsp) call HIDDEN_JUMPTARGET (__powl_helper) // <result> - mov 32(%rsp), %edx addq $40, %rsp cfi_adjust_cfa_offset (-40) - testb $2, %dh - jz 292f - // x is negative. If y is an odd integer, negate the result. - fldt 24(%rsp) // y : abs(result) - fldl MO(p64) // 1L<<64 : y : abs(result) - fld %st(1) // y : 1L<<64 : y : abs(result) - fabs // |y| : 1L<<64 : y : abs(result) - fcomip %st(1), %st // 1L<<64 : y : abs(result) - fstp %st(0) // y : abs(result) - jnc 291f - fldl MO(p63) // p63 : y : abs(result) - fxch // y : p63 : abs(result) - fprem // y%p63 : p63 : abs(result) - fstp %st(1) // y%p63 : abs(result) - - // We must find out whether y is an odd integer. - fld %st // y : y : abs(result) - fistpll -8(%rsp) // y : abs(result) - fildll -8(%rsp) // int(y) : y : abs(result) - fucomip %st(1),%st // y : abs(result) - ffreep %st // abs(result) - jne 292f - - // OK, the value is an integer, but is it odd? - mov -8(%rsp), %eax - mov -4(%rsp), %edx - andb $1, %al - jz 290f // jump if not odd - // It's an odd integer. 
- fchs -290: ret -291: fstp %st(0) // abs(result) -292: ret + ret // pow(x,±0) = 1 .align ALIGNARG(4) diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 92fef5a..8edd40b 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1736,8 +1736,12 @@ ildouble: 1 ldouble: 1 Function: "pow_downward": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 4 +ldouble: 4 Function: "pow_tonearest": float: 1 @@ -1746,14 +1750,20 @@ ildouble: 1 ldouble: 1 Function: "pow_towardzero": +double: 1 float: 1 +idouble: 1 ifloat: 1 +ildouble: 1 +ldouble: 1 Function: "pow_upward": +double: 1 float: 1 +idouble: 1 ifloat: 1 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "sin": ildouble: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c index a740b6c..433cce0 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -1,5 +1,6 @@ #ifdef HAVE_FMA4_SUPPORT # include <init-arch.h> +# include <math.h> # include <math_private.h> extern double __ieee754_pow_sse2 (double, double); -- cgit v1.1