From 4da6db51880289f0bf41b39e05cf9bb1c4769c47 Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Mon, 23 Jun 2014 20:12:33 +0000
Subject: Fix pow overflow in non-default rounding modes (bug 16315).

This patch fixes bug 16315, bad pow handling of overflow/underflow in
non-default rounding modes.  Tests of pow are duly converted to
ALL_RM_TEST to run all tests in all rounding modes.

There are two main issues here.  First, various implementations
compute a negative result by negating a positive result, but this
yields inappropriate overflow / underflow values for directed
rounding, so either overflow / underflow results need recomputing in
the correct sign, or the relevant overflowing / underflowing operation
needs to be made to have a result of the correct sign.  Second, the
dbl-64 implementation sets FE_TONEAREST internally; in the overflow /
underflow case, the result needs recomputing in the original rounding
mode.

Tested x86_64 and x86 and ulps updated accordingly.

	[BZ #16315]
	* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly
	overflowing or underflowing operations take place with sign of
	result.
	* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
	* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
	* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
	(__ieee754_pow): Recompute overflowing and underflowing results in
	original rounding mode.
	* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
	(__powl_helper): Allow negative argument X and scale negated value
	as needed.  Avoid passing value outside [-1, 1] to f2xm1.
	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly
	overflowing or underflowing operations take place with sign of
	result.
	* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:
	Include <math.h>.
	* math/auto-libm-test-in: Add more tests of pow.
	* math/auto-libm-test-out: Regenerated.
	* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
	(pow_tonearest_test_data): Remove.
	(pow_test_tonearest): Likewise.
	(pow_towardzero_test_data): Likewise.
	(pow_test_towardzero): Likewise.
	(pow_downward_test_data): Likewise.
	(pow_test_downward): Likewise.
	(pow_upward_test_data): Likewise.
	(pow_test_upward): Likewise.
	(main): Don't call removed functions.
	* sysdeps/i386/fpu/libm-test-ulps: Update.
	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
---
 sysdeps/x86_64/fpu/e_powl.S          | 71 +++++++++++-------------------------
 sysdeps/x86_64/fpu/libm-test-ulps    | 14 ++++++-
 sysdeps/x86_64/fpu/multiarch/e_pow.c |  1 +
 3 files changed, 34 insertions(+), 52 deletions(-)

(limited to 'sysdeps/x86_64/fpu')

diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 47f129f..a960a1f 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -141,7 +141,7 @@ ENTRY(__ieee754_powl)
 	fabs			// |y| : 8 : y : x
 	fcomip	%st(1), %st	// 8 : y : x
 	fstp	%st(0)		// y : x
-	jnc	2f
+	jnc	3f
 	mov	-8(%rsp),%eax
 	mov	-4(%rsp),%edx
 	orl	$0, %edx
@@ -154,12 +154,22 @@ ENTRY(__ieee754_powl)
 4:	fldl	MO(one)		// 1 : x
 	fxch
 
+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@@ -177,71 +187,32 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 8), but
-	// may be odd unless at least 1L<<64.  So it may be necessary
-	// to adjust the sign of a negative result afterwards.
-	fxch			// x : y
-	fabs			// |x| : y
-	fxch			// y : |x|
+2:	// y is a large integer (absolute value at least 1L<<63).
 	// If y has absolute value at least 1L<<78, then any finite
 	// nonzero x will result in 0 (underflow), 1 or infinity (overflow).
 	// Saturate y to those bounds to avoid overflow in the calculation
 	// of y*log2(x).
-	fldl	MO(p78)		// 1L<<78 : y : |x|
-	fld	%st(1)		// y : 1L<<78 : y : |x|
-	fabs			// |y| : 1L<<78 : y : |x|
-	fcomip	%st(1), %st	// 1L<<78 : y : |x|
-	fstp	%st(0)		// y : |x|
+	fldl	MO(p78)		// 1L<<78 : y : x
+	fld	%st(1)		// y : 1L<<78 : y : x
+	fabs			// |y| : 1L<<78 : y : x
+	fcomip	%st(1), %st	// 1L<<78 : y : x
+	fstp	%st(0)		// y : x
 	jc	3f
 	fstp	%st(0)		// pop y
-	fldl	MO(p78)		// 1L<<78 : |x|
+	fldl	MO(p78)		// 1L<<78 : x
 	testb	$2, %dl
 	jz	3f		// y > 0
-	fchs			// -(1L<<78) : |x|
+	fchs			// -(1L<<78) : x
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
 	subq	$40, %rsp
 	cfi_adjust_cfa_offset (40)
 	fstpt	16(%rsp)	// x
 	fstpt	(%rsp)		// <empty>
-	mov	%edx, 32(%rsp)
 	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
-	mov	32(%rsp), %edx
 	addq	$40, %rsp
 	cfi_adjust_cfa_offset (-40)
-	testb	$2, %dh
-	jz	292f
-	// x is negative.  If y is an odd integer, negate the result.
-	fldt	24(%rsp)	// y : abs(result)
-	fldl	MO(p64)		// 1L<<64 : y : abs(result)
-	fld	%st(1)		// y : 1L<<64 : y : abs(result)
-	fabs			// |y| : 1L<<64 : y : abs(result)
-	fcomip	%st(1), %st	// 1L<<64 : y : abs(result)
-	fstp	%st(0)		// y : abs(result)
-	jnc	291f
-	fldl	MO(p63)		// p63 : y : abs(result)
-	fxch			// y : p63 : abs(result)
-	fprem			// y%p63 : p63 : abs(result)
-	fstp	%st(1)		// y%p63 : abs(result)
-
-	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	-8(%rsp)	// y : abs(result)
-	fildll	-8(%rsp)	// int(y) : y : abs(result)
-	fucomip	%st(1),%st	// y : abs(result)
-	ffreep	%st		// abs(result)
-	jne	292f
-
-	// OK, the value is an integer, but is it odd?
-	mov	-8(%rsp), %eax
-	mov	-4(%rsp), %edx
-	andb	$1, %al
-	jz	290f		// jump if not odd
-	// It's an odd integer.
-	fchs
-290:	ret
-291:	fstp	%st(0)		// abs(result)
-292:	ret
+	ret
 
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 92fef5a..8edd40b 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1736,8 +1736,12 @@ ildouble: 1
 ldouble: 1
 
 Function: "pow_downward":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
+ildouble: 4
+ldouble: 4
 
 Function: "pow_tonearest":
 float: 1
@@ -1746,14 +1750,20 @@ ildouble: 1
 ldouble: 1
 
 Function: "pow_towardzero":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1
 
 Function: "pow_upward":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
 
 Function: "sin":
 ildouble: 1
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index a740b6c..433cce0 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -1,5 +1,6 @@
 #ifdef HAVE_FMA4_SUPPORT
 # include <init-arch.h>
+# include <math.h>
 # include <math_private.h>
 
 extern double __ieee754_pow_sse2 (double, double);
-- 
cgit v1.1