Fix pow overflow in non-default rounding modes (bug 16315).

This patch fixes bug 16315, bad pow handling of overflow/underflow in non-default rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run all tests in all rounding modes.

There are two main issues here. First, various implementations compute a negative result by negating a positive result, but this yields inappropriate overflow / underflow values for directed rounding, so either overflow / underflow results need recomputing in the correct sign, or the relevant overflowing / underflowing operation needs to be made to have a result of the correct sign. Second, the dbl-64 implementation sets FE_TONEAREST internally; in the overflow / underflow case, the result needs recomputing in the original rounding mode.

Tested x86_64 and x86 and ulps updated accordingly.

[BZ #16315]
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly overflowing or underflowing operations take place with sign of result.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>. (__ieee754_pow): Recompute overflowing and underflowing results in original rounding mode.
* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>. (__powl_helper): Allow negative argument X and scale negated value as needed. Avoid passing value outside [-1, 1] to f2xm1.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly overflowing or underflowing operations take place with sign of result.
* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: Include <math.h>.
* math/auto-libm-test-in: Add more tests of pow.
* math/auto-libm-test-out: Regenerated.
* math/libm-test.inc (pow_test): Use ALL_RM_TEST. (pow_tonearest_test_data): Remove. (pow_test_tonearest): Likewise. (pow_towardzero_test_data): Likewise. (pow_test_towardzero): Likewise. (pow_downward_test_data): Likewise. (pow_test_downward): Likewise. (pow_upward_test_data): Likewise. (pow_test_upward): Likewise. (main): Don't call removed functions.
* sysdeps/i386/fpu/libm-test-ulps: Update.
* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
author: Joseph Myers <joseph@codesourcery.com> 2014-06-23 20:12:33 +0000
committer: Joseph Myers <joseph@codesourcery.com> 2014-06-23 20:12:33 +0000
commit: 4da6db51880289f0bf41b39e05cf9bb1c4769c47 (patch)
tree: 9f57512f4561f512366c93cf7430d5b75d4852fd /sysdeps/i386
parent: 5686b236cccdc8c72788b7996537ed92ac3a3c8c (diff)
download: glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.zip
glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.gz
glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.bz2
4 files changed, 69 insertions, 79 deletions
diff --git a/sysdeps/i386/fpu/e_pow.S b/sysdeps/i386/fpu/e_pow.S
index 8b641bb..efe3256 100644
--- a/sysdeps/i386/fpu/e_pow.S
+++ b/sysdeps/i386/fpu/e_pow.S
@@ -144,12 +144,22 @@ ENTRY(__ieee754_pow)
 4:	fldl	MO(one)		// 1 : x
 	fxch
 
+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@@ -207,27 +217,28 @@ ENTRY(__ieee754_pow)
 	fxch			// fract(y*log2(x)) : int(y*log2(x))
 	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
 	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
-	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+
+	// Before scaling, we must negate if x is negative and y is an
+	// odd integer.
 	testb	$2, %dh
-	jz	292f
+	jz	291f
 	// x is negative.  If y is an odd integer, negate the result.
-	fldl	20(%esp)	// y : abs(result)
-	fld	%st		// y : y : abs(result)
-	fabs			// |y| : y : abs(result)
-	fcompl	MO(p63)		// y : abs(result)
+	fldl	20(%esp)	// y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fld	%st		// y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fabs			// |y| : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fcompl	MO(p63)		// y : 2^fract(y*log2(x)) : int(y*log2(x))
 	fnstsw
 	sahf
-	jnc	291f
+	jnc	290f
 
 	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	(%esp)		// y : abs(result)
-	fildll	(%esp)		// int(y) : y : abs(result)
-	fucompp			// abs(result)
+	fld	%st		// y : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fistpll	(%esp)		// y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fildll	(%esp)		// int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x))
+	fucompp			// 2^fract(y*log2(x)) : int(y*log2(x))
 	fnstsw
 	sahf
-	jne	292f
+	jne	291f
 
 	// OK, the value is an integer, but is it odd?
 	popl	%eax
@@ -235,14 +246,17 @@ ENTRY(__ieee754_pow)
 	popl	%edx
 	cfi_adjust_cfa_offset (-4)
 	andb	$1, %al
-	jz	290f		// jump if not odd
+	jz	292f		// jump if not odd
 	// It's an odd integer.
 	fchs
-290:	ret
+	jmp	292f
+
 	cfi_adjust_cfa_offset (8)
-291:	fstp	%st(0)		// abs(result)
-292:	addl	$8, %esp
+290:	fstp	%st(0)		// 2^fract(y*log2(x)) : int(y*log2(x))
+291:	addl	$8, %esp
 	cfi_adjust_cfa_offset (-8)
+292:	fscale			// +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
+	fstp	%st(1)		// +/- 2^fract(y*log2(x))*2^int(y*log2(x))
 	ret
 
 
diff --git a/sysdeps/i386/fpu/e_powf.S b/sysdeps/i386/fpu/e_powf.S
index 89d30d8..3735654 100644
--- a/sysdeps/i386/fpu/e_powf.S
+++ b/sysdeps/i386/fpu/e_powf.S
@@ -129,12 +129,22 @@ ENTRY(__ieee754_powf)
 4:	fldl	MO(one)		// 1 : x
 	fxch
 
+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %dl
+	jnz	6f
+	fabs
+
 6:	shrl	$1, %edx
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	testl	%edx, %edx
 	jnz	6b
 	fstp	%st(0)		// ST*x
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S
index cc6dcd1..44cf238 100644
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -151,7 +151,7 @@ ENTRY(__ieee754_powl)
 	fcompl	MO(p3)		// y : x
 	fnstsw
 	sahf
-	jnc	2f
+	jnc	3f
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
@@ -166,12 +166,22 @@ ENTRY(__ieee754_powl)
 4:	fldl	MO(one)		// 1 : x
 	fxch
 
+	/* If y is even, take the absolute value of x.  Otherwise,
+	   ensure all intermediate values that might overflow have the
+	   sign of x.  */
+	testb	$1, %al
+	jnz	6f
+	fabs
+
 6:	shrdl	$1, %edx, %eax
 	jnc	5f
 	fxch
+	fabs
 	fmul	%st(1)		// x : ST*x
 	fxch
-5:	fmul	%st(0), %st	// x*x : ST*x
+5:	fld	%st		// x : x : ST*x
+	fabs			// |x| : x : ST*x
+	fmulp			// |x|*x : ST*x
 	shrl	$1, %edx
 	movl	%eax, %ecx
 	orl	%edx, %ecx
@@ -198,79 +208,31 @@ ENTRY(__ieee754_powl)
 
 	cfi_adjust_cfa_offset (8)
 	.align ALIGNARG(4)
-2:	// y is a large integer (absolute value at least 8), but
-	// may be odd unless at least 1L<<64.  So it may be necessary
-	// to adjust the sign of a negative result afterwards.
-	fxch			// x : y
-	fabs			// |x| : y
-	fxch			// y : |x|
+2:	// y is a large integer (absolute value at least 1L<<63).
 	// If y has absolute value at least 1L<<78, then any finite
 	// nonzero x will result in 0 (underflow), 1 or infinity (overflow).
 	// Saturate y to those bounds to avoid overflow in the calculation
 	// of y*log2(x).
-	fld	%st		// y : y : |x|
-	fabs			// |y| : y : |x|
-	fcompl	MO(p78)		// y : |x|
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p78)		// y : x
 	fnstsw
 	sahf
 	jc	3f
 	fstp	%st(0)		// pop y
-	fldl	MO(p78)		// 1L<<78 : |x|
+	fldl	MO(p78)		// 1L<<78 : x
 	testb	$2, %dl
 	jz	3f		// y > 0
-	fchs			// -(1L<<78) : |x|
+	fchs			// -(1L<<78) : x
 	.align ALIGNARG(4)
 3:	/* y is a real number.  */
 	subl	$28, %esp
 	cfi_adjust_cfa_offset (28)
 	fstpt	12(%esp)	// x
 	fstpt	(%esp)		// <empty>
-	mov	%edx, 24(%esp)
 	call	HIDDEN_JUMPTARGET (__powl_helper)	// <result>
-	mov	24(%esp), %edx
-	addl	$28, %esp
-	cfi_adjust_cfa_offset (-28)
-	testb	$2, %dh
-	jz	292f
-	// x is negative.  If y is an odd integer, negate the result.
-#ifdef	PIC
-	LOAD_PIC_REG (cx)
-#endif
-	fldt	24(%esp)	// y : abs(result)
-	fld	%st		// y : y : abs(result)
-	fabs			// |y| : y : abs(result)
-	fcompl	MO(p64)		// y : abs(result)
-	fnstsw
-	sahf
-	jnc	291f
-	fldl	MO(p63)		// p63 : y : abs(result)
-	fxch			// y : p63 : abs(result)
-	fprem			// y%p63 : p63 : abs(result)
-	fstp	%st(1)		// y%p63 : abs(result)
-
-	// We must find out whether y is an odd integer.
-	fld	%st		// y : y : abs(result)
-	fistpll	(%esp)		// y : abs(result)
-	fildll	(%esp)		// int(y) : y : abs(result)
-	fucompp			// abs(result)
-	fnstsw
-	sahf
-	jne	292f
-
-	// OK, the value is an integer, but is it odd?
-	popl	%eax
-	cfi_adjust_cfa_offset (-4)
-	popl	%edx
-	cfi_adjust_cfa_offset (-4)
-	andb	$1, %al
-	jz	290f		// jump if not odd
-	// It's an odd integer.
-	fchs
-290:	ret
-	cfi_adjust_cfa_offset (8)
-291:	fstp	%st(0)		// abs(result)
-292:	addl	$8, %esp
-	cfi_adjust_cfa_offset (-8)
+	addl	$36, %esp
+	cfi_adjust_cfa_offset (-36)
 	ret
 
 	// pow(x,±0) = 1
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index d7424a6..eb934e3 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -1653,6 +1653,8 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+ildouble: 4
+ldouble: 4
 
 Function: "pow_tonearest":
 ildouble: 1
@@ -1663,14 +1665,16 @@ double: 1
 float: 1
 idouble: 1
 ifloat: 1
+ildouble: 1
+ldouble: 1
 
 Function: "pow_upward":
 double: 1
 float: 1
 idouble: 1
 ifloat: 1
-ildouble: 1
-ldouble: 1
+ildouble: 2
+ldouble: 2
 
 Function: "sin":
 ildouble: 1
author	Joseph Myers <joseph@codesourcery.com>	2014-06-23 20:12:33 +0000
committer	Joseph Myers <joseph@codesourcery.com>	2014-06-23 20:12:33 +0000
commit	4da6db51880289f0bf41b39e05cf9bb1c4769c47 (patch)
tree	9f57512f4561f512366c93cf7430d5b75d4852fd /sysdeps/i386
parent	5686b236cccdc8c72788b7996537ed92ac3a3c8c (diff)
download	glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.zip glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.gz glibc-4da6db51880289f0bf41b39e05cf9bb1c4769c47.tar.bz2