aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Liubov Dmitrieva <liubov.dmitrieva@gmail.com> 2012-09-10 11:44:49 +0200
committer Andreas Jaeger <aj@suse.de> 2012-09-10 11:44:49 +0200
commit 80ccd52c95bda018899d83f21c797dd0fd028512 (patch)
tree 428a02774c0b6a050ce30b929e14c6250bd1173c
parent 3d9b46b3500566163815747173002d3d0bbb9b2f (diff)
download glibc-80ccd52c95bda018899d83f21c797dd0fd028512.zip
glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.gz
glibc-80ccd52c95bda018899d83f21c797dd0fd028512.tar.bz2
Fix x86 SSE cosf, sinf issues
* sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix unwind info if defined PIC. Fix special cases description.
* sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
* sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix DP_HI_MASK entry.
* sysdeps/x86_64/fpu/s_cosf.S: Likewise.
-rw-r--r-- ChangeLog 10
-rw-r--r-- sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S 46
-rw-r--r-- sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S 49
-rw-r--r-- sysdeps/x86_64/fpu/s_cosf.S 24
-rw-r--r-- sysdeps/x86_64/fpu/s_sinf.S 21
5 files changed, 66 insertions, 84 deletions
diff --git a/ChangeLog b/ChangeLog
index 30a0727..e87c0a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-09-10 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ * sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
+ unwind info if defined PIC. Fix special cases description.
+ * sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
+
+ * sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
+ DP_HI_MASK entry.
+ * sysdeps/x86_64/fpu/s_cosf.S: Likewise.
+
2012-09-07 H.J. Lu <hongjiu.lu@intel.com>
* scripts/check-local-headers.sh: Add "shopt -s nullglob".
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
index 2b5a2a5..405c6ea 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
@@ -50,25 +50,29 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
- * cos(+-0)==+-0 not raising inexact/underflow,
- * cos(subnormal) raises inexact/underflow
- * cos(min_normalized) raises inexact/underflow
- * cos(normalized) raises inexact
- * cos(Inf) = NaN, raises invalid, sets errno to EDOM
- * cos(NaN) = NaN
+ * cos(+-0) = 1 not raising inexact,
+ * cos(subnormal) raises inexact,
+ * cos(min_normalized) raises inexact,
+ * cos(normalized) raises inexact,
+ * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * cos(NaN) = NaN.
*/
#ifdef PIC
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX pushl %ebx
-# define RESTORE_BX popl %ebx
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG) pushl REG; CFI_PUSH(REG)
+# define POP(REG) popl REG; CFI_POP(REG)
+# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
# define ARG_X 8(%esp)
#else
# define MO1(symbol) L(symbol)
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN ret
# define ARG_X 4(%esp)
#endif
@@ -76,11 +80,7 @@
ENTRY(__cosf_sse2)
/* Input: single precision x on stack at address ARG_X */
-#ifdef PIC
- SAVE_BX
- LOAD_PIC_REG(bx)
-#endif
-
+ ENTRANCE
movl ARG_X, %eax /* Bits of x */
cvtss2sd ARG_X, %xmm0 /* DP x */
andl $0x7fffffff, %eax /* |x| */
@@ -143,8 +143,7 @@ L(reconstruction):
fldl 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 8(%esp), %esp
- RESTORE_BX
- ret
+ RETURN
.p2align 4
L(sin_poly):
@@ -183,9 +182,7 @@ L(sin_poly):
fldl 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 8(%esp), %esp
- RESTORE_BX
- ret
-
+ RETURN
.p2align 4
L(large_args):
@@ -275,7 +272,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args peth */
-
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@@ -307,8 +303,7 @@ L(epilogue):
flds 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 4(%esp), %esp
- RESTORE_BX
- ret
+ RETURN
.p2align 4
L(arg_less_2pn5):
@@ -353,7 +348,6 @@ L(skip_errno_setting):
jmp L(epilogue)
END(__cosf_sse2)
-
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
index cda1750..49d59b5 100644
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
@@ -50,25 +50,29 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
- * sin(+-0)==+-0 not raising inexact/underflow,
- * sin(subnormal) raises inexact/underflow
- * sin(min_normalized) raises inexact/underflow
- * sin(normalized) raises inexact
- * sin(Inf) = NaN, raises invalid, sets errno to EDOM
- * sin(NaN) = NaN
+ * sin(+-0) = +-0 not raising inexact/underflow,
+ * sin(subnormal) raises inexact/underflow,
+ * sin(min_normalized) raises inexact/underflow,
+ * sin(normalized) raises inexact,
+ * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * sin(NaN) = NaN.
*/
#ifdef PIC
# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX pushl %ebx
-# define RESTORE_BX popl %ebx
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG) pushl REG; CFI_PUSH(REG)
+# define POP(REG) popl REG; CFI_POP(REG)
+# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
# define ARG_X 8(%esp)
#else
# define MO1(symbol) L(symbol)
# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN ret
# define ARG_X 4(%esp)
#endif
@@ -76,11 +80,7 @@
ENTRY(__sinf_sse2)
/* Input: single precision x on stack at address ARG_X */
-#ifdef PIC
- SAVE_BX
- LOAD_PIC_REG(bx)
-#endif
-
+ ENTRANCE
movl ARG_X, %eax /* Bits of x */
cvtss2sd ARG_X, %xmm0 /* DP x */
andl $0x7fffffff, %eax /* |x| */
@@ -145,8 +145,7 @@ L(reconstruction):
fldl 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 8(%esp), %esp
- RESTORE_BX
- ret
+ RETURN
.p2align 4
L(sin_poly):
@@ -186,9 +185,7 @@ L(sin_poly):
fldl 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 8(%esp), %esp
- RESTORE_BX
- ret
-
+ RETURN
.p2align 4
L(large_args):
@@ -281,10 +278,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args peth */
-
-
-
-
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@@ -320,8 +313,7 @@ L(epilogue):
flds 0(%esp) /* ...to FPU. */
/* Return back 4 bytes of stack frame */
lea 4(%esp), %esp
- RESTORE_BX
- ret
+ RETURN
.p2align 4
L(arg_less_2pn5):
@@ -376,7 +368,6 @@ L(skip_errno_setting):
jmp L(epilogue)
END(__sinf_sse2)
-
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
weak_alias (__sinf, sinf)
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S
index 7eeefe8..dc8c76a 100644
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ b/sysdeps/x86_64/fpu/s_cosf.S
@@ -50,12 +50,12 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
- * cos(+-0)==+-0 not raising inexact/underflow,
- * cos(subnormal) raises inexact/underflow
- * cos(min_normalized) raises inexact/underflow
- * cos(normalized) raises inexact
- * cos(Inf) = NaN, raises invalid, sets errno to EDOM
- * cos(NaN) = NaN
+ * cos(+-0) = 1 not raising inexact,
+ * cos(subnormal) raises inexact,
+ * cos(min_normalized) raises inexact,
+ * cos(normalized) raises inexact,
+ * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * cos(NaN) = NaN.
*/
.text
@@ -163,10 +163,6 @@ L(sin_poly):
cvtsd2ss %xmm3, %xmm0 /* SP result */
ret
-
-
-
-
.p2align 4
L(large_args):
/* Here if |x|>=9*Pi/4 */
@@ -257,7 +253,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args peth */
-
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@@ -317,7 +312,6 @@ L(arg_inf_or_nan):
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
- lea (%rax), %rax
movl $EDOM, (%rax)
.p2align 4
@@ -328,8 +322,6 @@ L(skip_errno_setting):
ret
END(__cosf)
-
-
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+ .type L(DP_HI_MASK), @object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S
index 295ba3d..9a6c87f 100644
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ b/sysdeps/x86_64/fpu/s_sinf.S
@@ -50,12 +50,12 @@
* 9) if x is NaN, return x-x.
*
* Special cases:
- * sin(+-0)==+-0 not raising inexact/underflow,
- * sin(subnormal) raises inexact/underflow
- * sin(min_normalized) raises inexact/underflow
- * sin(normalized) raises inexact
- * sin(Inf) = NaN, raises invalid, sets errno to EDOM
- * sin(NaN) = NaN
+ * sin(+-0) = +-0 not raising inexact/underflow,
+ * sin(subnormal) raises inexact/underflow,
+ * sin(min_normalized) raises inexact/underflow,
+ * sin(normalized) raises inexact,
+ * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ * sin(NaN) = NaN.
*/
.text
@@ -168,7 +168,6 @@ L(sin_poly):
cvtsd2ss %xmm3, %xmm0 /* SP result */
ret
-
.p2align 4
L(large_args):
/* Here if |x|>=9*Pi/4 */
@@ -262,7 +261,6 @@ L(very_large_skip2):
jmp L(reconstruction) /* end of very_large_args peth */
-
.p2align 4
L(arg_less_pio4):
/* Here if |x|<Pi/4 */
@@ -340,7 +338,6 @@ L(arg_inf_or_nan):
/* Here if x is Inf. Set errno to EDOM. */
call JUMPTARGET(__errno_location)
- lea (%rax), %rax
movl $EDOM, (%rax)
.p2align 4
@@ -351,8 +348,6 @@ L(skip_errno_setting):
ret
END(__sinf)
-
-
.section .rodata, "a"
.p2align 3
L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
.p2align 3
L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
.long 0x00000000,0xffffffff
- .type L(DP_ABS_MASK),@object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+ .type L(DP_HI_MASK),@object
+ ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
.p2align 4
L(SP_ABS_MASK): /* Mask for getting SP absolute value */