Optimize libm

libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized.
author: Ulrich Drepper <drepper@gmail.com> 2011-10-12 11:27:51 -0400
committer: Ulrich Drepper <drepper@gmail.com> 2011-10-12 11:27:51 -0400
commit: 0ac5ae2335292908f39031b1ea9fe8edce433c0f (patch)
tree: f9d26c8abc0de39d18d4c13e70f6022cdc6b461f /sysdeps/x86_64
parent: a843a204a3e8a0dd53584dad3668771abaec84ac (diff)
download: glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.zip
glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.gz
glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.bz2
13 files changed, 152 insertions, 47 deletions
diff --git a/sysdeps/x86_64/fpu/bits/fenv.h b/sysdeps/x86_64/fpu/bits/fenv.h
index 11859f0..be2518d 100644
--- a/sysdeps/x86_64/fpu/bits/fenv.h
+++ b/sysdeps/x86_64/fpu/bits/fenv.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997,1998,1999,2000,2001,2004 Free Software Foundation, Inc.
+/* Copyright (C) 1997-2001,2004,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -95,3 +95,36 @@ fenv_t;
 /* Floating-point environment where none of the exception is masked.  */
 # define FE_NOMASK_ENV	((__const fenv_t *) -2)
 #endif
+
+
+#ifdef __OPTIMIZE__
+/* Optimized versions.  */
+extern int __feraiseexcept_renamed (int) __asm__ ("feraiseexcept");
+__extern_inline int feraiseexcept (int __excepts)
+{
+  if (__builtin_constant_p (__excepts)
+      && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0)
+    {
+      if ((FE_INVALID & __excepts) != 0)
+	{
+	  /* One example of a invalid operation is 0.0 / 0.0.  */
+	  float __f = 0.0;
+
+	  __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
+	  (void) &__f;
+	}
+      if ((FE_DIVBYZERO & __excepts) != 0)
+	{
+	  float f = 1.0;
+	  float g = 0.0;
+
+	  __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g));
+	  (void) &f;
+	}
+
+      return 0;
+    }
+
+  return __feraiseexcept_renamed (__excepts);
+}
+#endif
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 780f878..5bdf47e 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -103,7 +103,8 @@ __NTH (llrint (double __x))
 }
 #  endif
 
-#  if __FINITE_MATH_ONLY__ == 1 && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+#  if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
+      && (__WORDSIZE == 64 || defined __SSE2_MATH__)
 /* Determine maximum of two values.  */
 __MATH_INLINE float
 __NTH (fmaxf (float __x, float __y))
diff --git a/sysdeps/x86_64/fpu/e_exp2l.S b/sysdeps/x86_64/fpu/e_exp2l.S
index 336b989..7abf425 100644
--- a/sysdeps/x86_64/fpu/e_exp2l.S
+++ b/sysdeps/x86_64/fpu/e_exp2l.S
@@ -36,3 +36,4 @@ ENTRY(__ieee754_exp2l)
 	fldz				/* Set result to 0.  */
 2:	ret
 END (__ieee754_exp2l)
+strong_alias (__ieee754_exp2l, __exp2l_finite)
diff --git a/sysdeps/x86_64/fpu/e_fmodl.S b/sysdeps/x86_64/fpu/e_fmodl.S
index 2967bf2..07c50df 100644
--- a/sysdeps/x86_64/fpu/e_fmodl.S
+++ b/sysdeps/x86_64/fpu/e_fmodl.S
@@ -20,3 +20,4 @@ ENTRY(__ieee754_fmodl)
 	fstp	%st(1)
 	ret
 END (__ieee754_fmodl)
+strong_alias (__ieee754_fmodl, __fmodl_finite)
diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S
index 633234b..50c5875 100644
--- a/sysdeps/x86_64/fpu/e_log10l.S
+++ b/sysdeps/x86_64/fpu/e_log10l.S
@@ -10,14 +10,12 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: $")
-
 #ifdef __ELF__
-	.section .rodata
+	.section .rodata.cst8,"aM",@progbits,8
 #else
 	.text
 #endif
-	.align ALIGNARG(4)
+	.p2align 3
 	ASM_TYPE_DIRECTIVE(one,@object)
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
@@ -30,9 +28,9 @@ limit:	.double 0.29
 
 
 #ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
 #else
-#define MO(op) op
+# define MO(op) op
 #endif
 
 	.text
@@ -65,3 +63,20 @@ ENTRY(__ieee754_log10l)
 	fstp	%st(1)
 	ret
 END(__ieee754_log10l)
+
+
+ENTRY(__log10l_finite)
+	fldlg2			// log10(2)
+	fldt	8(%rsp)		// x : log10(2)
+	fld	%st		// x : x : log10(2)
+4:	fsubl	MO(one)		// x-1 : x : log10(2)
+	fld	%st		// x-1 : x-1 : x : log10(2)
+	fabs			// |x-1| : x-1 : x : log10(2)
+	fcompl	MO(limit)	// x-1 : x : log10(2)
+	fnstsw			// x-1 : x : log10(2)
+	andb	$0x45, %ah
+	jz	2b
+	fstp	%st(1)		// x-1 : log10(2)
+	fyl2xp1			// log10(x)
+	ret
+END(__log10l_finite)
diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S
index f04d30a..78dc2d5 100644
--- a/sysdeps/x86_64/fpu/e_log2l.S
+++ b/sysdeps/x86_64/fpu/e_log2l.S
@@ -10,11 +10,11 @@
 #include <machine/asm.h>
 
 #ifdef __ELF__
-	.section .rodata
+	.section .rodata.cst8,"aM",@progbits,8
 #else
 	.text
 #endif
-	.align ALIGNARG(4)
+	.p2align 3
 	ASM_TYPE_DIRECTIVE(one,@object)
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
@@ -27,9 +27,9 @@ limit:	.double 0.29
 
 
 #ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
 #else
-#define MO(op) op
+# define MO(op) op
 #endif
 
 	.text
@@ -62,3 +62,20 @@ ENTRY(__ieee754_log2l)
 	fstp	%st(1)
 	ret
 END (__ieee754_log2l)
+
+
+ENTRY(__log2l_finite)
+	fldl	MO(one)
+	fldt	8(%rsp)		// x : 1
+	fld	%st		// x : x : 1
+	fsub	%st(2), %st	// x-1 : x : 1
+	fld	%st		// x-1 : x-1 : x : 1
+	fabs			// |x-1| : x-1 : x : 1
+	fcompl	MO(limit)	// x-1 : x : 1
+	fnstsw			// x-1 : x : 1
+	andb	$0x45, %ah
+	jz	2b
+	fstp	%st(1)		// x-1 : 1
+	fyl2xp1			// log(x)
+	ret
+END (__log2l_finite)
diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S
index 2ba91ee..2503b9a 100644
--- a/sysdeps/x86_64/fpu/e_logl.S
+++ b/sysdeps/x86_64/fpu/e_logl.S
@@ -8,15 +8,13 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: $")
-
 
 #ifdef __ELF__
-	.section .rodata
+	.section .rodata.cst8,"aM",@progbits,8
 #else
 	.text
 #endif
-	.align ALIGNARG(4)
+	.p2align 3
 	ASM_TYPE_DIRECTIVE(one,@object)
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
@@ -29,9 +27,9 @@ limit:	.double 0.29
 
 
 #ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
 #else
-#define MO(op) op
+# define MO(op) op
 #endif
 
 	.text
@@ -64,3 +62,20 @@ ENTRY(__ieee754_logl)
 	fstp	%st(1)
 	ret
 END (__ieee754_logl)
+
+
+ENTRY(__logl_finite)
+	fldln2			// log(2)
+	fldt	8(%rsp)		// x : log(2)
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah
+	jz	2b
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END (__logl_finite)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index a0b1b1d..a65c465 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -22,12 +22,27 @@
 #include <machine/asm.h>
 
 #ifdef __ELF__
-	.section .rodata
+	.section .rodata.cst8,"aM",@progbits,8
 #else
 	.text
 #endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
 
-	.align ALIGNARG(4)
+#ifdef __ELF__
+	.section .rodata.cst16,"aM",@progbits,16
+#else
+	.text
+#endif
+	.p2align 3
 	ASM_TYPE_DIRECTIVE(infinity,@object)
 inf_zero:
 infinity:
@@ -43,21 +58,11 @@ minfinity:
 mzero:
 	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
 	ASM_SIZE_DIRECTIVE(minf_mzero)
-	ASM_TYPE_DIRECTIVE(one,@object)
-one:	.double 1.0
-	ASM_SIZE_DIRECTIVE(one)
-	ASM_TYPE_DIRECTIVE(limit,@object)
-limit:	.double 0.29
-	ASM_SIZE_DIRECTIVE(limit)
-	ASM_TYPE_DIRECTIVE(p63,@object)
-p63:
-	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
-	ASM_SIZE_DIRECTIVE(p63)
 
 #ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
 #else
-#define MO(op) op
+# define MO(op) op
 #endif
 
 	.text
@@ -339,3 +344,4 @@ ENTRY(__ieee754_powl)
 	ret
 
 END(__ieee754_powl)
+strong_alias (__ieee754_powl, __powl_finite)
diff --git a/sysdeps/x86_64/fpu/e_remainderl.S b/sysdeps/x86_64/fpu/e_remainderl.S
index 480b1ca..4ee0910 100644
--- a/sysdeps/x86_64/fpu/e_remainderl.S
+++ b/sysdeps/x86_64/fpu/e_remainderl.S
@@ -18,3 +18,4 @@ ENTRY(__ieee754_remainderl)
 	fstp	%st(1)
 	ret
 END (__ieee754_remainderl)
+strong_alias (__ieee754_remainderl, __remainderl_finite)
diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S
index 6b22970..5833321 100644
--- a/sysdeps/x86_64/fpu/e_scalbl.S
+++ b/sysdeps/x86_64/fpu/e_scalbl.S
@@ -10,8 +10,6 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: $")
-
 #ifdef __ELF__
 	.section .rodata
 #else
@@ -23,16 +21,15 @@ RCSID("$NetBSD: $")
 zero_nan:
 	.double 0.0
 nan:	.byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
-minus_zero:
 	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
 	.byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
 	ASM_SIZE_DIRECTIVE(zero_nan)
 
 
 #ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
 #else
-#define MO(op) op
+# define MO(op) op
 #endif
 
 	.text
@@ -98,3 +95,4 @@ ENTRY(__ieee754_scalbl)
 	fdiv	%st
 	ret
 END(__ieee754_scalbl)
+strong_alias (__ieee754_scalbl, __scalbl_finite)
diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c
index d588a8b..9912099 100644
--- a/sysdeps/x86_64/fpu/e_sqrt.c
+++ b/sysdeps/x86_64/fpu/e_sqrt.c
@@ -1,5 +1,5 @@
 /* Square root of floating point number.
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,12 +19,14 @@
 
 #include <math_private.h>
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
   double res;
 
-  asm ("sqrtsd %0, %1" : "=x" (res) : "x" (x));
+  asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (x));
 
   return res;
 }
+strong_alias (__ieee754_sqrt, __sqrt_finite)
diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c
index f7801f0..dade4f5 100644
--- a/sysdeps/x86_64/fpu/e_sqrtf.c
+++ b/sysdeps/x86_64/fpu/e_sqrtf.c
@@ -1,5 +1,5 @@
 /* Square root of floating point number.
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,12 +19,14 @@
 
 #include <math_private.h>
 
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
   float res;
 
-  asm ("sqrtss %0, %1" : "=x" (res) : "x" (x));
+  asm ("sqrtss %1, %0" : "=x" (res) : "xm" (x));
 
   return res;
 }
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 37357d3..523ec54 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -58,22 +58,35 @@ do {								\
 #endif
 
 #define __isnan(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, d);				      \
+  ({ long int __di; EXTRACT_WORDS64 (__di, (double) d);			      \
      (__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; })
 #define __isnanf(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, d);				      \
+  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
      (__di & 0x7fffffff) > 0x7f800000; })
 
 #define __isinf_ns(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, d);				      \
+  ({ long int __di; EXTRACT_WORDS64 (__di, (double) d);			      \
      (__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; })
 #define __isinf_nsf(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, d);				      \
+  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
      (__di & 0x7fffffff) == 0x7f800000; })
 
 #define __finite(d) \
-  ({ long int __di; EXTRACT_WORDS64 (__di, d);				      \
+  ({ long int __di; EXTRACT_WORDS64 (__di, (double) d);			      \
      (__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; })
 #define __finitef(d) \
-  ({ int __di; GET_FLOAT_WORD (__di, d);				      \
+  ({ int __di; GET_FLOAT_WORD (__di, (float) d);			      \
      (__di & 0x7fffffff) < 0x7f800000; })
+
+#define __ieee754_sqrt(d) \
+  ({ double __res;							      \
+     asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) d));		      \
+     __res; })
+#define __ieee754_sqrtf(d) \
+  ({ float __res;							      \
+     asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) d));		      \
+     __res; })
+#define __ieee754_sqrtl(d) \
+  ({ long double __res;							      \
+     asm ("fsqrt" : "=t" (__res) : "0" ((long double) d));		      \
+     __res; })
author	Ulrich Drepper <drepper@gmail.com>	2011-10-12 11:27:51 -0400
committer	Ulrich Drepper <drepper@gmail.com>	2011-10-12 11:27:51 -0400
commit	0ac5ae2335292908f39031b1ea9fe8edce433c0f (patch)
tree	f9d26c8abc0de39d18d4c13e70f6022cdc6b461f /sysdeps/x86_64
parent	a843a204a3e8a0dd53584dad3668771abaec84ac (diff)
download	glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.zip glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.gz glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.bz2