diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-10-12 11:27:51 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-10-12 11:27:51 -0400 |
commit | 0ac5ae2335292908f39031b1ea9fe8edce433c0f (patch) | |
tree | f9d26c8abc0de39d18d4c13e70f6022cdc6b461f /sysdeps/x86_64 | |
parent | a843a204a3e8a0dd53584dad3668771abaec84ac (diff) | |
download | glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.zip glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.gz glibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.bz2 |
Optimize libm
libm is now somewhat integrated with gcc's -ffinite-math-only option
and lots of the wrapper functions have been optimized.
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- | sysdeps/x86_64/fpu/bits/fenv.h | 35 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/bits/mathinline.h | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_exp2l.S | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_fmodl.S | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_log10l.S | 27 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_log2l.S | 25 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_logl.S | 27 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_powl.S | 34 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_remainderl.S | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_scalbl.S | 8 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_sqrt.c | 6 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/e_sqrtf.c | 6 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/math_private.h | 25 |
13 files changed, 152 insertions, 47 deletions
diff --git a/sysdeps/x86_64/fpu/bits/fenv.h b/sysdeps/x86_64/fpu/bits/fenv.h index 11859f0..be2518d 100644 --- a/sysdeps/x86_64/fpu/bits/fenv.h +++ b/sysdeps/x86_64/fpu/bits/fenv.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997,1998,1999,2000,2001,2004 Free Software Foundation, Inc. +/* Copyright (C) 1997-2001,2004,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -95,3 +95,36 @@ fenv_t; /* Floating-point environment where none of the exception is masked. */ # define FE_NOMASK_ENV ((__const fenv_t *) -2) #endif + + +#ifdef __OPTIMIZE__ +/* Optimized versions. */ +extern int __feraiseexcept_renamed (int) __asm__ ("feraiseexcept"); +__extern_inline int feraiseexcept (int __excepts) +{ + if (__builtin_constant_p (__excepts) + && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0) + { + if ((FE_INVALID & __excepts) != 0) + { + /* One example of a invalid operation is 0.0 / 0.0. */ + float __f = 0.0; + + __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f)); + (void) &__f; + } + if ((FE_DIVBYZERO & __excepts) != 0) + { + float f = 1.0; + float g = 0.0; + + __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g)); + (void) &f; + } + + return 0; + } + + return __feraiseexcept_renamed (__excepts); +} +#endif diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h index 780f878..5bdf47e 100644 --- a/sysdeps/x86_64/fpu/bits/mathinline.h +++ b/sysdeps/x86_64/fpu/bits/mathinline.h @@ -103,7 +103,8 @@ __NTH (llrint (double __x)) } # endif -# if __FINITE_MATH_ONLY__ == 1 && (__WORDSIZE == 64 || defined __SSE2_MATH__) +# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ + && (__WORDSIZE == 64 || defined __SSE2_MATH__) /* Determine maximum of two values. */ __MATH_INLINE float __NTH (fmaxf (float __x, float __y)) diff --git a/sysdeps/x86_64/fpu/e_exp2l.S b/sysdeps/x86_64/fpu/e_exp2l.S index 336b989..7abf425 100644 --- a/sysdeps/x86_64/fpu/e_exp2l.S +++ b/sysdeps/x86_64/fpu/e_exp2l.S @@ -36,3 +36,4 @@ ENTRY(__ieee754_exp2l) fldz /* Set result to 0. */ 2: ret END (__ieee754_exp2l) +strong_alias (__ieee754_exp2l, __exp2l_finite) diff --git a/sysdeps/x86_64/fpu/e_fmodl.S b/sysdeps/x86_64/fpu/e_fmodl.S index 2967bf2..07c50df 100644 --- a/sysdeps/x86_64/fpu/e_fmodl.S +++ b/sysdeps/x86_64/fpu/e_fmodl.S @@ -20,3 +20,4 @@ ENTRY(__ieee754_fmodl) fstp %st(1) ret END (__ieee754_fmodl) +strong_alias (__ieee754_fmodl, __fmodl_finite) diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S index 633234b..50c5875 100644 --- a/sysdeps/x86_64/fpu/e_log10l.S +++ b/sysdeps/x86_64/fpu/e_log10l.S @@ -10,14 +10,12 @@ #include <machine/asm.h> -RCSID("$NetBSD: $") - #ifdef __ELF__ - .section .rodata + .section .rodata.cst8,"aM",@progbits,8 #else .text #endif - .align ALIGNARG(4) + .p2align 3 ASM_TYPE_DIRECTIVE(one,@object) one: .double 1.0 ASM_SIZE_DIRECTIVE(one) @@ -30,9 +28,9 @@ limit: .double 0.29 #ifdef PIC -#define MO(op) op##(%rip) +# define MO(op) op##(%rip) #else -#define MO(op) op +# define MO(op) op #endif .text @@ -65,3 +63,20 @@ ENTRY(__ieee754_log10l) fstp %st(1) ret END(__ieee754_log10l) + + +ENTRY(__log10l_finite) + fldlg2 // log10(2) + fldt 8(%rsp) // x : log10(2) + fld %st // x : x : log10(2) +4: fsubl MO(one) // x-1 : x : log10(2) + fld %st // x-1 : x-1 : x : log10(2) + fabs // |x-1| : x-1 : x : log10(2) + fcompl MO(limit) // x-1 : x : log10(2) + fnstsw // x-1 : x : log10(2) + andb $0x45, %ah + jz 2b + fstp %st(1) // x-1 : log10(2) + fyl2xp1 // log10(x) + ret +END(__log10l_finite) diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S index f04d30a..78dc2d5 100644 --- a/sysdeps/x86_64/fpu/e_log2l.S +++ b/sysdeps/x86_64/fpu/e_log2l.S @@ -10,11 +10,11 @@ #include <machine/asm.h> #ifdef __ELF__ - .section .rodata + .section .rodata.cst8,"aM",@progbits,8 #else .text #endif - .align ALIGNARG(4) + .p2align 3 ASM_TYPE_DIRECTIVE(one,@object) one: .double 1.0 ASM_SIZE_DIRECTIVE(one) @@ -27,9 +27,9 @@ limit: .double 0.29 #ifdef PIC -#define MO(op) op##(%rip) +# define MO(op) op##(%rip) #else -#define MO(op) op +# define MO(op) op #endif .text @@ -62,3 +62,20 @@ ENTRY(__ieee754_log2l) fstp %st(1) ret END (__ieee754_log2l) + + +ENTRY(__log2l_finite) + fldl MO(one) + fldt 8(%rsp) // x : 1 + fld %st // x : x : 1 + fsub %st(2), %st // x-1 : x : 1 + fld %st // x-1 : x-1 : x : 1 + fabs // |x-1| : x-1 : x : 1 + fcompl MO(limit) // x-1 : x : 1 + fnstsw // x-1 : x : 1 + andb $0x45, %ah + jz 2b + fstp %st(1) // x-1 : 1 + fyl2xp1 // log(x) + ret +END (__log2l_finite) diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S index 2ba91ee..2503b9a 100644 --- a/sysdeps/x86_64/fpu/e_logl.S +++ b/sysdeps/x86_64/fpu/e_logl.S @@ -8,15 +8,13 @@ #include <machine/asm.h> -RCSID("$NetBSD: $") - #ifdef __ELF__ - .section .rodata + .section .rodata.cst8,"aM",@progbits,8 #else .text #endif - .align ALIGNARG(4) + .p2align 3 ASM_TYPE_DIRECTIVE(one,@object) one: .double 1.0 ASM_SIZE_DIRECTIVE(one) @@ -29,9 +27,9 @@ limit: .double 0.29 #ifdef PIC -#define MO(op) op##(%rip) +# define MO(op) op##(%rip) #else -#define MO(op) op +# define MO(op) op #endif .text @@ -64,3 +62,20 @@ ENTRY(__ieee754_logl) fstp %st(1) ret END (__ieee754_logl) + + +ENTRY(__logl_finite) + fldln2 // log(2) + fldt 8(%rsp) // x : log(2) + fld %st // x : x : log(2) + fsubl MO(one) // x-1 : x : log(2) + fld %st // x-1 : x-1 : x : log(2) + fabs // |x-1| : x-1 : x : log(2) + fcompl MO(limit) // x-1 : x : log(2) + fnstsw // x-1 : x : log(2) + andb $0x45, %ah + jz 2b + fstp %st(1) // x-1 : log(2) + fyl2xp1 // log(x) + ret +END (__logl_finite) diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index a0b1b1d..a65c465 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -22,12 +22,27 @@ #include <machine/asm.h> #ifdef __ELF__ - .section .rodata + .section .rodata.cst8,"aM",@progbits,8 #else .text #endif + .p2align 3 + ASM_TYPE_DIRECTIVE(one,@object) +one: .double 1.0 + ASM_SIZE_DIRECTIVE(one) + ASM_TYPE_DIRECTIVE(limit,@object) +limit: .double 0.29 + ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) - .align ALIGNARG(4) +#ifdef __ELF__ + .section .rodata.cst16,"aM",@progbits,16 +#else + .text +#endif + .p2align 3 ASM_TYPE_DIRECTIVE(infinity,@object) inf_zero: infinity: @@ -43,21 +58,11 @@ minfinity: mzero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80 ASM_SIZE_DIRECTIVE(minf_mzero) - ASM_TYPE_DIRECTIVE(one,@object) -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - ASM_TYPE_DIRECTIVE(limit,@object) -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - ASM_TYPE_DIRECTIVE(p63,@object) -p63: - .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 - ASM_SIZE_DIRECTIVE(p63) #ifdef PIC -#define MO(op) op##(%rip) +# define MO(op) op##(%rip) #else -#define MO(op) op +# define MO(op) op #endif .text @@ -339,3 +344,4 @@ ENTRY(__ieee754_powl) ret END(__ieee754_powl) +strong_alias (__ieee754_powl, __powl_finite) diff --git a/sysdeps/x86_64/fpu/e_remainderl.S b/sysdeps/x86_64/fpu/e_remainderl.S index 480b1ca..4ee0910 100644 --- a/sysdeps/x86_64/fpu/e_remainderl.S +++ b/sysdeps/x86_64/fpu/e_remainderl.S @@ -18,3 +18,4 @@ ENTRY(__ieee754_remainderl) fstp %st(1) ret END (__ieee754_remainderl) +strong_alias (__ieee754_remainderl, __remainderl_finite) diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S index 6b22970..5833321 100644 --- a/sysdeps/x86_64/fpu/e_scalbl.S +++ b/sysdeps/x86_64/fpu/e_scalbl.S @@ -10,8 +10,6 @@ #include <machine/asm.h> -RCSID("$NetBSD: $") - #ifdef __ELF__ .section .rodata #else @@ -23,16 +21,15 @@ RCSID("$NetBSD: $") zero_nan: .double 0.0 nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f -minus_zero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80 .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f ASM_SIZE_DIRECTIVE(zero_nan) #ifdef PIC -#define MO(op) op##(%rip) +# define MO(op) op##(%rip) #else -#define MO(op) op +# define MO(op) op #endif .text @@ -98,3 +95,4 @@ ENTRY(__ieee754_scalbl) fdiv %st ret END(__ieee754_scalbl) +strong_alias (__ieee754_scalbl, __scalbl_finite) diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c index d588a8b..9912099 100644 --- a/sysdeps/x86_64/fpu/e_sqrt.c +++ b/sysdeps/x86_64/fpu/e_sqrt.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002 Free Software Foundation, Inc. + Copyright (C) 2002, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,12 +19,14 @@ #include <math_private.h> +#undef __ieee754_sqrt double __ieee754_sqrt (double x) { double res; - asm ("sqrtsd %0, %1" : "=x" (res) : "x" (x)); + asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (x)); return res; } +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c index f7801f0..dade4f5 100644 --- a/sysdeps/x86_64/fpu/e_sqrtf.c +++ b/sysdeps/x86_64/fpu/e_sqrtf.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002 Free Software Foundation, Inc. + Copyright (C) 2002, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,12 +19,14 @@ #include <math_private.h> +#undef __ieee754_sqrtf float __ieee754_sqrtf (float x) { float res; - asm ("sqrtss %0, %1" : "=x" (res) : "x" (x)); + asm ("sqrtss %1, %0" : "=x" (res) : "xm" (x)); return res; } +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 37357d3..523ec54 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -58,22 +58,35 @@ do { \ #endif #define __isnan(d) \ - ({ long int __di; EXTRACT_WORDS64 (__di, d); \ + ({ long int __di; EXTRACT_WORDS64 (__di, (double) d); \ (__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; }) #define __isnanf(d) \ - ({ int __di; GET_FLOAT_WORD (__di, d); \ + ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ (__di & 0x7fffffff) > 0x7f800000; }) #define __isinf_ns(d) \ - ({ long int __di; EXTRACT_WORDS64 (__di, d); \ + ({ long int __di; EXTRACT_WORDS64 (__di, (double) d); \ (__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; }) #define __isinf_nsf(d) \ - ({ int __di; GET_FLOAT_WORD (__di, d); \ + ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ (__di & 0x7fffffff) == 0x7f800000; }) #define __finite(d) \ - ({ long int __di; EXTRACT_WORDS64 (__di, d); \ + ({ long int __di; EXTRACT_WORDS64 (__di, (double) d); \ (__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; }) #define __finitef(d) \ - ({ int __di; GET_FLOAT_WORD (__di, d); \ + ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ (__di & 0x7fffffff) < 0x7f800000; }) + +#define __ieee754_sqrt(d) \ + ({ double __res; \ + asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) d)); \ + __res; }) +#define __ieee754_sqrtf(d) \ + ({ float __res; \ + asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) d)); \ + __res; }) +#define __ieee754_sqrtl(d) \ + ({ long double __res; \ + asm ("fsqrt" : "=t" (__res) : "0" ((long double) d)); \ + __res; }) |