diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/i386/configure | 23 | ||||
-rw-r--r-- | sysdeps/i386/configure.in | 11 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/bits/mathinline.h | 39 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/math_private.h | 38 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/Makefile | 22 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/e_atan2.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/e_exp.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/e_log.c | 3 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_atan.c | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_sin.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/s_tan.c | 2 |
11 files changed, 91 insertions, 57 deletions
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure index ae494e2..bc7900e 100644 --- a/sysdeps/i386/configure +++ b/sysdeps/i386/configure @@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5 +$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; } +if ${libc_cv_cc_sse2avx+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_cc_sse2avx=yes +else + libc_cv_cc_sse2avx=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5 +$as_echo "$libc_cv_cc_sse2avx" >&6; } +if test $libc_cv_cc_sse2avx = yes; then + $as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h + +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5 $as_echo_n "checking for FMA4 support... " >&6; } if ${libc_cv_cc_fma4+:} false; then : diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in index 5a9840e..59a4cd6 100644 --- a/sysdeps/i386/configure.in +++ b/sysdeps/i386/configure.in @@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then AC_DEFINE(HAVE_AVX_SUPPORT) fi +dnl Check if -msse2avx works. +AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl +if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then + libc_cv_cc_sse2avx=yes +else + libc_cv_cc_sse2avx=no +fi]) +if test $libc_cv_cc_sse2avx = yes; then + AC_DEFINE(HAVE_SSE2AVX_SUPPORT) +fi + dnl Check if -mfma4 works. 
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h index 1a2c1ee..038c64e 100644 --- a/sysdeps/x86_64/fpu/bits/mathinline.h +++ b/sysdeps/x86_64/fpu/bits/mathinline.h @@ -1,5 +1,5 @@ /* Inline math functions for x86-64. - Copyright (C) 2002-2004, 2007, 2009, 2011 Free Software Foundation, Inc. + Copyright (C) 2002-2004,2007,2009,2011,2012 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -64,21 +64,20 @@ __MATH_INLINE int __NTH (__signbitl (long double __x)) { __extension__ union { long double __l; int __i[3]; } __u = { __l: __x }; - return (__u.__i[2] & 0x8000) != 0; + return __u.__i[2] & 0x8000; } __END_NAMESPACE_C99 #endif -#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \ - && defined __OPTIMIZE__) +#if __GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES && defined __OPTIMIZE__ # ifdef __USE_ISOC99 __BEGIN_NAMESPACE_C99 /* Round to nearest integer. */ -# if __WORDSIZE == 64 || defined __SSE_MATH__ +# ifdef __SSE_MATH__ __MATH_INLINE long int __NTH (lrintf (float __x)) { @@ -87,7 +86,7 @@ __NTH (lrintf (float __x)) return __res; } # endif -# if __WORDSIZE == 64 || defined __SSE2_MATH__ +# ifdef __SSE2_MATH__ __MATH_INLINE long int __NTH (lrint (double __x)) { @@ -114,40 +113,64 @@ __NTH (llrint (double __x)) # endif # if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ - && (__WORDSIZE == 64 || defined __SSE2_MATH__) + && defined __SSE2_MATH__ /* Determine maximum of two values. 
*/ __MATH_INLINE float __NTH (fmaxf (float __x, float __y)) { +# ifdef __AVX__ + float __res; + __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y)); + return __res; +# else __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y)); return __x; +# endif } __MATH_INLINE double __NTH (fmax (double __x, double __y)) { +# ifdef __AVX__ + double __res; + __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y)); + return __res; +# else __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y)); return __x; +# endif } /* Determine minimum of two values. */ __MATH_INLINE float __NTH (fminf (float __x, float __y)) { +# ifdef __AVX__ + float __res; + __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y)); + return __res; +# else __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y)); return __x; +# endif } __MATH_INLINE double __NTH (fmin (double __x, double __y)) { +# ifdef __AVX__ + double __res; + __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (__x), "xm" (__y)); + return __res; +# else __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y)); return __x; +# endif } # endif __END_NAMESPACE_C99 # endif -# if defined __SSE4_1__ && (__WORDSIZE == 64 || defined __SSE2_MATH__) +# if defined __SSE4_1__ && defined __SSE2_MATH__ # if defined __USE_MISC || defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 __BEGIN_NAMESPACE_C99 diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 50f765f..8e79718 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -19,7 +19,7 @@ /* We can do a few things better on x86-64. 
*/ -#ifdef __AVX__ +#if defined __AVX__ || defined SSE2AVX # define MOVD "vmovd" # define STMXCSR "vstmxcsr" # define LDMXCSR "vldmxcsr" @@ -90,7 +90,7 @@ ({ int __di; GET_FLOAT_WORD (__di, (float) d); \ (__di & 0x7fffffff) < 0x7f800000; }) -#ifdef __AVX__ +#if defined __AVX__ || defined SSE2AVX # define __ieee754_sqrt(d) \ ({ double __res; \ asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ @@ -116,7 +116,7 @@ #ifdef __SSE4_1__ # ifndef __rint -# ifdef __AVX__ +# if defined __AVX__ || defined SSE2AVX # define __rint(d) \ ({ double __res; \ asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ @@ -129,7 +129,7 @@ # endif # endif # ifndef __rintf -# ifdef __AVX__ +# if defined __AVX__ || defined SSE2AVX # define __rintf(d) \ ({ float __res; \ asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ @@ -143,7 +143,7 @@ # endif # ifndef __floor -# ifdef __AVX__ +# if defined __AVX__ || defined SSE2AVX # define __floor(d) \ ({ double __res; \ asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ @@ -156,7 +156,7 @@ # endif # endif # ifndef __floorf -# ifdef __AVX__ +# if defined __AVX__ || defined SSE2AVX # define __floorf(d) \ ({ float __res; \ asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ @@ -173,29 +173,6 @@ /* Specialized variants of the <fenv.h> interfaces which only handle either the FPU or the SSE unit. 
*/ -#undef libc_fegetround -#define libc_fegetround() \ - ({ \ - unsigned int mxcsr; \ - asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ - (mxcsr & 0x6000) >> 3; \ - }) -#undef libc_fegetroundf -#define libc_fegetroundf() libc_fegetround () -// #define libc_fegetroundl() fegetround () - -#undef libc_fesetround -#define libc_fesetround(r) \ - do { \ - unsigned int mxcsr; \ - asm (STMXCSR " %0" : "=m" (*&mxcsr)); \ - mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \ - asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \ - } while (0) -#undef libc_fesetroundf -#define libc_fesetroundf(r) libc_fesetround (r) -// #define libc_fesetroundl(r) (void) fesetround (r) - #undef libc_feholdexcept #define libc_feholdexcept(e) \ do { \ @@ -224,7 +201,8 @@ #undef libc_fetestexcept #define libc_fetestexcept(e) \ - ({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ + ({ unsigned int mxcsr; \ + asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ mxcsr & (e) & FE_ALL_EXCEPT; }) #undef libc_fetestexceptf #define libc_fetestexceptf(e) libc_fetestexcept (e) diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index be68903..4b5c173 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 endif -ifeq ($(config-cflags-avx),yes) +ifeq ($(config-cflags-sse2avx),yes) libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ e_atan2-avx s_sin-avx s_tan-avx \ mplog-avx mpa-avx slowexp-avx \ mpexp-avx -CFLAGS-e_atan2-avx.c = -mavx -CFLAGS-e_exp-avx.c = -mavx -CFLAGS-e_log-avx.c = -mavx -CFLAGS-mpa-avx.c = -mavx -CFLAGS-mpexp-avx.c = -mavx -CFLAGS-mplog-avx.c = -mavx -CFLAGS-s_atan-avx.c = -mavx -CFLAGS-s_sin-avx.c = -mavx -CFLAGS-slowexp-avx.c = -mavx -CFLAGS-s_tan-avx.c = -mavx +CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX +CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX +CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX +CFLAGS-mpa-avx.c = -msse2avx 
-DSSE2AVX +CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX +CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX +CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX +CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX +CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX +CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX endif endif diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 3a615fc..6867c6e 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double); libm_ifunc (__ieee754_atan2, HAS_FMA4 ? __ieee754_atan2_fma4 - : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); strong_alias (__ieee754_atan2, __atan2_finite) # define __ieee754_atan2 __ieee754_atan2_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index 7b2320a..3c65028 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double); libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 - : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); + : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2)); strong_alias (__ieee754_exp, __exp_finite) # define __ieee754_exp __ieee754_exp_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index ab277d6..05f3668 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double); libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 - : (HAS_YMM_USABLE ? __ieee754_log_avx - : __ieee754_log_sse2)); + : (HAS_AVX ? 
__ieee754_log_avx : __ieee754_log_sse2)); strong_alias (__ieee754_log, __log_finite) # define __ieee754_log __ieee754_log_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index 78c7e09..ae16d7c 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -13,7 +13,7 @@ extern double __atan_fma4 (double); # endif libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : - HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); + HAS_AVX ? __atan_avx : __atan_sse2)); # define atan __atan_sse2 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index 417acd0..a0c2521 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -18,11 +18,11 @@ extern double __sin_fma4 (double); # endif libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : - HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); + HAS_AVX ? __cos_avx : __cos_sse2)); weak_alias (__cos, cos) libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : - HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); + HAS_AVX ? __sin_avx : __sin_sse2)); weak_alias (__sin, sin) # define __cos __cos_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index 3047155..904308f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -13,7 +13,7 @@ extern double __tan_fma4 (double); # endif libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : - HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); + HAS_AVX ? __tan_avx : __tan_sse2)); # define tan __tan_sse2 #endif |