diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2014-07-14 07:58:27 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2014-07-14 07:58:27 -0700 |
commit | f2fef657d8736c32fb600771949f59852558b11a (patch) | |
tree | a9e954223fa5cbd9e53f143e0f50c6f6276ebe0c | |
parent | f6c44d475104e931bab2b4ffa499961088de673c (diff) | |
download | glibc-f2fef657d8736c32fb600771949f59852558b11a.zip glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.gz glibc-f2fef657d8736c32fb600771949f59852558b11a.tar.bz2 |
Enable AVX2 optimized memset only if -mavx2 works
* config.h.in (HAVE_AVX2_SUPPORT): New #undef.
* sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and
config-cflags-avx2.
* sysdeps/x86_64/configure.ac: Likewise.
* sysdeps/i386/configure: Regenerated.
* sysdeps/x86_64/configure: Likewise.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
memset-avx2 only if config-cflags-avx2 is yes.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
Test memset_chk and memset only if HAVE_AVX2_SUPPORT is
defined.
* sysdeps/x86_64/multiarch/memset.S: Define multiple versions
only if HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | ChangeLog | 17 |
| -rw-r--r-- | config.h.in | 3 |
| -rw-r--r-- | sysdeps/i386/configure | 26 |
| -rw-r--r-- | sysdeps/i386/configure.ac | 9 |
| -rw-r--r-- | sysdeps/x86_64/configure | 26 |
| -rw-r--r-- | sysdeps/x86_64/configure.ac | 9 |
| -rw-r--r-- | sysdeps/x86_64/multiarch/Makefile | 7 |
| -rw-r--r-- | sysdeps/x86_64/multiarch/ifunc-impl-list.c | 2 |
| -rw-r--r-- | sysdeps/x86_64/multiarch/memset.S | 24 |
| -rw-r--r-- | sysdeps/x86_64/multiarch/memset_chk.S | 2 |
10 files changed, 111 insertions, 14 deletions
@@ -1,3 +1,20 @@ +2014-07-14 H.J. Lu <hongjiu.lu@intel.com> + + * config.h.in (HAVE_AVX2_SUPPORT): New #undef. + * sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and + config-cflags-avx2. + * sysdeps/x86_64/configure.ac: Likewise. + * sysdeps/i386/configure: Regenerated. + * sysdeps/x86_64/configure: Likewise. + * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add + memset-avx2 only if config-cflags-avx2 is yes. + * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): + Tests for memset_chk and memset only if HAVE_AVX2_SUPPORT is + defined. + * sysdeps/x86_64/multiarch/memset.S: Define multiple versions + only if HAVE_AVX2_SUPPORT is defined. + * sysdeps/x86_64/multiarch/memset_chk.S: Likewise. + 2014-07-14 Alan Modra <amodra@gmail.com> [BZ #17153] diff --git a/config.h.in b/config.h.in index 2dcd135..97b5571 100644 --- a/config.h.in +++ b/config.h.in @@ -103,6 +103,9 @@ /* Define if gcc supports FMA4. */ #undef HAVE_FMA4_SUPPORT +/* Define if gcc supports AVX2. */ +#undef HAVE_AVX2_SUPPORT + /* Define if the compiler\'s exception support is based on libunwind. */ #undef HAVE_CC_WITH_LIBUNWIND diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure index f0a20e3..6e89b59 100644 --- a/sysdeps/i386/configure +++ b/sysdeps/i386/configure @@ -240,6 +240,32 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; } config_vars="$config_vars config-cflags-novzeroupper = $libc_cv_cc_novzeroupper" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5 +$as_echo_n "checking for AVX2 support... " >&6; } +if ${libc_cv_cc_avx2+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + libc_cv_cc_avx2=yes +else + libc_cv_cc_avx2=no +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5 +$as_echo "$libc_cv_cc_avx2" >&6; } +if test $libc_cv_cc_avx2 = yes; then + $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h + +fi +config_vars="$config_vars +config-cflags-avx2 = $libc_cv_cc_avx2" + $as_echo "#define USE_REGPARMS 1" >>confdefs.h diff --git a/sysdeps/i386/configure.ac b/sysdeps/i386/configure.ac index dfe0b47..35c4522 100644 --- a/sysdeps/i386/configure.ac +++ b/sysdeps/i386/configure.ac @@ -88,6 +88,15 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper], ]) LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper]) +dnl Check if -mavx2 works. +AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl +LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no]) +]) +if test $libc_cv_cc_avx2 = yes; then + AC_DEFINE(HAVE_AVX2_SUPPORT) +fi +LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2]) + AC_DEFINE(USE_REGPARMS) dnl It is always possible to access static and hidden symbols in an diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index 45d868d..7d4dadd 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -249,6 +249,32 @@ if test $libc_cv_asm_mpx == yes; then fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5 +$as_echo_n "checking for AVX2 support... " >&6; } +if ${libc_cv_cc_avx2+:} false; then : + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + libc_cv_cc_avx2=yes +else + libc_cv_cc_avx2=no +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5 +$as_echo "$libc_cv_cc_avx2" >&6; } +if test $libc_cv_cc_avx2 = yes; then + $as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h + +fi +config_vars="$config_vars +config-cflags-avx2 = $libc_cv_cc_avx2" + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h # work around problem with autoconf and empty lines at the end of files diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index 9138f63..c9f9a51 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -90,6 +90,15 @@ if test $libc_cv_asm_mpx == yes; then AC_DEFINE(HAVE_MPX_SUPPORT) fi +dnl Check if -mavx2 works. +AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl +LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no]) +]) +if test $libc_cv_cc_avx2 = yes; then + AC_DEFINE(HAVE_AVX2_SUPPORT) +fi +LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2]) + dnl It is always possible to access static and hidden symbols in an dnl position independent way. 
AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 42df96f..3bb9702 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -17,8 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \ - memset-avx2 + strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c varshift @@ -27,6 +26,10 @@ CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strspn-c.c += -msse4 endif + +ifeq (yes,$(config-cflags-avx2)) +sysdep_routines += memset-avx2 +endif endif ifeq ($(subdir),wcsmbs) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index f1593c5..7e93e59 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) +#ifdef HAVE_AVX2_SUPPORT /* Support sysdeps/x86_64/multiarch/memset_chk.S. */ IFUNC_IMPL (i, name, __memset_chk, IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2) @@ -71,6 +72,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2) IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2)) +#endif /* Support sysdeps/x86_64/multiarch/stpncpy.S. 
*/ IFUNC_IMPL (i, name, stpncpy, diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S index 3113d1c..00d46d1 100644 --- a/sysdeps/x86_64/multiarch/memset.S +++ b/sysdeps/x86_64/multiarch/memset.S @@ -17,12 +17,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifdef HAVE_AVX2_SUPPORT #include <sysdep.h> #include <shlib-compat.h> #include <init-arch.h> /* Define multiple versions only for the definition in lib. */ -#ifndef NOT_IN_libc +# ifndef NOT_IN_libc ENTRY(memset) .type memset, @gnu_indirect_function cmpl $0, __cpu_features+KIND_OFFSET(%rip) @@ -34,26 +35,27 @@ ENTRY(memset) leaq __memset_avx2(%rip), %rax 2: ret END(memset) -#endif +# endif -#if !defined NOT_IN_libc -# undef memset -# define memset __memset_sse2 +# if !defined NOT_IN_libc +# undef memset +# define memset __memset_sse2 -# undef __memset_chk -# define __memset_chk __memset_chk_sse2 +# undef __memset_chk +# define __memset_chk __memset_chk_sse2 -# ifdef SHARED +# ifdef SHARED # undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal memset calls through a PLT. The speedup we get from using GPR instruction is likely eaten away by the indirect call in the PLT. */ # define libc_hidden_builtin_def(name) \ .globl __GI_memset; __GI_memset = __memset_sse2 -# endif +# endif -# undef strong_alias -# define strong_alias(original, alias) +# undef strong_alias +# define strong_alias(original, alias) +# endif #endif #include "../memset.S" diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S index 2182780..8a607bd 100644 --- a/sysdeps/x86_64/multiarch/memset_chk.S +++ b/sysdeps/x86_64/multiarch/memset_chk.S @@ -22,7 +22,7 @@ /* Define multiple versions only for the definition in lib. */ #ifndef NOT_IN_libc -# ifdef SHARED +# if defined SHARED && defined HAVE_AVX2_SUPPORT ENTRY(__memset_chk) .type __memset_chk, @gnu_indirect_function cmpl $0, __cpu_features+KIND_OFFSET(%rip) |