From 1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03 Mon Sep 17 00:00:00 2001
From: Thomas Koenig
Date: Thu, 25 May 2017 21:51:27 +0000
Subject: re PR libfortran/78379 (Processor-specific versions for matmul)

2017-05-25  Thomas Koenig

	PR libfortran/78379
	* Makefile.am: Add generated/matmulavx128_*.c files.
	Handle them for compiling and setting the right flags.
	* acinclude.m4: Add tests for FMA3, FMA4 and AVX128.
	* configure.ac: Call them.
	* Makefile.in: Regenerated.
	* config.h.in: Regenerated.
	* configure: Regenerated.
	* m4/matmul.m4: Handle AMD chips by calling 128-bit AVX
	versions which use FMA3 or FMA4.
	* m4/matmulavx128.m4: New file.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Regenerated.
	* generated/matmul_c4.c: Regenerated.
	* generated/matmul_c8.c: Regenerated.
	* generated/matmul_i1.c: Regenerated.
	* generated/matmul_i16.c: Regenerated.
	* generated/matmul_i2.c: Regenerated.
	* generated/matmul_i4.c: Regenerated.
	* generated/matmul_i8.c: Regenerated.
	* generated/matmul_r10.c: Regenerated.
	* generated/matmul_r16.c: Regenerated.
	* generated/matmul_r4.c: Regenerated.
	* generated/matmul_r8.c: Regenerated.
	* generated/matmulavx128_c10.c: New file.
	* generated/matmulavx128_c16.c: New file.
	* generated/matmulavx128_c4.c: New file.
	* generated/matmulavx128_c8.c: New file.
	* generated/matmulavx128_i1.c: New file.
	* generated/matmulavx128_i16.c: New file.
	* generated/matmulavx128_i2.c: New file.
	* generated/matmulavx128_i4.c: New file.
	* generated/matmulavx128_i8.c: New file.
	* generated/matmulavx128_r10.c: New file.
	* generated/matmulavx128_r16.c: New file.
	* generated/matmulavx128_r4.c: New file.
	* generated/matmulavx128_r8.c: New file.

From-SVN: r248472 --- libgfortran/configure | 103 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) (limited to 'libgfortran/configure') diff --git a/libgfortran/configure b/libgfortran/configure index 81238fc..36e0155 100755 --- a/libgfortran/configure +++ b/libgfortran/configure @@ -606,6 +606,8 @@ am__EXEEXT_TRUE LTLIBOBJS LIBOBJS get_gcc_base_ver +HAVE_AVX128_FALSE +HAVE_AVX128_TRUE IEEE_FLAGS IEEE_SUPPORT IEEE_SUPPORT_FALSE @@ -12421,7 +12423,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12424 "configure" +#line 12426 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12527,7 +12529,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12530 "configure" +#line 12532 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -26363,6 +26365,99 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$ac_save_CFLAGS" +# Check for FMA3 extensions + + ac_save_CFLAGS="$CFLAGS" + CFLAGS="-O2 -mfma -mno-fma4" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + float + flt_mul_add (float a, float b, float c) + { + return __builtin_fmaf (a, b, c); + } +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_FMA3 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$ac_save_CFLAGS" + + +# Check for FMA4 extensions + + ac_save_CFLAGS="$CFLAGS" + CFLAGS="-O2 -mfma4 -mno-fma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + float + flt_mul_add (float a, float b, float c) + { + return __builtin_fmaf (a, b, c); + } +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_FMA4 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$ac_save_CFLAGS" + + +# Check if AVX128 works + + ac_save_CFLAGS="$CFLAGS" + CFLAGS="-O2 -mavx -mprefer-avx128" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + void foo() + { + } +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_AVX128 1" >>confdefs.h + + if true; then + HAVE_AVX128_TRUE= + HAVE_AVX128_FALSE='#' +else + HAVE_AVX128_TRUE='#' + HAVE_AVX128_FALSE= +fi + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$ac_save_CFLAGS" + + # Determine what GCC version number to use in filesystem paths. get_gcc_base_ver="cat" @@ -26615,6 +26710,10 @@ if test -z "${IEEE_SUPPORT_TRUE}" && test -z "${IEEE_SUPPORT_FALSE}"; then as_fn_error "conditional \"IEEE_SUPPORT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_AVX128_TRUE}" && test -z "${HAVE_AVX128_FALSE}"; then + as_fn_error "conditional \"HAVE_AVX128\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi : ${CONFIG_STATUS=./config.status} ac_write_fail=0 -- cgit v1.1