From 1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03 Mon Sep 17 00:00:00 2001 From: Thomas Koenig Date: Thu, 25 May 2017 21:51:27 +0000 Subject: re PR libfortran/78379 (Processor-specific versions for matmul) 2017-05-25 Thomas Koenig PR libfortran/78379 * Makefile.am: Add generated/matmulavx128_*.c files. Handle them for compiling and setting the right flags. * acinclude.m4: Add tests for FMA3, FMA4 and AVX128. * configure.ac: Call them. * Makefile.in: Regenerated. * config.h.in: Regenerated. * configure: Regenerated. * m4/matmul.m4: Handle AMD chips by calling 128-bit AVX versions which use FMA3 or FMA4. * m4/matmulavx128.m4: New file. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. * generated/matmulavx128_c10.c: New file. * generated/matmulavx128_c16.c: New file. * generated/matmulavx128_c4.c: New file. * generated/matmulavx128_c8.c: New file. * generated/matmulavx128_i1.c: New file. * generated/matmulavx128_i16.c: New file. * generated/matmulavx128_i2.c: New file. * generated/matmulavx128_i4.c: New file. * generated/matmulavx128_i8.c: New file. * generated/matmulavx128_r10.c: New file. * generated/matmulavx128_r16.c: New file. * generated/matmulavx128_r4.c: New file. * generated/matmulavx128_r8.c: New file. 
From-SVN: r248472 --- libgfortran/generated/matmul_i2.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'libgfortran/generated/matmul_i2.c') diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c index 93316cb..d541fa3 100644 --- a/libgfortran/generated/matmul_i2.c +++ b/libgfortran/generated/matmul_i2.c @@ -1734,6 +1734,24 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, #endif /* HAVE_AVX512F */ +/* AMD-specific functions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, + gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_i2_avx128_fma3); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, + gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_i2_avx128_fma4); +#endif + /* Function to fall back to if there is no special processor-specific version. 
*/ static void matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, @@ -2332,6 +2350,26 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray, } #endif /* HAVE_AVX */ } + else if (__cpu_model.__cpu_vendor == VENDOR_AMD) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) + && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) + { + matmul_fn = matmul_i2_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) + && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4))) + { + matmul_fn = matmul_i2_avx128_fma4; + goto store; + } +#endif + + } store: __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } -- cgit v1.1