diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2017-05-25 21:51:27 +0000 |
---|---|---|
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2017-05-25 21:51:27 +0000 |
commit | 1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03 (patch) | |
tree | 3cd241e469094809d6ceb3aa1a14f6841627e681 /libgfortran/m4 | |
parent | 87e1e6036ef93b18b1450357488ee907db880f37 (diff) | |
download | gcc-1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03.zip gcc-1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03.tar.gz gcc-1d5cf7fcf28c0b7d44fab10d26fc450e9d962f03.tar.bz2 |
re PR libfortran/78379 (Processor-specific versions for matmul)
2017-05-25 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/78379
* Makefile.am: Add generated/matmulavx128_*.c files.
Handle them for compiling and setting the right flags.
* acinclude.m4: Add tests for FMA3, FMA4 and AVX128.
* configure.ac: Call them.
* Makefile.in: Regenerated.
* config.h.in: Regenerated.
* configure: Regenerated.
* m4/matmul.m4: Handle AMD chips by calling 128-bit AVX
versions which use FMA3 or FMA4.
* m4/matmulavx128.m4: New file.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.
* generated/matmulavx128_c10.c: New file.
* generated/matmulavx128_c16.c: New file.
* generated/matmulavx128_c4.c: New file.
* generated/matmulavx128_c8.c: New file.
* generated/matmulavx128_i1.c: New file.
* generated/matmulavx128_i16.c: New file.
* generated/matmulavx128_i2.c: New file.
* generated/matmulavx128_i4.c: New file.
* generated/matmulavx128_i8.c: New file.
* generated/matmulavx128_r10.c: New file.
* generated/matmulavx128_r16.c: New file.
* generated/matmulavx128_r4.c: New file.
* generated/matmulavx128_r8.c: New file.
From-SVN: r248472
Diffstat (limited to 'libgfortran/m4')
-rw-r--r-- | libgfortran/m4/matmul.m4 | 40 | ||||
-rw-r--r-- | libgfortran/m4/matmulavx128.m4 | 67 |
2 files changed, 107 insertions, 0 deletions
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4 index 7976fda..c2f6415 100644 --- a/libgfortran/m4/matmul.m4 +++ b/libgfortran/m4/matmul.m4 @@ -106,6 +106,26 @@ static' include(matmul_internal.m4)dnl static' include(matmul_internal.m4)dnl `#endif /* HAVE_AVX512F */ +/* AMD-specific functions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl +`void +'matmul_name` ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto('matmul_name`); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl +`void +'matmul_name` ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto('matmul_name`); +#endif + /* Function to fall back to if there is no special processor-specific version. 
*/ 'define(`matmul_name',`matmul_'rtype_code`_vanilla')dnl `static' include(matmul_internal.m4)dnl @@ -161,6 +181,26 @@ void matmul_'rtype_code` ('rtype` * const restrict retarray, } #endif /* HAVE_AVX */ } + else if (__cpu_model.__cpu_vendor == VENDOR_AMD) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) + && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA))) + { + matmul_fn = matmul_'rtype_code`_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX)) + && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4))) + { + matmul_fn = matmul_'rtype_code`_avx128_fma4; + goto store; + } +#endif + + } store: __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); } diff --git a/libgfortran/m4/matmulavx128.m4 b/libgfortran/m4/matmulavx128.m4 new file mode 100644 index 0000000..1417284 --- /dev/null +++ b/libgfortran/m4/matmulavx128.m4 @@ -0,0 +1,67 @@ +`/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h>' + +include(iparm.m4)dnl + +/* These are the specific versions of matmul with -mprefer-avx128. */ + +`#if defined (HAVE_'rtype_name`) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const 'rtype_name` *, const 'rtype_name` *, + const int *, const 'rtype_name` *, const int *, + const 'rtype_name` *, 'rtype_name` *, const int *, + int, int); + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl +`void +'matmul_name` ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto('matmul_name`); +'include(matmul_internal.m4)dnl +`#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl +`void +'matmul_name` ('rtype` * const restrict retarray, + 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto('matmul_name`); +'include(matmul_internal.m4)dnl +`#endif + +#endif +' |