aboutsummaryrefslogtreecommitdiff
path: root/libgfortran/generated
diff options
context:
space:
mode:
authorThomas Koenig <tkoenig@gcc.gnu.org>2017-03-02 11:04:01 +0000
committerThomas Koenig <tkoenig@gcc.gnu.org>2017-03-02 11:04:01 +0000
commit6d03bdcc8196414a7d14d21629257ea746e82385 (patch)
tree479218dbc398682ff55cd867e71940dd85a0c39e /libgfortran/generated
parentdb9f7f657e035786836f8371b56d876a9bec46bc (diff)
downloadgcc-6d03bdcc8196414a7d14d21629257ea746e82385.zip
gcc-6d03bdcc8196414a7d14d21629257ea746e82385.tar.gz
gcc-6d03bdcc8196414a7d14d21629257ea746e82385.tar.bz2
re PR libfortran/78379 (Processor-specific versions for matmul)
2017-03-02 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/78379 * m4/matmul.m4: (matmul_'rtype_code`_avx2): Also generate for reals. Add fma to target options. (matmul_'rtype_code`): Call AVX2 only if FMA is available. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. From-SVN: r245836
Diffstat (limited to 'libgfortran/generated')
-rw-r--r--libgfortran/generated/matmul_c10.c8
-rw-r--r--libgfortran/generated/matmul_c16.c8
-rw-r--r--libgfortran/generated/matmul_c4.c8
-rw-r--r--libgfortran/generated/matmul_c8.c8
-rw-r--r--libgfortran/generated/matmul_i1.c8
-rw-r--r--libgfortran/generated/matmul_i16.c8
-rw-r--r--libgfortran/generated/matmul_i2.c8
-rw-r--r--libgfortran/generated/matmul_i4.c8
-rw-r--r--libgfortran/generated/matmul_i8.c8
-rw-r--r--libgfortran/generated/matmul_r10.c12
-rw-r--r--libgfortran/generated/matmul_r16.c12
-rw-r--r--libgfortran/generated/matmul_r4.c12
-rw-r--r--libgfortran/generated/matmul_r8.c12
13 files changed, 39 insertions, 81 deletions
diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index 2892c50..b333a84 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -74,9 +74,6 @@ extern void matmul_c10 (gfc_array_c10 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_c10);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
static void
matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_c10_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c
index d67ec15..0ef66c0 100644
--- a/libgfortran/generated/matmul_c16.c
+++ b/libgfortran/generated/matmul_c16.c
@@ -74,9 +74,6 @@ extern void matmul_c16 (gfc_array_c16 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_c16);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
static void
matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_c16_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c
index b20539d..b30320b 100644
--- a/libgfortran/generated/matmul_c4.c
+++ b/libgfortran/generated/matmul_c4.c
@@ -74,9 +74,6 @@ extern void matmul_c4 (gfc_array_c4 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_c4);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
static void
matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_c4_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c
index 954dd5e..75b4680 100644
--- a/libgfortran/generated/matmul_c8.c
+++ b/libgfortran/generated/matmul_c8.c
@@ -74,9 +74,6 @@ extern void matmul_c8 (gfc_array_c8 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_c8);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
static void
matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_c8_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c
index 6211836..9248263 100644
--- a/libgfortran/generated/matmul_i1.c
+++ b/libgfortran/generated/matmul_i1.c
@@ -74,9 +74,6 @@ extern void matmul_i1 (gfc_array_i1 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_i1);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
static void
matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i1_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c
index 51e92fb..f10540e 100644
--- a/libgfortran/generated/matmul_i16.c
+++ b/libgfortran/generated/matmul_i16.c
@@ -74,9 +74,6 @@ extern void matmul_i16 (gfc_array_i16 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_i16);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
static void
matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i16_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c
index 2077db3..55ad5c6 100644
--- a/libgfortran/generated/matmul_i2.c
+++ b/libgfortran/generated/matmul_i2.c
@@ -74,9 +74,6 @@ extern void matmul_i2 (gfc_array_i2 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_i2);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
static void
matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i2_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c
index 053f4a52..97b4a5b 100644
--- a/libgfortran/generated/matmul_i4.c
+++ b/libgfortran/generated/matmul_i4.c
@@ -74,9 +74,6 @@ extern void matmul_i4 (gfc_array_i4 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_i4);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
static void
matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i4_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c
index a3418d8..ae78ecf 100644
--- a/libgfortran/generated/matmul_i8.c
+++ b/libgfortran/generated/matmul_i8.c
@@ -74,9 +74,6 @@ extern void matmul_i8 (gfc_array_i8 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_i8);
-
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -628,7 +625,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
static void
matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
@@ -2277,7 +2274,8 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i8_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c
index 24a331a..11d0591 100644
--- a/libgfortran/generated/matmul_r10.c
+++ b/libgfortran/generated/matmul_r10.c
@@ -74,13 +74,6 @@ extern void matmul_r10 (gfc_array_r10 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_r10);
-#if defined(HAVE_AVX) && defined(HAVE_AVX2)
-/* REAL types generate identical code for AVX and AVX2. Only generate
- an AVX2 function if we are dealing with integer. */
-#undef HAVE_AVX2
-#endif
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -632,7 +625,7 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
static void
matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
@@ -2281,7 +2274,8 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_r10_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c
index be3e8fd..73e7c98 100644
--- a/libgfortran/generated/matmul_r16.c
+++ b/libgfortran/generated/matmul_r16.c
@@ -74,13 +74,6 @@ extern void matmul_r16 (gfc_array_r16 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_r16);
-#if defined(HAVE_AVX) && defined(HAVE_AVX2)
-/* REAL types generate identical code for AVX and AVX2. Only generate
- an AVX2 function if we are dealing with integer. */
-#undef HAVE_AVX2
-#endif
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -632,7 +625,7 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
static void
matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
@@ -2281,7 +2274,8 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_r16_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c
index b7a40d3..ac7306f 100644
--- a/libgfortran/generated/matmul_r4.c
+++ b/libgfortran/generated/matmul_r4.c
@@ -74,13 +74,6 @@ extern void matmul_r4 (gfc_array_r4 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_r4);
-#if defined(HAVE_AVX) && defined(HAVE_AVX2)
-/* REAL types generate identical code for AVX and AVX2. Only generate
- an AVX2 function if we are dealing with integer. */
-#undef HAVE_AVX2
-#endif
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -632,7 +625,7 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
static void
matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
@@ -2281,7 +2274,8 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_r4_avx2;
goto tailcall;
diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c
index cffa4ec..8d2e784 100644
--- a/libgfortran/generated/matmul_r8.c
+++ b/libgfortran/generated/matmul_r8.c
@@ -74,13 +74,6 @@ extern void matmul_r8 (gfc_array_r8 * const restrict retarray,
int blas_limit, blas_call gemm);
export_proto(matmul_r8);
-#if defined(HAVE_AVX) && defined(HAVE_AVX2)
-/* REAL types generate identical code for AVX and AVX2. Only generate
- an AVX2 function if we are dealing with integer. */
-#undef HAVE_AVX2
-#endif
-
-
/* Put exhaustive list of possible architectures here here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
@@ -632,7 +625,7 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
static void
matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
@@ -2281,7 +2274,8 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_r8_avx2;
goto tailcall;