From bbf974160054c4b8aa3f0f49084bb46521195a51 Mon Sep 17 00:00:00 2001 From: Thomas Koenig Date: Tue, 6 Jun 2017 19:18:37 +0000 Subject: re PR fortran/80975 (matmul for zero-length arrays) 2017-06-06 Thomas Koenig PR fortran/80975 * m4/matmul_internal.m4: Move zeroing before early return. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. * generated/matmulavx128_c10.c: Regenerated. * generated/matmulavx128_c16.c: Regenerated. * generated/matmulavx128_c4.c: Regenerated. * generated/matmulavx128_c8.c: Regenerated. * generated/matmulavx128_i1.c: Regenerated. * generated/matmulavx128_i16.c: Regenerated. * generated/matmulavx128_i2.c: Regenerated. * generated/matmulavx128_i4.c: Regenerated. * generated/matmulavx128_i8.c: Regenerated. * generated/matmulavx128_r10.c: Regenerated. * generated/matmulavx128_r16.c: Regenerated. * generated/matmulavx128_r4.c: Regenerated. * generated/matmulavx128_r8.c: Regenerated. 2017-06-06 Thomas Koenig PR fortran/80975 * gfortran.dg/matmul_16.f90: New test. * gfortran.dg/inline_matmul_18.f90: New test. From-SVN: r248932 --- libgfortran/ChangeLog | 31 ++++++++++++++++++++ libgfortran/generated/matmul_c10.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_c16.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_c4.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_c8.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_i1.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_i16.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_i2.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_i4.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_i8.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_r10.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_r16.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_r4.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmul_r8.c | 50 ++++++++++++++++---------------- libgfortran/generated/matmulavx128_c10.c | 20 ++++++------- libgfortran/generated/matmulavx128_c16.c | 20 ++++++------- libgfortran/generated/matmulavx128_c4.c | 20 ++++++------- libgfortran/generated/matmulavx128_c8.c | 20 ++++++------- libgfortran/generated/matmulavx128_i1.c | 20 ++++++------- libgfortran/generated/matmulavx128_i16.c | 20 ++++++------- libgfortran/generated/matmulavx128_i2.c | 20 ++++++------- libgfortran/generated/matmulavx128_i4.c | 20 ++++++------- libgfortran/generated/matmulavx128_i8.c | 20 ++++++------- libgfortran/generated/matmulavx128_r10.c | 20 ++++++------- libgfortran/generated/matmulavx128_r16.c | 20 ++++++------- libgfortran/generated/matmulavx128_r4.c | 20 ++++++------- libgfortran/generated/matmulavx128_r8.c | 20 ++++++------- libgfortran/m4/matmul_internal.m4 | 10 +++---- 28 files changed, 491 insertions(+), 460 deletions(-) (limited to 'libgfortran') diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index e763232..1f5a026 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,34 @@ +2017-06-06 Thomas Koenig + + PR fortran/80975 + * m4/matmul_internal.m4: Move zeroing before early return. + * generated/matmul_c10.c: Regenerated. + * generated/matmul_c16.c: Regenerated. + * generated/matmul_c4.c: Regenerated. + * generated/matmul_c8.c: Regenerated. + * generated/matmul_i1.c: Regenerated. + * generated/matmul_i16.c: Regenerated. + * generated/matmul_i2.c: Regenerated. + * generated/matmul_i4.c: Regenerated. + * generated/matmul_i8.c: Regenerated. + * generated/matmul_r10.c: Regenerated. + * generated/matmul_r16.c: Regenerated. + * generated/matmul_r4.c: Regenerated. + * generated/matmul_r8.c: Regenerated. + * generated/matmulavx128_c10.c: Regenerated. + * generated/matmulavx128_c16.c: Regenerated. + * generated/matmulavx128_c4.c: Regenerated. + * generated/matmulavx128_c8.c: Regenerated. + * generated/matmulavx128_i1.c: Regenerated. + * generated/matmulavx128_i16.c: Regenerated. + * generated/matmulavx128_i2.c: Regenerated. + * generated/matmulavx128_i4.c: Regenerated. + * generated/matmulavx128_i8.c: Regenerated. + * generated/matmulavx128_r10.c: Regenerated. + * generated/matmulavx128_r16.c: Regenerated. + * generated/matmulavx128_r4.c: Regenerated. + * generated/matmulavx128_r8.c: Regenerated. + 2017-05-29 Jerry DeLisle PR libgfortran/53029 diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c index 54e2714..4e156ba 100644 --- a/libgfortran/generated/matmul_c10.c +++ b/libgfortran/generated/matmul_c10.c @@ -307,6 +307,11 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_c10 (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_c10 (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c index cd8aacd..162e564 100644 --- a/libgfortran/generated/matmul_c16.c +++ b/libgfortran/generated/matmul_c16.c @@ -307,6 +307,11 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_c16 (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_c16 (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c index ead22ef..43a0288 100644 --- a/libgfortran/generated/matmul_c4.c +++ b/libgfortran/generated/matmul_c4.c @@ -307,6 +307,11 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c index a52e4bd..6efdca1 100644 --- a/libgfortran/generated/matmul_c8.c +++ b/libgfortran/generated/matmul_c8.c @@ -307,6 +307,11 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c index dfd47e1..a658c45 100644 --- a/libgfortran/generated/matmul_i1.c +++ b/libgfortran/generated/matmul_i1.c @@ -307,6 +307,11 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c index a7bdcb5..447112b 100644 --- a/libgfortran/generated/matmul_i16.c +++ b/libgfortran/generated/matmul_i16.c @@ -307,6 +307,11 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c index d541fa3..4e21651 100644 --- a/libgfortran/generated/matmul_i2.c +++ b/libgfortran/generated/matmul_i2.c @@ -307,6 +307,11 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c index f8f0cdb..5331e94 100644 --- a/libgfortran/generated/matmul_i4.c +++ b/libgfortran/generated/matmul_i4.c @@ -307,6 +307,11 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c index 2aac1d7..d150e69 100644 --- a/libgfortran/generated/matmul_i8.c +++ b/libgfortran/generated/matmul_i8.c @@ -307,6 +307,11 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c index 448c96b..5407615 100644 --- a/libgfortran/generated/matmul_r10.c +++ b/libgfortran/generated/matmul_r10.c @@ -307,6 +307,11 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c index 57a47dc..1254ec1 100644 --- a/libgfortran/generated/matmul_r16.c +++ b/libgfortran/generated/matmul_r16.c @@ -307,6 +307,11 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_r16 (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_r16 (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c index 52eea53..5d13e67 100644 --- a/libgfortran/generated/matmul_r4.c +++ b/libgfortran/generated/matmul_r4.c @@ -307,6 +307,11 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c index 074697d..e835ed1 100644 --- a/libgfortran/generated/matmul_r8.c +++ b/libgfortran/generated/matmul_r8.c @@ -307,6 +307,11 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_c10.c b/libgfortran/generated/matmulavx128_c10.c index 53cdecb..ac8a013 100644 --- a/libgfortran/generated/matmulavx128_c10.c +++ b/libgfortran/generated/matmulavx128_c10.c @@ -272,6 +272,11 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_c16.c b/libgfortran/generated/matmulavx128_c16.c index e7657a0..6af99be 100644 --- a/libgfortran/generated/matmulavx128_c16.c +++ b/libgfortran/generated/matmulavx128_c16.c @@ -272,6 +272,11 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_c4.c b/libgfortran/generated/matmulavx128_c4.c index 950f1eb..0e358be 100644 --- a/libgfortran/generated/matmulavx128_c4.c +++ b/libgfortran/generated/matmulavx128_c4.c @@ -272,6 +272,11 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_c8.c b/libgfortran/generated/matmulavx128_c8.c index a41c160..c4416b1 100644 --- a/libgfortran/generated/matmulavx128_c8.c +++ b/libgfortran/generated/matmulavx128_c8.c @@ -272,6 +272,11 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_i1.c b/libgfortran/generated/matmulavx128_i1.c index e187157..011de3a 100644 --- a/libgfortran/generated/matmulavx128_i1.c +++ b/libgfortran/generated/matmulavx128_i1.c @@ -272,6 +272,11 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_i16.c b/libgfortran/generated/matmulavx128_i16.c index 1a7b733..3a8b48a 100644 --- a/libgfortran/generated/matmulavx128_i16.c +++ b/libgfortran/generated/matmulavx128_i16.c @@ -272,6 +272,11 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_i2.c b/libgfortran/generated/matmulavx128_i2.c index a095c58..738f642 100644 --- a/libgfortran/generated/matmulavx128_i2.c +++ b/libgfortran/generated/matmulavx128_i2.c @@ -272,6 +272,11 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_i4.c b/libgfortran/generated/matmulavx128_i4.c index a01c56f..6c3bb7c 100644 --- a/libgfortran/generated/matmulavx128_i4.c +++ b/libgfortran/generated/matmulavx128_i4.c @@ -272,6 +272,11 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_i8.c b/libgfortran/generated/matmulavx128_i8.c index bc78ffe..58bdf2c 100644 --- a/libgfortran/generated/matmulavx128_i8.c +++ b/libgfortran/generated/matmulavx128_i8.c @@ -272,6 +272,11 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_r10.c b/libgfortran/generated/matmulavx128_r10.c index 943678d..72277fa 100644 --- a/libgfortran/generated/matmulavx128_r10.c +++ b/libgfortran/generated/matmulavx128_r10.c @@ -272,6 +272,11 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_r16.c b/libgfortran/generated/matmulavx128_r16.c index 3d5738b..a6e1178 100644 --- a/libgfortran/generated/matmulavx128_r16.c +++ b/libgfortran/generated/matmulavx128_r16.c @@ -272,6 +272,11 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_r4.c b/libgfortran/generated/matmulavx128_r4.c index 6c6da39..b981053 100644 --- a/libgfortran/generated/matmulavx128_r4.c +++ b/libgfortran/generated/matmulavx128_r4.c @@ -272,6 +272,11 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/generated/matmulavx128_r8.c b/libgfortran/generated/matmulavx128_r8.c index d628200..6392b7b 100644 --- a/libgfortran/generated/matmulavx128_r8.c +++ b/libgfortran/generated/matmulavx128_r8.c @@ -272,6 +272,11 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4 index fdc93e7..e20f922 100644 --- a/libgfortran/m4/matmul_internal.m4 +++ b/libgfortran/m4/matmul_internal.m4 @@ -223,6 +223,11 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = ('rtype_name`)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -235,11 +240,6 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl t1 = malloc (t1_dim * sizeof('rtype_name`)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = ('rtype_name`)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) -- cgit v1.1