diff options
28 files changed, 1335 insertions, 1288 deletions
diff --git a/gcc/testsuite/gfortran.dg/matmul_20.f90 b/gcc/testsuite/gfortran.dg/matmul_20.f90 new file mode 100644 index 0000000..7a211a4 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/matmul_20.f90 @@ -0,0 +1,47 @@ +! { dg-do run } +! PR97063 - Wrong result for vector (step size is negative) * matrix + +program p + implicit none + integer, parameter :: m = 3, k = 2*m, l = k-1, n = 4 + integer :: i, j, m1, m2, ms + integer :: ai(k), bi(k,n), ci(n), ci_ref(n), c1, c2 + real :: ar(k), br(k,n), cr(n), cr_ref(n) + + ai(:) = [(i,i=0,k-1)] + bi(:,:) = reshape ([(((5*i+j),i=0,k-1),j=0,n-1)],[k,n]) + + ! Parameters of subscript triplet + m1 = 1; m2 = l; ms = 2 + + ! Reference values for cross-checks: integer variant + c1 = dot_product (ai(m1:m2: ms), bi(m1:m2: ms,1)) + c2 = dot_product (ai(m1:m2: ms), bi(m1:m2: ms,2)) + ci_ref = matmul (ai(m1:m2: ms), bi(m1:m2: ms,:)) + ci = matmul (ai(m2:m1:-ms), bi(m2:m1:-ms,:)) + + if (ci_ref(1) /= c1 .or. ci_ref(2) /= c2) stop 1 + if (any (ci /= ci_ref)) stop 2 + + ! Real variant + ar = real (ai) + br = real (bi) + cr_ref = matmul (ar(m1:m2: ms), br(m1:m2: ms,:)) + cr = matmul (ar(m2:m1:-ms), br(m2:m1:-ms,:)) + + if (any (cr_ref /= real (ci_ref))) stop 3 + if (any (cr /= cr_ref )) stop 4 + + ! Mixed variants + cr_ref = matmul (ar(m1:m2: ms), bi(m1:m2: ms,:)) + cr = matmul (ar(m2:m1:-ms), bi(m2:m1:-ms,:)) + + if (any (cr_ref /= real (ci_ref))) stop 5 + if (any (cr /= cr_ref )) stop 6 + + cr_ref = matmul (ai(m1:m2: ms), br(m1:m2: ms,:)) + cr = matmul (ai(m2:m1:-ms), br(m2:m1:-ms,:)) + + if (any (cr_ref /= real (ci_ref))) stop 7 + if (any (cr /= cr_ref )) stop 8 +end program diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c index ce5be24..5bfd61d 100644 --- a/libgfortran/generated/matmul_c10.c +++ b/libgfortran/generated/matmul_c10.c @@ -590,20 +590,6 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_c10 (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_c10 (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c index bf756d1..d7617e3 100644 --- a/libgfortran/generated/matmul_c16.c +++ b/libgfortran/generated/matmul_c16.c @@ -590,20 +590,6 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_c16 (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_c16 (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c index 5b24410..9303e6a 100644 --- a/libgfortran/generated/matmul_c4.c +++ b/libgfortran/generated/matmul_c4.c @@ -590,20 +590,6 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c index df3cb92..d29c99a 100644 --- a/libgfortran/generated/matmul_c8.c +++ b/libgfortran/generated/matmul_c8.c @@ -590,20 +590,6 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c index 49b0fba..72c4cee 100644 --- a/libgfortran/generated/matmul_i1.c +++ b/libgfortran/generated/matmul_i1.c @@ -590,20 +590,6 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c index 4e1d837..8158684 100644 --- a/libgfortran/generated/matmul_i16.c +++ b/libgfortran/generated/matmul_i16.c @@ -590,20 +590,6 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c index 1912987..1320a2e 100644 --- a/libgfortran/generated/matmul_i2.c +++ b/libgfortran/generated/matmul_i2.c @@ -590,20 +590,6 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c index ab14a0a..4ee2221 100644 --- a/libgfortran/generated/matmul_i4.c +++ b/libgfortran/generated/matmul_i4.c @@ -590,20 +590,6 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c index bc627e1..b68a27f 100644 --- a/libgfortran/generated/matmul_i8.c +++ b/libgfortran/generated/matmul_i8.c @@ -590,20 +590,6 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c index b5e63be..859c5a5 100644 --- a/libgfortran/generated/matmul_r10.c +++ b/libgfortran/generated/matmul_r10.c @@ -590,20 +590,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c index 4e6c66b..b2fc7f8 100644 --- a/libgfortran/generated/matmul_r16.c +++ b/libgfortran/generated/matmul_r16.c @@ -590,20 +590,6 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_r16 (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_r16 (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c index 202634b..11d785f 100644 --- a/libgfortran/generated/matmul_r4.c +++ b/libgfortran/generated/matmul_r4.c @@ -590,20 +590,6 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c index 22c24e5..6aae02f 100644 --- a/libgfortran/generated/matmul_r8.c +++ b/libgfortran/generated/matmul_r8.c @@ -590,20 +590,6 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_c10.c b/libgfortran/generated/matmulavx128_c10.c index b5ffd03..d0b417c 100644 --- a/libgfortran/generated/matmulavx128_c10.c +++ b/libgfortran/generated/matmulavx128_c10.c @@ -555,20 +555,6 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_10 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_10 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_c16.c b/libgfortran/generated/matmulavx128_c16.c index 32a355e..0137ba5 100644 --- a/libgfortran/generated/matmulavx128_c16.c +++ b/libgfortran/generated/matmulavx128_c16.c @@ -555,20 +555,6 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_16 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_16 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_c4.c b/libgfortran/generated/matmulavx128_c4.c index 97b53d3..850bd2b 100644 --- a/libgfortran/generated/matmulavx128_c4.c +++ b/libgfortran/generated/matmulavx128_c4.c @@ -555,20 +555,6 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_4 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_4 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_c8.c b/libgfortran/generated/matmulavx128_c8.c index e73575e..49d8b44 100644 --- a/libgfortran/generated/matmulavx128_c8.c +++ b/libgfortran/generated/matmulavx128_c8.c @@ -555,20 +555,6 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_COMPLEX_8 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_COMPLEX_8 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_i1.c b/libgfortran/generated/matmulavx128_i1.c index 00885fa..8fc6d92 100644 --- a/libgfortran/generated/matmulavx128_i1.c +++ b/libgfortran/generated/matmulavx128_i1.c @@ -555,20 +555,6 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_1 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_1 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_i16.c b/libgfortran/generated/matmulavx128_i16.c index 942dc08..a349557 100644 --- a/libgfortran/generated/matmulavx128_i16.c +++ b/libgfortran/generated/matmulavx128_i16.c @@ -555,20 +555,6 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_16 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_16 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_i2.c b/libgfortran/generated/matmulavx128_i2.c index baa3c9f..944eaf0 100644 --- a/libgfortran/generated/matmulavx128_i2.c +++ b/libgfortran/generated/matmulavx128_i2.c @@ -555,20 +555,6 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_2 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_2 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_i4.c b/libgfortran/generated/matmulavx128_i4.c index 0c69623..a8e270d 100644 --- a/libgfortran/generated/matmulavx128_i4.c +++ b/libgfortran/generated/matmulavx128_i4.c @@ -555,20 +555,6 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_4 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_4 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_i8.c b/libgfortran/generated/matmulavx128_i8.c index f867002..9c7f492 100644 --- a/libgfortran/generated/matmulavx128_i8.c +++ b/libgfortran/generated/matmulavx128_i8.c @@ -555,20 +555,6 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_INTEGER_8 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_INTEGER_8 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_r10.c b/libgfortran/generated/matmulavx128_r10.c index 24fb297..e2a44cf 100644 --- a/libgfortran/generated/matmulavx128_r10.c +++ b/libgfortran/generated/matmulavx128_r10.c @@ -555,20 +555,6 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_r16.c b/libgfortran/generated/matmulavx128_r16.c index 231d04d..186b226 100644 --- a/libgfortran/generated/matmulavx128_r16.c +++ b/libgfortran/generated/matmulavx128_r16.c @@ -555,20 +555,6 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_16 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_16 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_r4.c b/libgfortran/generated/matmulavx128_r4.c index c582280..e21ea39 100644 --- a/libgfortran/generated/matmulavx128_r4.c +++ b/libgfortran/generated/matmulavx128_r4.c @@ -555,20 +555,6 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_4 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_4)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_4 *restrict abase_x; diff --git a/libgfortran/generated/matmulavx128_r8.c b/libgfortran/generated/matmulavx128_r8.c index e93aeec..e7efd07 100644 --- a/libgfortran/generated/matmulavx128_r8.c +++ b/libgfortran/generated/matmulavx128_r8.c @@ -555,20 +555,6 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -583,6 +569,20 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; @@ -1124,20 +1124,6 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_8 *restrict bbase_y; @@ -1152,6 +1138,20 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_8 *restrict abase_x; diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4 index 32a1e01..13fd769 100644 --- a/libgfortran/m4/matmul_internal.m4 +++ b/libgfortran/m4/matmul_internal.m4 @@ -506,20 +506,6 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = ('rtype_name`)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const 'rtype_name` *restrict bbase_y; @@ -534,6 +520,20 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = ('rtype_name`)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const 'rtype_name` *restrict abase_x; |