From cd6cd6aed195b4ec7d652e8b41d60b60e174304e Mon Sep 17 00:00:00 2001
From: Harald Anlauf
Date: Sun, 18 Oct 2020 20:15:26 +0200
Subject: PR libfortran/97063 - Wrong result for vector (step size is negative) * matrix

The MATMUL intrinsic provided a wrong result for rank-1 times rank-2 array
when a negative stride was used for addressing the elements of the rank-1
array, because a check on strides was erroneously placed before the check
on the rank. Interchange order of checks.

libgfortran/ChangeLog:

	* m4/matmul_internal.m4: Move check for rank-1 times rank-2 before
	checks on strides for rank-2 times rank-2.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Likewise.
	* generated/matmul_c4.c: Likewise.
	* generated/matmul_c8.c: Likewise.
	* generated/matmul_i1.c: Likewise.
	* generated/matmul_i16.c: Likewise.
	* generated/matmul_i2.c: Likewise.
	* generated/matmul_i4.c: Likewise.
	* generated/matmul_i8.c: Likewise.
	* generated/matmul_r10.c: Likewise.
	* generated/matmul_r16.c: Likewise.
	* generated/matmul_r4.c: Likewise.
	* generated/matmul_r8.c: Likewise.
	* generated/matmulavx128_c10.c: Likewise.
	* generated/matmulavx128_c16.c: Likewise.
	* generated/matmulavx128_c4.c: Likewise.
	* generated/matmulavx128_c8.c: Likewise.
	* generated/matmulavx128_i1.c: Likewise.
	* generated/matmulavx128_i16.c: Likewise.
	* generated/matmulavx128_i2.c: Likewise.
	* generated/matmulavx128_i4.c: Likewise.
	* generated/matmulavx128_i8.c: Likewise.
	* generated/matmulavx128_r10.c: Likewise.
	* generated/matmulavx128_r16.c: Likewise.
	* generated/matmulavx128_r4.c: Likewise.
	* generated/matmulavx128_r8.c: Likewise.

gcc/testsuite/ChangeLog:

	* gfortran.dg/matmul_20.f90: New test.
--- libgfortran/generated/matmul_r10.c | 140 ++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 70 deletions(-) (limited to 'libgfortran/generated/matmul_r10.c') diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c index b5e63be..859c5a5 100644 --- a/libgfortran/generated/matmul_r10.c +++ b/libgfortran/generated/matmul_r10.c @@ -590,20 +590,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -618,6 +604,20 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -1158,20 +1158,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 
*restrict bbase_y; @@ -1186,6 +1172,20 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -1726,20 +1726,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -1754,6 +1740,20 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -2308,20 +2308,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] 
+= a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -2336,6 +2322,20 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; @@ -2949,20 +2949,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, } } } - else if (axstride < aystride) - { - for (y = 0; y < ycount; y++) - for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; - - for (y = 0; y < ycount; y++) - for (n = 0; n < count; n++) - for (x = 0; x < xcount; x++) - /* dest[x,y] += a[x,n] * b[n,y] */ - dest[x*rxstride + y*rystride] += - abase[x*axstride + n*aystride] * - bbase[n*bxstride + y*bystride]; - } else if (GFC_DESCRIPTOR_RANK (a) == 1) { const GFC_REAL_10 *restrict bbase_y; @@ -2977,6 +2963,20 @@ matmul_r10 (gfc_array_r10 * const restrict retarray, dest[y*rxstride] = s; } } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_10)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } else { const GFC_REAL_10 *restrict abase_x; -- cgit v1.1