aboutsummaryrefslogtreecommitdiff
path: root/libgfortran/generated/matmul_i2.c
diff options
context:
space:
mode:
authorThomas Koenig <tkoenig@gcc.gnu.org>2024-09-24 21:51:42 +0200
committerThomas Koenig <tkoenig@gcc.gnu.org>2024-09-24 21:51:42 +0200
commit5d98fe096b5d17021875806ffc32ba41ea0e87b0 (patch)
tree03c4d94456c7c17ba3b5164ac17bf6a71cca4485 /libgfortran/generated/matmul_i2.c
parent650e91566561870f3d1c8d5b92e6613296ee1a8d (diff)
downloadgcc-5d98fe096b5d17021875806ffc32ba41ea0e87b0.zip
gcc-5d98fe096b5d17021875806ffc32ba41ea0e87b0.tar.gz
gcc-5d98fe096b5d17021875806ffc32ba41ea0e87b0.tar.bz2
Implement MATMUL and DOT_PRODUCT for unsigned.
gcc/fortran/ChangeLog: * arith.cc (gfc_arith_uminus): Fix warning. (gfc_arith_minus): Correctly truncate unsigneds. * check.cc (gfc_check_dot_product): Handle unsigned arguments. (gfc_check_matmul): Likewise. * expr.cc (gfc_get_unsigned_expr): New function. * gfortran.h (gfc_get_unsigned_expr): Add prototype. * iresolve.cc (gfc_resolve_matmul): If using UNSIGNED, use the signed integer version. * gfortran.texi: Document MATMUL and DOT_PRODUCT for unsigned. * simplify.cc (compute_dot_product): Handle unsigneds. libgfortran/ChangeLog: * m4/iparm.m4: Add UNSIGNED if type is m. * m4/matmul.m4: If type is GFC_INTEGER, use GFC_UINTEGER instead. Whitespace fixes. * m4/matmul_internal.m4: Whitespace fixes. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c17.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r17.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. * libgfortran.h: Add array types for unsigned. gcc/testsuite/ChangeLog: * gfortran.dg/unsigned_25.f90: New test. * gfortran.dg/unsigned_26.f90: New test.
Diffstat (limited to 'libgfortran/generated/matmul_i2.c')
-rw-r--r--libgfortran/generated/matmul_i2.c346
1 files changed, 173 insertions, 173 deletions
diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c
index 1b727e4..89e326e 100644
--- a/libgfortran/generated/matmul_i2.c
+++ b/libgfortran/generated/matmul_i2.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_2)
+#if defined (HAVE_GFC_UINTEGER_2)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_2 *, const GFC_INTEGER_2 *,
- const int *, const GFC_INTEGER_2 *, const int *,
- const GFC_INTEGER_2 *, GFC_INTEGER_2 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_2 *, const GFC_UINTEGER_2 *,
+ const int *, const GFC_UINTEGER_2 *, const int *,
+ const GFC_UINTEGER_2 *, GFC_UINTEGER_2 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+extern void matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i2);
@@ -80,17 +80,17 @@ export_proto(matmul_i2);
#ifdef HAVE_AVX
static void
-matmul_i2_avx (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i2_avx (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx512f (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx512f (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx128_fma3 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i2_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx128_fma4 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i2_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_vanilla (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+void matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;