author     Thomas Koenig <tkoenig@gcc.gnu.org>  2024-09-24 21:51:42 +0200
committer  Thomas Koenig <tkoenig@gcc.gnu.org>  2024-09-24 21:51:42 +0200
commit     5d98fe096b5d17021875806ffc32ba41ea0e87b0 (patch)
tree       03c4d94456c7c17ba3b5164ac17bf6a71cca4485 /libgfortran
parent     650e91566561870f3d1c8d5b92e6613296ee1a8d (diff)
Implement MATMUL and DOT_PRODUCT for unsigned.

gcc/fortran/ChangeLog:

	* arith.cc (gfc_arith_uminus): Fix warning.
	(gfc_arith_minus): Correctly truncate unsigneds.
	* check.cc (gfc_check_dot_product): Handle unsigned arguments.
	(gfc_check_matmul): Likewise.
	* expr.cc (gfc_get_unsigned_expr): New function.
	* gfortran.h (gfc_get_unsigned_expr): Add prototype.
	* iresolve.cc (gfc_resolve_matmul): If using UNSIGNED, use the
	signed integer version.
	* gfortran.texi: Document MATMUL and DOT_PRODUCT for unsigned.
	* simplify.cc (compute_dot_product): Handle unsigneds.

libgfortran/ChangeLog:

	* m4/iparm.m4: Add UNSIGNED if type is m.
	* m4/matmul.m4: If type is GFC_INTEGER, use GFC_UINTEGER instead.
	Whitespace fixes.
	* m4/matmul_internal.m4: Whitespace fixes.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Regenerated.
	* generated/matmul_c17.c: Regenerated.
	* generated/matmul_c4.c: Regenerated.
	* generated/matmul_c8.c: Regenerated.
	* generated/matmul_i1.c: Regenerated.
	* generated/matmul_i16.c: Regenerated.
	* generated/matmul_i2.c: Regenerated.
	* generated/matmul_i4.c: Regenerated.
	* generated/matmul_i8.c: Regenerated.
	* generated/matmul_r10.c: Regenerated.
	* generated/matmul_r16.c: Regenerated.
	* generated/matmul_r17.c: Regenerated.
	* generated/matmul_r4.c: Regenerated.
	* generated/matmul_r8.c: Regenerated.
	* libgfortran.h: Add array types for unsigned.

gcc/testsuite/ChangeLog:

	* gfortran.dg/unsigned_25.f90: New test.
	* gfortran.dg/unsigned_26.f90: New test.
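
The design, as the iresolve.cc entry above indicates, is to resolve unsigned
MATMUL to the existing signed-integer library entry points, while the kernels
in the diff below switch their internal arithmetic to GFC_UINTEGER_N.  A
minimal C sketch of why that is sound (hypothetical values, not code from
this commit): on two's-complement targets the truncated unsigned result is
bit-identical to the wrapped signed one, and unsigned overflow, unlike signed
overflow, is well-defined in C.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical demo values.  On a two's-complement target, the
       truncated unsigned product has the same bit pattern as the
       wrapped signed product, but without undefined behavior.  */
    int
    main (void)
    {
      uint8_t ua = 200, ub = 3;
      uint8_t uprod = (uint8_t) (ua * ub);  /* 600 mod 256 = 88 */
      int8_t  sa = (int8_t) ua;             /* same bits: -56 */
      int8_t  sprod = (int8_t) (sa * ub);   /* -168 truncated: 88 */
      printf ("%u %d\n", (unsigned) uprod, (int) sprod);  /* "88 88" */
      return 0;
    }
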
Diffstat (limited to 'libgfortran')
-rw-r--r--   libgfortran/generated/matmul_c10.c   |  36
-rw-r--r--   libgfortran/generated/matmul_c16.c   |  36
-rw-r--r--   libgfortran/generated/matmul_c17.c   |  36
-rw-r--r--   libgfortran/generated/matmul_c4.c    |  36
-rw-r--r--   libgfortran/generated/matmul_c8.c    |  36
-rw-r--r--   libgfortran/generated/matmul_i1.c    | 346
-rw-r--r--   libgfortran/generated/matmul_i16.c   | 346
-rw-r--r--   libgfortran/generated/matmul_i2.c    | 346
-rw-r--r--   libgfortran/generated/matmul_i4.c    | 346
-rw-r--r--   libgfortran/generated/matmul_i8.c    | 346
-rw-r--r--   libgfortran/generated/matmul_r10.c   |  36
-rw-r--r--   libgfortran/generated/matmul_r16.c   |  36
-rw-r--r--   libgfortran/generated/matmul_r17.c   |  36
-rw-r--r--   libgfortran/generated/matmul_r4.c    |  36
-rw-r--r--   libgfortran/generated/matmul_r8.c    |  36
-rw-r--r--   libgfortran/libgfortran.h            |   7
-rw-r--r--   libgfortran/m4/iparm.m4              |   2
-rw-r--r--   libgfortran/m4/matmul.m4             |  29
-rw-r--r--   libgfortran/m4/matmul_internal.m4    |   2
19 files changed, 1070 insertions, 1060 deletions
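
Nearly all of the churn below is a mechanical GFC_INTEGER_N to GFC_UINTEGER_N
substitution in the five integer kernel files, plus trailing-whitespace
cleanup in the complex and real files.  A simplified, self-contained sketch
of what the post-patch inner loop computes (descriptors, strides, and the
blocked and BLAS paths omitted; matmul_u1_sketch is a hypothetical name, and
GFC_UINTEGER_1 is modeled here as uint8_t):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint8_t GFC_UINTEGER_1;

    /* Naive column-major C = MATMUL(A,B) with unit strides; the
       running sum wraps modulo 2^8, which is well-defined for an
       unsigned type.  */
    static void
    matmul_u1_sketch (GFC_UINTEGER_1 *c, const GFC_UINTEGER_1 *a,
                      const GFC_UINTEGER_1 *b, int m, int n, int k)
    {
      for (int y = 0; y < n; y++)
        for (int x = 0; x < m; x++)
          {
            GFC_UINTEGER_1 s = (GFC_UINTEGER_1) 0;
            for (int l = 0; l < k; l++)
              s += a[x + l * m] * b[l + y * k];  /* wraps mod 256 */
            c[x + y * m] = s;
          }
    }

    int
    main (void)
    {
      GFC_UINTEGER_1 a[4] = { 200, 200, 200, 200 };
      GFC_UINTEGER_1 b[4] = { 2, 2, 2, 2 };
      GFC_UINTEGER_1 c[4];
      matmul_u1_sketch (c, a, b, 2, 2, 2);
      printf ("%u\n", (unsigned) c[0]);  /* 200*2 + 200*2 = 800 -> 32 */
      return 0;
    }
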
diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index c3dbb6d..54a8364 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_COMPLEX_10 *, const GFC_COMPLEX_10 *,
- const int *, const GFC_COMPLEX_10 *, const int *,
- const GFC_COMPLEX_10 *, GFC_COMPLEX_10 *, const int *,
- int, int);
+ const int *, const GFC_COMPLEX_10 *, const GFC_COMPLEX_10 *,
+ const int *, const GFC_COMPLEX_10 *, const int *,
+ const GFC_COMPLEX_10 *, GFC_COMPLEX_10 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_c10 (gfc_array_c10 * const restrict retarray,
+extern void matmul_c10 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_c10);
@@ -80,11 +80,11 @@ export_proto(matmul_c10);
#ifdef HAVE_AVX
static void
-matmul_c10_avx (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_c10_avx (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_c10_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_c10_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
+matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_c10_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_c10_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
+matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_c10 (gfc_array_c10 * const restrict retarray,
+void matmul_c10 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_c10 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_c10 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_c10 (gfc_array_c10 * const restrict retarray,
+matmul_c10 (gfc_array_c10 * const restrict retarray,
gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
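
One detail of the blas_call typedef repeated at the top of each of these
files: the two trailing int parameters are the hidden lengths of the
transa/transb character arguments under the Fortran calling convention.
A stub conforming to the typedef (sketch only; gemm_stub is a hypothetical
name, and GFC_COMPLEX_10 is modeled as long double _Complex):

    #include <stdio.h>

    typedef long double _Complex GFC_COMPLEX_10;

    /* Matches the blas_call parameter order: transa, transb, m, n, k,
       alpha, a, lda, b, ldb, beta, c, ldc, then the hidden character
       lengths.  */
    static void
    gemm_stub (const char *transa, const char *transb, const int *m,
               const int *n, const int *k, const GFC_COMPLEX_10 *alpha,
               const GFC_COMPLEX_10 *a, const int *lda,
               const GFC_COMPLEX_10 *b, const int *ldb,
               const GFC_COMPLEX_10 *beta, GFC_COMPLEX_10 *c,
               const int *ldc, int transa_len, int transb_len)
    {
      (void) alpha; (void) a; (void) lda; (void) b; (void) ldb;
      (void) beta; (void) c; (void) ldc;
      printf ("%.*s%.*s %d %d %d\n", transa_len, transa,
              transb_len, transb, *m, *n, *k);
    }
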
diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c
index 230f17d..fce4ce2 100644
--- a/libgfortran/generated/matmul_c16.c
+++ b/libgfortran/generated/matmul_c16.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_COMPLEX_16 *, const GFC_COMPLEX_16 *,
- const int *, const GFC_COMPLEX_16 *, const int *,
- const GFC_COMPLEX_16 *, GFC_COMPLEX_16 *, const int *,
- int, int);
+ const int *, const GFC_COMPLEX_16 *, const GFC_COMPLEX_16 *,
+ const int *, const GFC_COMPLEX_16 *, const int *,
+ const GFC_COMPLEX_16 *, GFC_COMPLEX_16 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_c16 (gfc_array_c16 * const restrict retarray,
+extern void matmul_c16 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_c16);
@@ -80,11 +80,11 @@ export_proto(matmul_c16);
#ifdef HAVE_AVX
static void
-matmul_c16_avx (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_c16_avx (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_c16_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_c16_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
+matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_c16_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_c16_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
+matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_c16 (gfc_array_c16 * const restrict retarray,
+void matmul_c16 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_c16 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_c16 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_c16 (gfc_array_c16 * const restrict retarray,
+matmul_c16 (gfc_array_c16 * const restrict retarray,
gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_c17.c b/libgfortran/generated/matmul_c17.c
index cbfd25d..aee0d5a 100644
--- a/libgfortran/generated/matmul_c17.c
+++ b/libgfortran/generated/matmul_c17.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *,
- const int *, const GFC_COMPLEX_17 *, const int *,
- const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *,
- int, int);
+ const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *,
+ const int *, const GFC_COMPLEX_17 *, const int *,
+ const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_c17 (gfc_array_c17 * const restrict retarray,
+extern void matmul_c17 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_c17);
@@ -80,11 +80,11 @@ export_proto(matmul_c17);
#ifdef HAVE_AVX
static void
-matmul_c17_avx (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_c17_avx (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_c17_avx (gfc_array_c17 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_c17_avx2 (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx2 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_c17_avx2 (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx2 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_c17_avx2 (gfc_array_c17 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_c17_avx512f (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx512f (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_c17_avx512f (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx512f (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_c17_avx512f (gfc_array_c17 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_c17_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_c17_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray,
+matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_c17_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_c17_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_c17_vanilla (gfc_array_c17 * const restrict retarray,
+matmul_c17_vanilla (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_c17_vanilla (gfc_array_c17 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_c17 (gfc_array_c17 * const restrict retarray,
+void matmul_c17 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_c17 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_c17 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_c17 (gfc_array_c17 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_c17 (gfc_array_c17 * const restrict retarray,
+matmul_c17 (gfc_array_c17 * const restrict retarray,
gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c
index c8f4550..2ab8a6f 100644
--- a/libgfortran/generated/matmul_c4.c
+++ b/libgfortran/generated/matmul_c4.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_COMPLEX_4 *, const GFC_COMPLEX_4 *,
- const int *, const GFC_COMPLEX_4 *, const int *,
- const GFC_COMPLEX_4 *, GFC_COMPLEX_4 *, const int *,
- int, int);
+ const int *, const GFC_COMPLEX_4 *, const GFC_COMPLEX_4 *,
+ const int *, const GFC_COMPLEX_4 *, const int *,
+ const GFC_COMPLEX_4 *, GFC_COMPLEX_4 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_c4 (gfc_array_c4 * const restrict retarray,
+extern void matmul_c4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_c4);
@@ -80,11 +80,11 @@ export_proto(matmul_c4);
#ifdef HAVE_AVX
static void
-matmul_c4_avx (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_c4_avx (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_c4_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_c4_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
+matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_c4_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_c4_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
+matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_c4 (gfc_array_c4 * const restrict retarray,
+void matmul_c4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_c4 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_c4 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_c4 (gfc_array_c4 * const restrict retarray,
+matmul_c4 (gfc_array_c4 * const restrict retarray,
gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c
index 5c5928d..fb5246e 100644
--- a/libgfortran/generated/matmul_c8.c
+++ b/libgfortran/generated/matmul_c8.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_COMPLEX_8 *, const GFC_COMPLEX_8 *,
- const int *, const GFC_COMPLEX_8 *, const int *,
- const GFC_COMPLEX_8 *, GFC_COMPLEX_8 *, const int *,
- int, int);
+ const int *, const GFC_COMPLEX_8 *, const GFC_COMPLEX_8 *,
+ const int *, const GFC_COMPLEX_8 *, const int *,
+ const GFC_COMPLEX_8 *, GFC_COMPLEX_8 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_c8 (gfc_array_c8 * const restrict retarray,
+extern void matmul_c8 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_c8);
@@ -80,11 +80,11 @@ export_proto(matmul_c8);
#ifdef HAVE_AVX
static void
-matmul_c8_avx (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_c8_avx (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_c8_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_c8_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
+matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_c8_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_c8_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
+matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_c8 (gfc_array_c8 * const restrict retarray,
+void matmul_c8 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_c8 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_c8 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_c8 (gfc_array_c8 * const restrict retarray,
+matmul_c8 (gfc_array_c8 * const restrict retarray,
gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
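
The matmul_p/matmul_fn declarations visible in the hunks above show how each
of these files dispatches at run time: the first call selects an
AVX/AVX2/AVX512F/vanilla variant and caches it in a static function pointer
using relaxed atomics, so later calls jump straight through the cache.  A
stripped-down sketch of that pattern (the impl names and the
__builtin_cpu_supports check are illustrative; the generated files also key
off the HAVE_AVX* configure macros):

    #include <stdio.h>

    typedef void (*impl_fn) (void);

    static void impl_vanilla (void) { puts ("vanilla"); }
    static void impl_avx2 (void) { puts ("avx2"); }

    void
    dispatch (void)
    {
      static impl_fn cached;
      impl_fn fn = __atomic_load_n (&cached, __ATOMIC_RELAXED);
      if (fn == NULL)
        {
          /* First call: pick an implementation and cache it.  */
          fn = __builtin_cpu_supports ("avx2") ? impl_avx2 : impl_vanilla;
          __atomic_store_n (&cached, fn, __ATOMIC_RELAXED);
        }
      fn ();
    }

    int
    main (void)
    {
      dispatch ();  /* selects and caches */
      dispatch ();  /* uses the cached pointer */
      return 0;
    }
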
diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c
index 7a30ad8..51e020a 100644
--- a/libgfortran/generated/matmul_i1.c
+++ b/libgfortran/generated/matmul_i1.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_1)
+#if defined (HAVE_GFC_UINTEGER_1)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_1 *, const GFC_INTEGER_1 *,
- const int *, const GFC_INTEGER_1 *, const int *,
- const GFC_INTEGER_1 *, GFC_INTEGER_1 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_1 *, const GFC_UINTEGER_1 *,
+ const int *, const GFC_UINTEGER_1 *, const int *,
+ const GFC_UINTEGER_1 *, GFC_UINTEGER_1 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i1 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+extern void matmul_i1 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i1);
@@ -80,17 +80,17 @@ export_proto(matmul_i1);
#ifdef HAVE_AVX
static void
-matmul_i1_avx (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i1_avx (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_1 * restrict abase;
- const GFC_INTEGER_1 * restrict bbase;
- GFC_INTEGER_1 * restrict dest;
+ const GFC_UINTEGER_1 * restrict abase;
+ const GFC_UINTEGER_1 * restrict bbase;
+ GFC_UINTEGER_1 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_1 one = 1, zero = 0;
+ const GFC_UINTEGER_1 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_1 *a, *b;
- GFC_INTEGER_1 *c;
+ const GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_1 *t1;
+ GFC_UINTEGER_1 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_1)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx2 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx2 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_1 * restrict abase;
- const GFC_INTEGER_1 * restrict bbase;
- GFC_INTEGER_1 * restrict dest;
+ const GFC_UINTEGER_1 * restrict abase;
+ const GFC_UINTEGER_1 * restrict bbase;
+ GFC_UINTEGER_1 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_1 one = 1, zero = 0;
+ const GFC_UINTEGER_1 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_1 *a, *b;
- GFC_INTEGER_1 *c;
+ const GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_1 *t1;
+ GFC_UINTEGER_1 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_1)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx512f (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx512f (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_1 * restrict abase;
- const GFC_INTEGER_1 * restrict bbase;
- GFC_INTEGER_1 * restrict dest;
+ const GFC_UINTEGER_1 * restrict abase;
+ const GFC_UINTEGER_1 * restrict bbase;
+ GFC_UINTEGER_1 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_1 one = 1, zero = 0;
+ const GFC_UINTEGER_1 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_1 *a, *b;
- GFC_INTEGER_1 *c;
+ const GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_1 *t1;
+ GFC_UINTEGER_1 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_1)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx128_fma3 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i1_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_avx128_fma4 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i1_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1_vanilla (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_1 * restrict abase;
- const GFC_INTEGER_1 * restrict bbase;
- GFC_INTEGER_1 * restrict dest;
+ const GFC_UINTEGER_1 * restrict abase;
+ const GFC_UINTEGER_1 * restrict bbase;
+ GFC_UINTEGER_1 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_1 one = 1, zero = 0;
+ const GFC_UINTEGER_1 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_1 *a, *b;
- GFC_INTEGER_1 *c;
+ const GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_1 *t1;
+ GFC_UINTEGER_1 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_1)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i1 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+void matmul_i1 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i1 (gfc_array_i1 * const restrict retarray,
- gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+matmul_i1 (gfc_array_m1 * const restrict retarray,
+ gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_1 * restrict abase;
- const GFC_INTEGER_1 * restrict bbase;
- GFC_INTEGER_1 * restrict dest;
+ const GFC_UINTEGER_1 * restrict abase;
+ const GFC_UINTEGER_1 * restrict bbase;
+ GFC_UINTEGER_1 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_1 one = 1, zero = 0;
+ const GFC_UINTEGER_1 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_1 *a, *b;
- GFC_INTEGER_1 *c;
+ const GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_1 *t1;
+ GFC_UINTEGER_1 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_1)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
}
else
{
- const GFC_INTEGER_1 *restrict abase_x;
- const GFC_INTEGER_1 *restrict bbase_y;
- GFC_INTEGER_1 *restrict dest_y;
- GFC_INTEGER_1 s;
+ const GFC_UINTEGER_1 *restrict abase_x;
+ const GFC_UINTEGER_1 *restrict bbase_y;
+ GFC_UINTEGER_1 *restrict dest_y;
+ GFC_UINTEGER_1 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_1) 0;
+ s = (GFC_UINTEGER_1) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
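The non-BLAS fallback paths above all reduce to the same strided dot-product kernel, now accumulating in the unsigned element type: s starts at zero and each step adds a product of strided elements of a and b. A minimal standalone sketch of that kernel for the kind-1 case, assuming GFC_UINTEGER_1 is the usual one-byte unsigned typedef and using hypothetical names (dot_u1, stride_a, stride_b) that do not appear in the generated code:

#include <stdint.h>
#include <stddef.h>

typedef uint8_t GFC_UINTEGER_1;  /* stand-in for the libgfortran typedef */

/* Strided dot product as in the rank-1 fallback loops above:
   s = 0; then s += a[n*stride_a] * b[n*stride_b] for n = 0..count-1.
   Unsigned accumulation is well-defined and simply wraps modulo 2^8.  */
static GFC_UINTEGER_1
dot_u1 (const GFC_UINTEGER_1 *a, ptrdiff_t stride_a,
	const GFC_UINTEGER_1 *b, ptrdiff_t stride_b, ptrdiff_t count)
{
  GFC_UINTEGER_1 s = (GFC_UINTEGER_1) 0;
  for (ptrdiff_t n = 0; n < count; n++)
    s += a[n * stride_a] * b[n * stride_b];
  return s;
}

Since the regeneration changes only the element type from GFC_INTEGER_1 to GFC_UINTEGER_1, the loop structure is identical to the signed version; the practical difference is that unsigned overflow wraps instead of being undefined behavior.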
diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c
index cf72f65..9a7eee4 100644
--- a/libgfortran/generated/matmul_i16.c
+++ b/libgfortran/generated/matmul_i16.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_16)
+#if defined (HAVE_GFC_UINTEGER_16)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_16 *, const GFC_INTEGER_16 *,
- const int *, const GFC_INTEGER_16 *, const int *,
- const GFC_INTEGER_16 *, GFC_INTEGER_16 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_16 *, const GFC_UINTEGER_16 *,
+ const int *, const GFC_UINTEGER_16 *, const int *,
+ const GFC_UINTEGER_16 *, GFC_UINTEGER_16 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i16 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+extern void matmul_i16 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i16);
@@ -80,17 +80,17 @@ export_proto(matmul_i16);
#ifdef HAVE_AVX
static void
-matmul_i16_avx (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i16_avx (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_16 * restrict abase;
- const GFC_INTEGER_16 * restrict bbase;
- GFC_INTEGER_16 * restrict dest;
+ const GFC_UINTEGER_16 * restrict abase;
+ const GFC_UINTEGER_16 * restrict bbase;
+ GFC_UINTEGER_16 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_16 one = 1, zero = 0;
+ const GFC_UINTEGER_16 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_16 *a, *b;
- GFC_INTEGER_16 *c;
+ const GFC_UINTEGER_16 *a, *b;
+ GFC_UINTEGER_16 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_16 *t1;
+ GFC_UINTEGER_16 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_16)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx2 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx2 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_16 * restrict abase;
- const GFC_INTEGER_16 * restrict bbase;
- GFC_INTEGER_16 * restrict dest;
+ const GFC_UINTEGER_16 * restrict abase;
+ const GFC_UINTEGER_16 * restrict bbase;
+ GFC_UINTEGER_16 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_16 one = 1, zero = 0;
+ const GFC_UINTEGER_16 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_16 *a, *b;
- GFC_INTEGER_16 *c;
+ const GFC_UINTEGER_16 *a, *b;
+ GFC_UINTEGER_16 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_16 *t1;
+ GFC_UINTEGER_16 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_16)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx512f (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx512f (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_16 * restrict abase;
- const GFC_INTEGER_16 * restrict bbase;
- GFC_INTEGER_16 * restrict dest;
+ const GFC_UINTEGER_16 * restrict abase;
+ const GFC_UINTEGER_16 * restrict bbase;
+ GFC_UINTEGER_16 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_16 one = 1, zero = 0;
+ const GFC_UINTEGER_16 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_16 *a, *b;
- GFC_INTEGER_16 *c;
+ const GFC_UINTEGER_16 *a, *b;
+ GFC_UINTEGER_16 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_16 *t1;
+ GFC_UINTEGER_16 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_16)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx128_fma3 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i16_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_avx128_fma4 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i16_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16_vanilla (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_16 * restrict abase;
- const GFC_INTEGER_16 * restrict bbase;
- GFC_INTEGER_16 * restrict dest;
+ const GFC_UINTEGER_16 * restrict abase;
+ const GFC_UINTEGER_16 * restrict bbase;
+ GFC_UINTEGER_16 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_16 one = 1, zero = 0;
+ const GFC_UINTEGER_16 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_16 *a, *b;
- GFC_INTEGER_16 *c;
+ const GFC_UINTEGER_16 *a, *b;
+ GFC_UINTEGER_16 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_16 *t1;
+ GFC_UINTEGER_16 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_16)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i16 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+void matmul_i16 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i16 (gfc_array_i16 * const restrict retarray,
- gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+matmul_i16 (gfc_array_m16 * const restrict retarray,
+ gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_16 * restrict abase;
- const GFC_INTEGER_16 * restrict bbase;
- GFC_INTEGER_16 * restrict dest;
+ const GFC_UINTEGER_16 * restrict abase;
+ const GFC_UINTEGER_16 * restrict bbase;
+ GFC_UINTEGER_16 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_16 one = 1, zero = 0;
+ const GFC_UINTEGER_16 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_16 *a, *b;
- GFC_INTEGER_16 *c;
+ const GFC_UINTEGER_16 *a, *b;
+ GFC_UINTEGER_16 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_16 *t1;
+ GFC_UINTEGER_16 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_16)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
else
{
- const GFC_INTEGER_16 *restrict abase_x;
- const GFC_INTEGER_16 *restrict bbase_y;
- GFC_INTEGER_16 *restrict dest_y;
- GFC_INTEGER_16 s;
+ const GFC_UINTEGER_16 *restrict abase_x;
+ const GFC_UINTEGER_16 *restrict bbase_y;
+ GFC_UINTEGER_16 *restrict dest_y;
+ GFC_UINTEGER_16 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_16) 0;
+ s = (GFC_UINTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
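When the processor-specific variants are compiled in, matmul_i16 (like the other kinds) selects the best kernel once and caches the choice in the static matmul_p pointer using relaxed atomics; threads that race on the first call all compute the same answer, so no stronger ordering is needed. A minimal sketch of that dispatch pattern, with hypothetical names (matmul_dispatch, the two stub variants) and a single __builtin_cpu_supports probe standing in for the generated AVX512F/AVX2/AVX cascade; as the comment in the generated source notes, this path is currently i386 only:

#include <stddef.h>

typedef void (*matmul_fn_t) (void *retarray, void *a, void *b);

/* Stub variants; the real ones are the _vanilla/_avx/_avx2/_avx512f
   kernels shown in the diff above.  */
static void matmul_u16_vanilla (void *retarray, void *a, void *b) { }
static void matmul_u16_avx2 (void *retarray, void *a, void *b) { }

static matmul_fn_t matmul_p;  /* cached choice, NULL until the first call */

void
matmul_dispatch (void *retarray, void *a, void *b)
{
  matmul_fn_t fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
  if (fn == NULL)
    {
      /* Probe the CPU once; the result is idempotent, so a relaxed
	 store suffices even if several threads get here.  */
      fn = __builtin_cpu_supports ("avx2")
	   ? matmul_u16_avx2 : matmul_u16_vanilla;
      __atomic_store_n (&matmul_p, fn, __ATOMIC_RELAXED);
    }
  fn (retarray, a, b);
}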
diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c
index 1b727e4..89e326e 100644
--- a/libgfortran/generated/matmul_i2.c
+++ b/libgfortran/generated/matmul_i2.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_2)
+#if defined (HAVE_GFC_UINTEGER_2)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_2 *, const GFC_INTEGER_2 *,
- const int *, const GFC_INTEGER_2 *, const int *,
- const GFC_INTEGER_2 *, GFC_INTEGER_2 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_2 *, const GFC_UINTEGER_2 *,
+ const int *, const GFC_UINTEGER_2 *, const int *,
+ const GFC_UINTEGER_2 *, GFC_UINTEGER_2 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+extern void matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i2);
@@ -80,17 +80,17 @@ export_proto(matmul_i2);
#ifdef HAVE_AVX
static void
-matmul_i2_avx (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i2_avx (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx512f (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx512f (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx128_fma3 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i2_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_avx128_fma4 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i2_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2_vanilla (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+void matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i2 (gfc_array_i2 * const restrict retarray,
- gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+matmul_i2 (gfc_array_m2 * const restrict retarray,
+ gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_2 * restrict abase;
- const GFC_INTEGER_2 * restrict bbase;
- GFC_INTEGER_2 * restrict dest;
+ const GFC_UINTEGER_2 * restrict abase;
+ const GFC_UINTEGER_2 * restrict bbase;
+ GFC_UINTEGER_2 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_2 one = 1, zero = 0;
+ const GFC_UINTEGER_2 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_2 *a, *b;
- GFC_INTEGER_2 *c;
+ const GFC_UINTEGER_2 *a, *b;
+ GFC_UINTEGER_2 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_2 *t1;
+ GFC_UINTEGER_2 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_2)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
else
{
- const GFC_INTEGER_2 *restrict abase_x;
- const GFC_INTEGER_2 *restrict bbase_y;
- GFC_INTEGER_2 *restrict dest_y;
- GFC_INTEGER_2 s;
+ const GFC_UINTEGER_2 *restrict abase_x;
+ const GFC_UINTEGER_2 *restrict bbase_y;
+ GFC_UINTEGER_2 *restrict dest_y;
+ GFC_UINTEGER_2 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_2) 0;
+ s = (GFC_UINTEGER_2) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
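
The hunks above retype matmul_i2's fallback kernels from GFC_INTEGER_2 to GFC_UINTEGER_2 without touching the loop structure: the dot-product accumulation is the same operation for both signedness variants, and for unsigned operands C defines overflow as wraparound, which is exactly the modular arithmetic UNSIGNED requires. A minimal standalone sketch of the inner loop, with uint16_t standing in for GFC_UINTEGER_2 (the function name and the explicit widening cast are this sketch's own, not the library's):

#include <stddef.h>
#include <stdint.h>

/* Sketch of the retyped inner loop: accumulate a dot product in a
   16-bit unsigned type.  Overflow wraps modulo 2^16, matching the
   semantics gfortran gives UNSIGNED(KIND=2).  */
static uint16_t
dot_u2 (const uint16_t *restrict a, const uint16_t *restrict b,
	size_t count)
{
  uint16_t s = (uint16_t) 0;
  for (size_t n = 0; n < count; n++)
    /* Widen before multiplying so the product is computed in
       unsigned arithmetic (plain uint16_t operands would promote
       to signed int); the assignment truncates mod 2^16.  */
    s += (uint32_t) a[n] * b[n];
  return s;
}
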
diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c
index ba421d7..2601f64 100644
--- a/libgfortran/generated/matmul_i4.c
+++ b/libgfortran/generated/matmul_i4.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_4)
+#if defined (HAVE_GFC_UINTEGER_4)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_4 *, const GFC_INTEGER_4 *,
- const int *, const GFC_INTEGER_4 *, const int *,
- const GFC_INTEGER_4 *, GFC_INTEGER_4 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_4 *, const GFC_UINTEGER_4 *,
+ const int *, const GFC_UINTEGER_4 *, const int *,
+ const GFC_UINTEGER_4 *, GFC_UINTEGER_4 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i4 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+extern void matmul_i4 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i4);
@@ -80,17 +80,17 @@ export_proto(matmul_i4);
#ifdef HAVE_AVX
static void
-matmul_i4_avx (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i4_avx (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_4 * restrict abase;
- const GFC_INTEGER_4 * restrict bbase;
- GFC_INTEGER_4 * restrict dest;
+ const GFC_UINTEGER_4 * restrict abase;
+ const GFC_UINTEGER_4 * restrict bbase;
+ GFC_UINTEGER_4 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_4 one = 1, zero = 0;
+ const GFC_UINTEGER_4 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_4 *a, *b;
- GFC_INTEGER_4 *c;
+ const GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_4 *t1;
+ GFC_UINTEGER_4 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_4)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx2 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx2 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_4 * restrict abase;
- const GFC_INTEGER_4 * restrict bbase;
- GFC_INTEGER_4 * restrict dest;
+ const GFC_UINTEGER_4 * restrict abase;
+ const GFC_UINTEGER_4 * restrict bbase;
+ GFC_UINTEGER_4 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_4 one = 1, zero = 0;
+ const GFC_UINTEGER_4 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_4 *a, *b;
- GFC_INTEGER_4 *c;
+ const GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_4 *t1;
+ GFC_UINTEGER_4 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_4)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx512f (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx512f (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_4 * restrict abase;
- const GFC_INTEGER_4 * restrict bbase;
- GFC_INTEGER_4 * restrict dest;
+ const GFC_UINTEGER_4 * restrict abase;
+ const GFC_UINTEGER_4 * restrict bbase;
+ GFC_UINTEGER_4 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_4 one = 1, zero = 0;
+ const GFC_UINTEGER_4 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_4 *a, *b;
- GFC_INTEGER_4 *c;
+ const GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_4 *t1;
+ GFC_UINTEGER_4 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_4)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx128_fma3 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i4_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_avx128_fma4 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i4_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4_vanilla (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_4 * restrict abase;
- const GFC_INTEGER_4 * restrict bbase;
- GFC_INTEGER_4 * restrict dest;
+ const GFC_UINTEGER_4 * restrict abase;
+ const GFC_UINTEGER_4 * restrict bbase;
+ GFC_UINTEGER_4 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_4 one = 1, zero = 0;
+ const GFC_UINTEGER_4 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_4 *a, *b;
- GFC_INTEGER_4 *c;
+ const GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_4 *t1;
+ GFC_UINTEGER_4 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_4)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i4 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+void matmul_i4 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i4 (gfc_array_i4 * const restrict retarray,
- gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+matmul_i4 (gfc_array_m4 * const restrict retarray,
+ gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_4 * restrict abase;
- const GFC_INTEGER_4 * restrict bbase;
- GFC_INTEGER_4 * restrict dest;
+ const GFC_UINTEGER_4 * restrict abase;
+ const GFC_UINTEGER_4 * restrict bbase;
+ GFC_UINTEGER_4 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_4 one = 1, zero = 0;
+ const GFC_UINTEGER_4 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_4 *a, *b;
- GFC_INTEGER_4 *c;
+ const GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_4 *t1;
+ GFC_UINTEGER_4 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_4)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
else
{
- const GFC_INTEGER_4 *restrict abase_x;
- const GFC_INTEGER_4 *restrict bbase_y;
- GFC_INTEGER_4 *restrict dest_y;
- GFC_INTEGER_4 s;
+ const GFC_UINTEGER_4 *restrict abase_x;
+ const GFC_UINTEGER_4 *restrict bbase_y;
+ GFC_UINTEGER_4 *restrict dest_y;
+ GFC_UINTEGER_4 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_4) 0;
+ s = (GFC_UINTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
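
The matmul_i4 wrapper above shows the runtime-dispatch pattern shared by all of these generated files: the first call selects a CPU-specific kernel and caches it in a function pointer with relaxed atomics, so later calls pay only an atomic load. A minimal sketch of that pattern under illustrative names (run_kernel and the stub kernels are this sketch's, not the library's; __builtin_cpu_supports and __atomic_load_n/__atomic_store_n are the GCC facilities the generated code uses):

typedef void (*kernel_fn) (void);

static void kernel_vanilla (void) { /* portable fallback */ }
static void kernel_avx2 (void)    { /* SIMD variant */ }

void
run_kernel (void)
{
  static kernel_fn cached;
  kernel_fn fn = __atomic_load_n (&cached, __ATOMIC_RELAXED);

  if (!fn)
    {
      /* First call: probe the CPU once, then publish the choice.
	 A relaxed store suffices because a thread that misses it
	 just redoes the (idempotent) probe.  */
      fn = __builtin_cpu_supports ("avx2") ? kernel_avx2 : kernel_vanilla;
      __atomic_store_n (&cached, fn, __ATOMIC_RELAXED);
    }
  fn ();
}
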
diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c
index 9405abc..96ef7e6 100644
--- a/libgfortran/generated/matmul_i8.c
+++ b/libgfortran/generated/matmul_i8.c
@@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>
-#if defined (HAVE_GFC_INTEGER_8)
+#if defined (HAVE_GFC_UINTEGER_8)
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
passed to us by the front-end, in which case we call it for large
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_INTEGER_8 *, const GFC_INTEGER_8 *,
- const int *, const GFC_INTEGER_8 *, const int *,
- const GFC_INTEGER_8 *, GFC_INTEGER_8 *, const int *,
- int, int);
+ const int *, const GFC_UINTEGER_8 *, const GFC_UINTEGER_8 *,
+ const int *, const GFC_UINTEGER_8 *, const int *,
+ const GFC_UINTEGER_8 *, GFC_UINTEGER_8 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_i8 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+extern void matmul_i8 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_i8);
@@ -80,17 +80,17 @@ export_proto(matmul_i8);
#ifdef HAVE_AVX
static void
-matmul_i8_avx (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_i8_avx (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_8 * restrict abase;
- const GFC_INTEGER_8 * restrict bbase;
- GFC_INTEGER_8 * restrict dest;
+ const GFC_UINTEGER_8 * restrict abase;
+ const GFC_UINTEGER_8 * restrict bbase;
+ GFC_UINTEGER_8 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -132,7 +132,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -251,7 +251,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_8 one = 1, zero = 0;
+ const GFC_UINTEGER_8 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -289,8 +289,8 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_8 *a, *b;
- GFC_INTEGER_8 *c;
+ const GFC_UINTEGER_8 *a, *b;
+ GFC_UINTEGER_8 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -298,11 +298,11 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_8 *t1;
+ GFC_UINTEGER_8 *t1;
a = abase;
b = bbase;
@@ -322,7 +322,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_8)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -339,7 +339,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8));
/* Start turning the crank. */
i1 = n;
@@ -557,10 +557,10 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -569,7 +569,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -578,13 +578,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -593,13 +593,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -609,7 +609,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -621,10 +621,10 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -633,7 +633,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -649,17 +649,17 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx2 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx2 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_8 * restrict abase;
- const GFC_INTEGER_8 * restrict bbase;
- GFC_INTEGER_8 * restrict dest;
+ const GFC_UINTEGER_8 * restrict abase;
+ const GFC_UINTEGER_8 * restrict bbase;
+ GFC_UINTEGER_8 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -701,7 +701,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -820,7 +820,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_8 one = 1, zero = 0;
+ const GFC_UINTEGER_8 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -858,8 +858,8 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_8 *a, *b;
- GFC_INTEGER_8 *c;
+ const GFC_UINTEGER_8 *a, *b;
+ GFC_UINTEGER_8 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -867,11 +867,11 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_8 *t1;
+ GFC_UINTEGER_8 *t1;
a = abase;
b = bbase;
@@ -891,7 +891,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_8)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -908,7 +908,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8));
/* Start turning the crank. */
i1 = n;
@@ -1126,10 +1126,10 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -1138,7 +1138,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1147,13 +1147,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1162,13 +1162,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1178,7 +1178,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1190,10 +1190,10 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -1202,7 +1202,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1218,17 +1218,17 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx512f (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx512f (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_8 * restrict abase;
- const GFC_INTEGER_8 * restrict bbase;
- GFC_INTEGER_8 * restrict dest;
+ const GFC_UINTEGER_8 * restrict abase;
+ const GFC_UINTEGER_8 * restrict bbase;
+ GFC_UINTEGER_8 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1270,7 +1270,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1389,7 +1389,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_8 one = 1, zero = 0;
+ const GFC_UINTEGER_8 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -1427,8 +1427,8 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_8 *a, *b;
- GFC_INTEGER_8 *c;
+ const GFC_UINTEGER_8 *a, *b;
+ GFC_UINTEGER_8 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -1436,11 +1436,11 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_8 *t1;
+ GFC_UINTEGER_8 *t1;
a = abase;
b = bbase;
@@ -1460,7 +1460,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_8)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -1477,7 +1477,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8));
/* Start turning the crank. */
i1 = n;
@@ -1695,10 +1695,10 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -1707,7 +1707,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -1716,13 +1716,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -1731,13 +1731,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -1747,7 +1747,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -1759,10 +1759,10 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -1771,7 +1771,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -1789,29 +1789,29 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx128_fma3 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_i8_avx128_fma3);
#endif
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_avx128_fma4 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_i8_avx128_fma4);
#endif
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8_vanilla (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_8 * restrict abase;
- const GFC_INTEGER_8 * restrict bbase;
- GFC_INTEGER_8 * restrict dest;
+ const GFC_UINTEGER_8 * restrict abase;
+ const GFC_UINTEGER_8 * restrict bbase;
+ GFC_UINTEGER_8 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -1853,7 +1853,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -1972,7 +1972,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_8 one = 1, zero = 0;
+ const GFC_UINTEGER_8 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2010,8 +2010,8 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_8 *a, *b;
- GFC_INTEGER_8 *c;
+ const GFC_UINTEGER_8 *a, *b;
+ GFC_UINTEGER_8 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2019,11 +2019,11 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_8 *t1;
+ GFC_UINTEGER_8 *t1;
a = abase;
b = bbase;
@@ -2043,7 +2043,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_8)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2060,7 +2060,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8));
/* Start turning the crank. */
i1 = n;
@@ -2278,10 +2278,10 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -2290,7 +2290,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2299,13 +2299,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2314,13 +2314,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2330,7 +2330,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2342,10 +2342,10 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -2354,7 +2354,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
@@ -2371,16 +2371,16 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_i8 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+void matmul_i8 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+ static void (*matmul_p) (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+ void (*matmul_fn) (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
@@ -2447,13 +2447,13 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_i8 (gfc_array_i8 * const restrict retarray,
- gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+matmul_i8 (gfc_array_m8 * const restrict retarray,
+ gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- const GFC_INTEGER_8 * restrict abase;
- const GFC_INTEGER_8 * restrict bbase;
- GFC_INTEGER_8 * restrict dest;
+ const GFC_UINTEGER_8 * restrict abase;
+ const GFC_UINTEGER_8 * restrict bbase;
+ GFC_UINTEGER_8 * restrict dest;
index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
index_type x, y, n, count, xcount, ycount;
@@ -2495,7 +2495,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
retarray->base_addr
- = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8));
retarray->offset = 0;
}
else if (unlikely (compile_options.bounds_check))
@@ -2614,7 +2614,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
> POW3(blas_limit)))
{
const int m = xcount, n = ycount, k = count, ldc = rystride;
- const GFC_INTEGER_8 one = 1, zero = 0;
+ const GFC_UINTEGER_8 one = 1, zero = 0;
const int lda = (axstride == 1) ? aystride : axstride,
ldb = (bxstride == 1) ? bystride : bxstride;
@@ -2652,8 +2652,8 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
from netlib.org, translated to C, and modified for matmul.m4. */
- const GFC_INTEGER_8 *a, *b;
- GFC_INTEGER_8 *c;
+ const GFC_UINTEGER_8 *a, *b;
+ GFC_UINTEGER_8 *c;
const index_type m = xcount, n = ycount, k = count;
/* System generated locals */
@@ -2661,11 +2661,11 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
i1, i2, i3, i4, i5, i6;
/* Local variables */
- GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
+ GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42,
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
- GFC_INTEGER_8 *t1;
+ GFC_UINTEGER_8 *t1;
a = abase;
b = bbase;
@@ -2685,7 +2685,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
/* Empty c first. */
for (j=1; j<=n; j++)
for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+ c[i + j * c_dim1] = (GFC_UINTEGER_8)0;
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
@@ -2702,7 +2702,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
if (t1_dim > 65536)
t1_dim = 65536;
- t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+ t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8));
/* Start turning the crank. */
i1 = n;
@@ -2920,10 +2920,10 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
{
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -2932,7 +2932,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
@@ -2941,13 +2941,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
@@ -2956,13 +2956,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase[n*axstride] * bbase_y[n*bxstride];
dest[y*rxstride] = s;
@@ -2972,7 +2972,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+ dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0;
for (y = 0; y < ycount; y++)
for (n = 0; n < count; n++)
@@ -2984,10 +2984,10 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
else
{
- const GFC_INTEGER_8 *restrict abase_x;
- const GFC_INTEGER_8 *restrict bbase_y;
- GFC_INTEGER_8 *restrict dest_y;
- GFC_INTEGER_8 s;
+ const GFC_UINTEGER_8 *restrict abase_x;
+ const GFC_UINTEGER_8 *restrict bbase_y;
+ GFC_UINTEGER_8 *restrict dest_y;
+ GFC_UINTEGER_8 s;
for (y = 0; y < ycount; y++)
{
@@ -2996,7 +2996,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
- s = (GFC_INTEGER_8) 0;
+ s = (GFC_UINTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n*aystride] * bbase_y[n*bxstride];
dest_y[x*rxstride] = s;
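
The retyping in the hunks above is not cosmetic. In C, signed overflow in the
accumulation `s += a*b` is undefined behavior, while unsigned arithmetic wraps
modulo 2**64, which matches the modular semantics of gfortran's UNSIGNED
extension. A minimal, self-contained sketch of the inner kernel follows,
assuming the usual GFC_UINTEGER_8 == uint64_t mapping; matvec_m8_sketch is a
name invented for this illustration, not part of libgfortran.

/* Sketch of the matrix-times-vector kernel above with the descriptor
   machinery stripped away.  */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t GFC_UINTEGER_8;

static void
matvec_m8_sketch (GFC_UINTEGER_8 *restrict dest,
		  const GFC_UINTEGER_8 *restrict a,
		  const GFC_UINTEGER_8 *restrict b,
		  int count, int ycount)
{
  for (int y = 0; y < ycount; y++)
    {
      GFC_UINTEGER_8 s = (GFC_UINTEGER_8) 0;
      for (int n = 0; n < count; n++)
	s += a[n] * b[y * count + n];	/* wraps mod 2**64; no UB */
      dest[y] = s;
    }
}

int
main (void)
{
  GFC_UINTEGER_8 a[2] = { UINT64_MAX, 2 };
  GFC_UINTEGER_8 b[2] = { 2, 3 };	/* a single column */
  GFC_UINTEGER_8 d[1];

  matvec_m8_sketch (d, a, b, 2, 1);
  /* 2*(2**64-1) + 6 == 2**65 + 4, so the wrapped result is 4.  */
  printf ("%llu\n", (unsigned long long) d[0]);
  return 0;
}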
diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c
index c3434c2..9d28bf3 100644
--- a/libgfortran/generated/matmul_r10.c
+++ b/libgfortran/generated/matmul_r10.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_REAL_10 *, const GFC_REAL_10 *,
- const int *, const GFC_REAL_10 *, const int *,
- const GFC_REAL_10 *, GFC_REAL_10 *, const int *,
- int, int);
+ const int *, const GFC_REAL_10 *, const GFC_REAL_10 *,
+ const int *, const GFC_REAL_10 *, const int *,
+ const GFC_REAL_10 *, GFC_REAL_10 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_r10 (gfc_array_r10 * const restrict retarray,
+extern void matmul_r10 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_r10);
@@ -80,11 +80,11 @@ export_proto(matmul_r10);
#ifdef HAVE_AVX
static void
-matmul_r10_avx (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_r10_avx (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_r10_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_r10_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
+matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_r10_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_r10_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
+matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_r10 (gfc_array_r10 * const restrict retarray,
+void matmul_r10 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_r10 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_r10 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_r10 (gfc_array_r10 * const restrict retarray,
+matmul_r10 (gfc_array_r10 * const restrict retarray,
gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c
index 2fe50d2..889280c 100644
--- a/libgfortran/generated/matmul_r16.c
+++ b/libgfortran/generated/matmul_r16.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_REAL_16 *, const GFC_REAL_16 *,
- const int *, const GFC_REAL_16 *, const int *,
- const GFC_REAL_16 *, GFC_REAL_16 *, const int *,
- int, int);
+ const int *, const GFC_REAL_16 *, const GFC_REAL_16 *,
+ const int *, const GFC_REAL_16 *, const int *,
+ const GFC_REAL_16 *, GFC_REAL_16 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_r16 (gfc_array_r16 * const restrict retarray,
+extern void matmul_r16 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_r16);
@@ -80,11 +80,11 @@ export_proto(matmul_r16);
#ifdef HAVE_AVX
static void
-matmul_r16_avx (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_r16_avx (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_r16_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_r16_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
+matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_r16_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_r16_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
+matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_r16 (gfc_array_r16 * const restrict retarray,
+void matmul_r16 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_r16 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_r16 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_r16 (gfc_array_r16 * const restrict retarray,
+matmul_r16 (gfc_array_r16 * const restrict retarray,
gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_r17.c b/libgfortran/generated/matmul_r17.c
index 67ff8e6..7ab9f2f 100644
--- a/libgfortran/generated/matmul_r17.c
+++ b/libgfortran/generated/matmul_r17.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_REAL_17 *, const GFC_REAL_17 *,
- const int *, const GFC_REAL_17 *, const int *,
- const GFC_REAL_17 *, GFC_REAL_17 *, const int *,
- int, int);
+ const int *, const GFC_REAL_17 *, const GFC_REAL_17 *,
+ const int *, const GFC_REAL_17 *, const int *,
+ const GFC_REAL_17 *, GFC_REAL_17 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_r17 (gfc_array_r17 * const restrict retarray,
+extern void matmul_r17 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_r17);
@@ -80,11 +80,11 @@ export_proto(matmul_r17);
#ifdef HAVE_AVX
static void
-matmul_r17_avx (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_r17_avx (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_r17_avx (gfc_array_r17 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_r17_avx2 (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx2 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_r17_avx2 (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx2 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_r17_avx2 (gfc_array_r17 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_r17_avx512f (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx512f (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_r17_avx512f (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx512f (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_r17_avx512f (gfc_array_r17 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_r17_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_r17_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray,
+matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_r17_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_r17_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_r17_vanilla (gfc_array_r17 * const restrict retarray,
+matmul_r17_vanilla (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_r17_vanilla (gfc_array_r17 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_r17 (gfc_array_r17 * const restrict retarray,
+void matmul_r17 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_r17 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_r17 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_r17 (gfc_array_r17 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_r17 (gfc_array_r17 * const restrict retarray,
+matmul_r17 (gfc_array_r17 * const restrict retarray,
gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c
index f1df577..8117af3 100644
--- a/libgfortran/generated/matmul_r4.c
+++ b/libgfortran/generated/matmul_r4.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_REAL_4 *, const GFC_REAL_4 *,
- const int *, const GFC_REAL_4 *, const int *,
- const GFC_REAL_4 *, GFC_REAL_4 *, const int *,
- int, int);
+ const int *, const GFC_REAL_4 *, const GFC_REAL_4 *,
+ const int *, const GFC_REAL_4 *, const int *,
+ const GFC_REAL_4 *, GFC_REAL_4 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_r4 (gfc_array_r4 * const restrict retarray,
+extern void matmul_r4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_r4);
@@ -80,11 +80,11 @@ export_proto(matmul_r4);
#ifdef HAVE_AVX
static void
-matmul_r4_avx (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_r4_avx (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_r4_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_r4_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
+matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_r4_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_r4_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
+matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_r4 (gfc_array_r4 * const restrict retarray,
+void matmul_r4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_r4 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_r4 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_r4 (gfc_array_r4 * const restrict retarray,
+matmul_r4 (gfc_array_r4 * const restrict retarray,
gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c
index ddfe0a7..d05dede 100644
--- a/libgfortran/generated/matmul_r8.c
+++ b/libgfortran/generated/matmul_r8.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const GFC_REAL_8 *, const GFC_REAL_8 *,
- const int *, const GFC_REAL_8 *, const int *,
- const GFC_REAL_8 *, GFC_REAL_8 *, const int *,
- int, int);
+ const int *, const GFC_REAL_8 *, const GFC_REAL_8 *,
+ const int *, const GFC_REAL_8 *, const int *,
+ const GFC_REAL_8 *, GFC_REAL_8 *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_r8 (gfc_array_r8 * const restrict retarray,
+extern void matmul_r8 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_r8);
@@ -80,11 +80,11 @@ export_proto(matmul_r8);
#ifdef HAVE_AVX
static void
-matmul_r8_avx (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static void
-matmul_r8_avx (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -649,11 +649,11 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
#ifdef HAVE_AVX2
static void
-matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
-matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1218,11 +1218,11 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
#ifdef HAVE_AVX512F
static void
-matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static void
-matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -1789,7 +1789,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
void
-matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto(matmul_r8_avx128_fma3);
@@ -1797,7 +1797,7 @@ internal_proto(matmul_r8_avx128_fma3);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
void
-matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
+matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto(matmul_r8_avx128_fma4);
@@ -1805,7 +1805,7 @@ internal_proto(matmul_r8_avx128_fma4);
/* Function to fall back to if there is no special processor-specific version. */
static void
-matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
+matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
@@ -2371,15 +2371,15 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_r8 (gfc_array_r8 * const restrict retarray,
+void matmul_r8 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) (gfc_array_r8 * const restrict retarray,
+ static void (*matmul_p) (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) (gfc_array_r8 * const restrict retarray,
+ void (*matmul_fn) (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
@@ -2447,7 +2447,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray,
#else /* Just the vanilla function. */
void
-matmul_r8 (gfc_array_r8 * const restrict retarray,
+matmul_r8 (gfc_array_r8 * const restrict retarray,
gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
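
The changes to the real-kind files above (and the complex ones earlier) are
whitespace only: the blas_call typedef is re-indented to match the regenerated
matmul.m4 template. For orientation, the sketch below shows roughly how the
generated code invokes a BLAS gemm through that pointer once the size test
against blas_limit passes. The argument layout follows the typedef, with the
two trailing ints being the hidden Fortran character lengths; this is a
hedged reconstruction, not the verbatim runtime call.

/* Sketch of a call through blas_call for GFC_REAL_8 (i.e. double).
   Variable names (m, n, k, lda, ldb, ldc, one, zero) mirror those
   in the generated sources.  */
typedef double GFC_REAL_8;

typedef void (*blas_call)(const char *, const char *, const int *, const int *,
			  const int *, const GFC_REAL_8 *, const GFC_REAL_8 *,
			  const int *, const GFC_REAL_8 *, const int *,
			  const GFC_REAL_8 *, GFC_REAL_8 *, const int *,
			  int, int);

static void
gemm_call_sketch (blas_call gemm, GFC_REAL_8 *dest, const GFC_REAL_8 *abase,
		  const GFC_REAL_8 *bbase, int m, int n, int k,
		  int lda, int ldb, int ldc)
{
  const GFC_REAL_8 one = 1, zero = 0;

  /* dest := 1*A*B + 0*dest, no transposes; the final 1, 1 are the
     lengths of the "N" strings.  */
  gemm ("N", "N", &m, &n, &k, &one, abase, &lda, bbase, &ldb,
	&zero, dest, &ldc, 1, 1);
}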
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index faf57a3..aaa9222 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -403,6 +403,13 @@ typedef GFC_ARRAY_DESCRIPTOR (index_type) gfc_array_index_type;
#ifdef HAVE_GFC_INTEGER_16
typedef GFC_ARRAY_DESCRIPTOR (GFC_INTEGER_16) gfc_array_i16;
#endif
+typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_1) gfc_array_m1;
+typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_2) gfc_array_m2;
+typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_4) gfc_array_m4;
+typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_8) gfc_array_m8;
+#ifdef HAVE_GFC_UINTEGER_16
+typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_16) gfc_array_m16;
+#endif
typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_4) gfc_array_r4;
typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_8) gfc_array_r8;
#ifdef HAVE_GFC_REAL_10
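
The five new typedefs give the unsigned kinds their own descriptor types,
parallel to gfc_array_i1 through gfc_array_i16. Below is a rough picture of
what such a typedef denotes; the field layout is an assumption for
illustration only, since the real GFC_ARRAY_DESCRIPTOR macro in libgfortran.h
carries more fields (dtype and rank information among them). Only base_addr
and offset appear verbatim in the matmul code above.

/* Assumed, simplified layout of GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_8).  */
#include <stdint.h>
#include <stddef.h>

typedef uint64_t GFC_UINTEGER_8;	/* usual mapping */

typedef struct
{
  ptrdiff_t stride;		/* element stride, cf. axstride/aystride */
  ptrdiff_t lower_bound;
  ptrdiff_t upper_bound;
} dim_sketch;

typedef struct
{
  GFC_UINTEGER_8 *base_addr;	/* as in retarray->base_addr */
  size_t offset;		/* as in retarray->offset = 0 */
  dim_sketch dim[7];		/* one entry per rank */
} gfc_array_m8_sketch;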
diff --git a/libgfortran/m4/iparm.m4 b/libgfortran/m4/iparm.m4
index b474620..0c4c76c 100644
--- a/libgfortran/m4/iparm.m4
+++ b/libgfortran/m4/iparm.m4
@@ -4,7 +4,7 @@ dnl This file is part of the GNU Fortran 95 Runtime Library (libgfortran)
dnl Distributed under the GNU GPL with exception. See COPYING for details.
dnl M4 macro file to get type names from filenames
define(get_typename2, `GFC_$1_$2')dnl
-define(get_typename, `get_typename2(ifelse($1,i,INTEGER,ifelse($1,r,REAL,ifelse($1,l,LOGICAL,ifelse($1,c,COMPLEX,ifelse($1,s,UINTEGER,unknown))))),`$2')')dnl
+define(get_typename, `get_typename2(ifelse($1,i,INTEGER,ifelse($1,r,REAL,ifelse($1,l,LOGICAL,ifelse($1,c,COMPLEX,ifelse($1,m,UINTEGER,ifelse($1,s,UINTEGER,unknown)))))),`$2')')dnl
define(get_arraytype, `gfc_array_$1$2')dnl
define(define_type, `dnl
ifelse(regexp($2,`^[0-9]'),-1,`dnl
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4
index 7fc1f5f..cd804e8 100644
--- a/libgfortran/m4/matmul.m4
+++ b/libgfortran/m4/matmul.m4
@@ -28,6 +28,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <assert.h>'
include(iparm.m4)dnl
+ifelse(index(rtype_name,`GFC_INTEGER'),`0',dnl
+define(`rtype_name',patsubst(rtype_name,`GFC_INTEGER',`GFC_UINTEGER'))dnl
+define(`rtype',patsubst(rtype,`gfc_array_i',`gfc_array_m')))dnl
`#if defined (HAVE_'rtype_name`)
@@ -36,10 +39,10 @@ include(iparm.m4)dnl
matrices. */
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
- const int *, const 'rtype_name` *, const 'rtype_name` *,
- const int *, const 'rtype_name` *, const int *,
- const 'rtype_name` *, 'rtype_name` *, const int *,
- int, int);
+ const int *, const 'rtype_name` *, const 'rtype_name` *,
+ const int *, const 'rtype_name` *, const int *,
+ const 'rtype_name` *, 'rtype_name` *, const int *,
+ int, int);
/* The order of loops is different in the case of plain matrix
multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -70,7 +73,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
see if there is a way to perform the matrix multiplication by a call
to the BLAS gemm function. */
-extern void matmul_'rtype_code` ('rtype` * const restrict retarray,
+extern void matmul_'rtype_code` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
export_proto(matmul_'rtype_code`);
@@ -82,7 +85,7 @@ export_proto(matmul_'rtype_code`);
#ifdef HAVE_AVX
'define(`matmul_name',`matmul_'rtype_code`_avx')dnl
`static void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
static' include(matmul_internal.m4)dnl
@@ -91,7 +94,7 @@ static' include(matmul_internal.m4)dnl
#ifdef HAVE_AVX2
'define(`matmul_name',`matmul_'rtype_code`_avx2')dnl
`static void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static' include(matmul_internal.m4)dnl
@@ -100,7 +103,7 @@ static' include(matmul_internal.m4)dnl
#ifdef HAVE_AVX512F
'define(`matmul_name',`matmul_'rtype_code`_avx512f')dnl
`static void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
static' include(matmul_internal.m4)dnl
@@ -111,7 +114,7 @@ static' include(matmul_internal.m4)dnl
#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl
`void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
internal_proto('matmul_name`);
@@ -120,7 +123,7 @@ internal_proto('matmul_name`);
#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl
`void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
internal_proto('matmul_name`);
@@ -134,15 +137,15 @@ internal_proto('matmul_name`);
/* Currently, this is i386 only. Adjust for other architectures. */
-void matmul_'rtype_code` ('rtype` * const restrict retarray,
+void matmul_'rtype_code` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
- static void (*matmul_p) ('rtype` * const restrict retarray,
+ static void (*matmul_p) ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
- void (*matmul_fn) ('rtype` * const restrict retarray,
+ void (*matmul_fn) ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm);
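
The three-line ifelse added at the top of matmul.m4 is the heart of the
patch: when the template is instantiated for an integer kind, rtype_name is
rewritten from GFC_INTEGER to GFC_UINTEGER and rtype from gfc_array_i to
gfc_array_m, while rtype_code is left alone. The integer template therefore
emits unsigned code under the existing symbol names, as the regenerated
matmul_i8.c above shows; the net effect for kind 8 is a prototype like this:

/* The exported symbol keeps its matmul_i8 name, but every array
   argument is now the unsigned descriptor type.  */
extern void matmul_i8 (gfc_array_m8 * const restrict retarray,
	gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b,
	int try_blas, int blas_limit, blas_call gemm);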
diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4
index 0e96207..20b1a48 100644
--- a/libgfortran/m4/matmul_internal.m4
+++ b/libgfortran/m4/matmul_internal.m4
@@ -1,5 +1,5 @@
`void
-'matmul_name` ('rtype` * const restrict retarray,
+'matmul_name` ('rtype` * const restrict retarray,
'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
int blas_limit, blas_call gemm)
{
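
The matmul_internal.m4 hunk, like the real and complex ones, is whitespace
only; the dispatch logic around it is unchanged in substance. As the
matmul_i8 hunks above show, the first call probes the CPU once and publishes
the chosen kernel through a relaxed atomic, so later calls only load the
pointer. A condensed sketch of that pattern follows; the kernel names and
the __builtin_cpu_supports probe are illustrative assumptions.

/* Condensed sketch of the one-time dispatch in matmul_i8 above.  */
typedef void (*kernel_fn) (void);

static void kernel_vanilla (void) { /* portable fallback */ }
static void kernel_avx2 (void) { /* processor-specific variant */ }

static kernel_fn matmul_p;	/* like the static matmul_p above */

void
dispatch_sketch (void)
{
  kernel_fn matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);

  if (matmul_fn == NULL)
    {
      /* First call: pick the best kernel for this CPU, then publish
	 it so subsequent calls skip the probe.  */
      matmul_fn = kernel_vanilla;
      if (__builtin_cpu_supports ("avx2"))
	matmul_fn = kernel_avx2;
      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
    }
  matmul_fn ();
}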