Make-lang.in (fortran/trans-resolve.o): Depend on fortran/dependency.h.

gcc/fortran/ * Make-lang.in (fortran/trans-resolve.o): Depend on fortran/dependency.h. * gfortran.h (gfc_expr): Add an "inline_noncopying_intrinsic" flag. * dependency.h (gfc_get_noncopying_intrinsic_argument): Declare. (gfc_check_fncall_dependency): Change prototype. * dependency.c (gfc_get_noncopying_intrinsic_argument): New function. (gfc_check_argument_var_dependency): New function, split from gfc_check_fncall_dependency. (gfc_check_argument_dependency): New function. (gfc_check_fncall_dependency): Replace the expression parameter with separate symbol and argument list parameters. Generalize the function to handle dependencies for any type of expression, not just variables. Accept a further argument giving the intent of the expression being tested. Ignore intent(in) arguments if that expression is also intent(in). * resolve.c: Include dependency.h. (find_noncopying_intrinsics): New function. (resolve_function, resolve_call): Call it on success. * trans-array.h (gfc_conv_array_transpose): Declare. (gfc_check_fncall_dependency): Remove prototype. * trans-array.c (gfc_conv_array_transpose): New function. * trans-intrinsic.c (gfc_conv_intrinsic_function): Don't use the libcall handling if the expression is to be evaluated inline. Add a case for handling inline transpose()s. * trans-expr.c (gfc_trans_arrayfunc_assign): Adjust for the new interface provided by gfc_check_fncall_dependency. libgfortran/ * m4/matmul.m4: Use a different order in the special case of a transposed first argument. * generated/matmul_c4.c, generated/matmul_c8.c, generated/matmul_c10.c, * generated/matmul_c16.c, generated/matmul_i4.c, generated/matmul_i8.c, * generated/matmul_i10.c, generated/matmul_r4.c, generated/matmul_r8.c * generated/matmul_r10.c, generated/matmul_r16.c: Regenerated. Co-Authored-By: Victor Leikehman <LEI@il.ibm.com> From-SVN: r108459
author: Richard Sandiford <richard@codesourcery.com> 2005-12-13 05:23:12 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2005-12-13 05:23:12 +0000
commit: 1524f80b1ceeda3c293142f4e370616be6dcf2cf (patch)
tree: 8b430f1a509f3fecd45aafac44d2707a38eb02d2 /libgfortran/m4
parent: 264c41eda5b1d3b073053da88f7e757635269cc3 (diff)
download: gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.zip
gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.tar.gz
gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.tar.bz2
1 files changed, 63 insertions, 8 deletions
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4
index 730e4d7..f488f5e 100644
--- a/libgfortran/m4/matmul.m4
+++ b/libgfortran/m4/matmul.m4
@@ -37,16 +37,29 @@ include(iparm.m4)dnl
 
 `#if defined (HAVE_'rtype_name`)'
 
-/* This is a C version of the following fortran pseudo-code. The key
-   point is the loop order -- we access all arrays column-first, which
-   improves the performance enough to boost galgel spec score by 50%.
+/* The order of loops is different in the case of plain matrix
+   multiplication C=MATMUL(A,B), and in the frequent special case where
+   the argument A is the temporary result of a TRANSPOSE intrinsic:
+   C=MATMUL(TRANSPOSE(A),B).  Transposed temporaries are detected by
+   looking at their strides.
+
+   The equivalent Fortran pseudo-code is:
 
    DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
-   C = 0
-   DO J=1,N
-     DO K=1,COUNT
+   IF (.NOT.IS_TRANSPOSED(A)) THEN
+     C = 0
+     DO J=1,N
+       DO K=1,COUNT
+         DO I=1,M
+           C(I,J) = C(I,J)+A(I,K)*B(K,J)
+   ELSE
+     DO J=1,N
        DO I=1,M
-         C(I,J) = C(I,J)+A(I,K)*B(K,J)
+         S = 0
+         DO K=1,COUNT
+           S = S+A(I,K)+B(K,J)
+         C(I,J) = S
+   ENDIF
 */
 
 extern void matmul_`'rtype_code (rtype * const restrict retarray, 
@@ -206,7 +219,28 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
 	    }
 	}
     }
-  else
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      const rtype_name *restrict abase_x;
+      const rtype_name *restrict bbase_y;
+      rtype_name *restrict dest_y;
+      rtype_name s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (rtype_name) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n] * bbase_y[n];
+	      dest_y[x] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
     {
       for (y = 0; y < ycount; y++)
 	for (x = 0; x < xcount; x++)
@@ -218,6 +252,27 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
 	    /* dest[x,y] += a[x,n] * b[n,y] */
 	    dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
     }
+  else
+    {
+      const rtype_name *restrict abase_x;
+      const rtype_name *restrict bbase_y;
+      rtype_name *restrict dest_y;
+      rtype_name s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (rtype_name) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
 }
 
 #endif
author	Richard Sandiford <richard@codesourcery.com>	2005-12-13 05:23:12 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2005-12-13 05:23:12 +0000
commit	1524f80b1ceeda3c293142f4e370616be6dcf2cf (patch)
tree	8b430f1a509f3fecd45aafac44d2707a38eb02d2 /libgfortran/m4
parent	264c41eda5b1d3b073053da88f7e757635269cc3 (diff)
download	gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.zip gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.tar.gz gcc-1524f80b1ceeda3c293142f4e370616be6dcf2cf.tar.bz2