re PR middle-end/90796 (GCC: O2 vs O3 output differs on simple test)

Fix PR middle-end/90796 PR middle-end/90796 * gimple-loop-jam.c (any_access_function_variant_p): New function. (adjust_unroll_factor): Use it to constrain safety, new parameter. (tree_loop_unroll_and_jam): Adjust call and profitable unroll factor. testsuite/ * gcc.dg/unroll-and-jam.c: Add three invalid and one valid case. From-SVN: r277287
author: Michael Matz <matz@suse.de> 2019-10-22 12:25:03 +0000
committer: Michael Matz <matz@gcc.gnu.org> 2019-10-22 12:25:03 +0000
commit: 92781ff1da896b2f92b1dcc06953be493371bf21 (patch)
tree: dc1d1782a0e0eb5a953e17ec69cebac7ecb7c6d7 /gcc/gimple-loop-jam.c
parent: 9107d6526b938eba8168025c0d90d06ad3634e69 (diff)
download: gcc-92781ff1da896b2f92b1dcc06953be493371bf21.zip
gcc-92781ff1da896b2f92b1dcc06953be493371bf21.tar.gz
gcc-92781ff1da896b2f92b1dcc06953be493371bf21.tar.bz2
1 files changed, 74 insertions, 7 deletions
diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
index 11153f5..899653b 100644
--- a/gcc/gimple-loop-jam.c
+++ b/gcc/gimple-loop-jam.c
@@ -360,16 +360,33 @@ fuse_loops (class loop *loop)
   rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
 }
 
+/* Return true if any of the access functions for dataref A
+   isn't invariant with respect to loop LOOP_NEST.  */
+static bool
+any_access_function_variant_p (const struct data_reference *a,
+			       const class loop *loop_nest)
+{
+  unsigned int i;
+  vec<tree> fns = DR_ACCESS_FNS (a);
+  tree t;
+
+  FOR_EACH_VEC_ELT (fns, i, t)
+    if (!evolution_function_is_invariant_p (t, loop_nest->num))
+      return true;
+
+  return false;
+}
+
 /* Returns true if the distance in DDR can be determined and adjusts
    the unroll factor in *UNROLL to make unrolling valid for that distance.
-   Otherwise return false.
+   Otherwise return false.  DDR is with respect to the outer loop of INNER.
 
    If this data dep can lead to a removed memory reference, increment
    *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
    for this to happen.  */
 
 static bool
-adjust_unroll_factor (struct data_dependence_relation *ddr,
+adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
 		      unsigned *unroll, unsigned *profit_unroll,
 		      unsigned *removed)
 {
@@ -392,9 +409,59 @@ adjust_unroll_factor (struct data_dependence_relation *ddr,
 	    gcc_unreachable ();
 	  else if ((unsigned)dist >= *unroll)
 	    ;
-	  else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
-		   || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
-		       && dist > 0))
+	  else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
+	    {
+	      /* We have (a,0) with a < N, so this will be transformed into
+	         (0,0) after unrolling by N.  This might potentially be a
+		 problem, if it's not a read-read dependency.  */
+	      if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
+		;
+	      else
+		{
+		  /* So, at least one is a write, and we might reduce the
+		     distance vector to (0,0).  This is still no problem
+		     if both data-refs are affine with respect to the inner
+		     loops.  But if one of them is invariant with respect
+		     to an inner loop our reordering implicit in loop fusion
+		     corrupts the program, as our data dependences don't
+		     capture this.  E.g. for:
+		       for (0 <= i < n)
+		         for (0 <= j < m)
+		           a[i][0] = a[i+1][0] + 2;    // (1)
+		           b[i][j] = b[i+1][j] + 2;    // (2)
+		     the distance vector for both statements is (-1,0),
+		     but exchanging the order for (2) is okay, while
+		     for (1) it is not.  To see this, write out the original
+		     accesses (assume m is 2):
+		       a i j original
+		       0 0 0 r a[1][0] b[1][0]
+		       1 0 0 w a[0][0] b[0][0]
+		       2 0 1 r a[1][0] b[1][1]
+		       3 0 1 w a[0][0] b[0][1]
+		       4 1 0 r a[2][0] b[2][0]
+		       5 1 0 w a[1][0] b[1][0]
+		     after unroll-by-2 and fusion the accesses are done in
+		     this order (from column a): 0,1, 4,5, 2,3, i.e. this:
+		       a i j transformed
+		       0 0 0 r a[1][0] b[1][0]
+		       1 0 0 w a[0][0] b[0][0]
+		       4 1 0 r a[2][0] b[2][0]
+		       5 1 0 w a[1][0] b[1][0]
+		       2 0 1 r a[1][0] b[1][1]  
+		       3 0 1 w a[0][0] b[0][1]
+		     Note how access 2 accesses the same element as access 5
+		     for array 'a' but not for array 'b'.  */
+		  if (any_access_function_variant_p (DDR_A (ddr), inner)
+		      && any_access_function_variant_p (DDR_B (ddr), inner))
+		    ;
+		  else
+		    /* And if any dataref of this pair is invariant with
+		       respect to the inner loop, we have no chance than
+		       to reduce the unroll factor.  */
+		    *unroll = dist;
+		}
+	    }
+	  else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
 	    ;
 	  else
 	    *unroll = dist;
@@ -486,7 +553,7 @@ tree_loop_unroll_and_jam (void)
 	  /* Now check the distance vector, for determining a sensible
 	     outer unroll factor, and for validity of merging the inner
 	     loop copies.  */
-	  if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
+	  if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
 				     &removed))
 	    {
 	      /* Couldn't get the distance vector.  For two reads that's
@@ -506,7 +573,7 @@ tree_loop_unroll_and_jam (void)
 	 to ignore all profitability concerns and apply the transformation
 	 always.  */
       if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
-	profit_unroll = 2;
+	profit_unroll = MAX(2, profit_unroll);
       else if (removed * 100 / datarefs.length ()
 	  < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
 	profit_unroll = 1;
author	Michael Matz <matz@suse.de>	2019-10-22 12:25:03 +0000
committer	Michael Matz <matz@gcc.gnu.org>	2019-10-22 12:25:03 +0000
commit	92781ff1da896b2f92b1dcc06953be493371bf21 (patch)
tree	dc1d1782a0e0eb5a953e17ec69cebac7ecb7c6d7 /gcc/gimple-loop-jam.c
parent	9107d6526b938eba8168025c0d90d06ad3634e69 (diff)
download	gcc-92781ff1da896b2f92b1dcc06953be493371bf21.zip gcc-92781ff1da896b2f92b1dcc06953be493371bf21.tar.gz gcc-92781ff1da896b2f92b1dcc06953be493371bf21.tar.bz2