author     Richard Sandiford <richard.sandiford@arm.com>   2019-11-16 11:43:31 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>        2019-11-16 11:43:31 +0000
commit     f9d6338bd15ce1fae36bf25d3a0545e9678ddc58
tree       df3f266e8905b2bb9efece9a71af140fced56abb  /gcc/tree-data-ref.c
parent     b4d1b635737a4780e5be247f8be9550eaf83dae5
Use a single comparison for index-based alias checks
This patch rewrites the index-based alias checks to use conditions
of the form:
(unsigned T) (a - b + bias) <= limit
E.g. before the patch:
  struct s { int x[100]; };
  void
  f1 (struct s *s1, int a, int b)
  {
    for (int i = 0; i < 32; ++i)
      s1->x[i + a] += s1->x[i + b];
  }
used:
add w3, w1, 3
cmp w3, w2
add w3, w2, 3
ccmp w1, w3, 0, ge
ble .L2
whereas after the patch it uses:
sub w3, w1, w2
add w3, w3, 3
cmp w3, 6
bls .L2
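As a standalone illustration of the range-check trick behind that single
comparison (a minimal sketch using the bias of 3 and limit of 6 from this
example; the function name may_overlap is invented here and is not part of
the patch):

  /* Return nonzero if indices A and B are less than 4 apart, i.e. if the
     two accesses may overlap.  Logically the same as the two-comparison
     test (a - 4 < b && b - 4 < a): adding bias = 3 maps the acceptable
     differences [-3, 3] onto [0, 6], while any other difference wraps
     around to a large unsigned value, so a single unsigned <= suffices.  */
  static int
  may_overlap (int a, int b)
  {
    return ((unsigned int) a - (unsigned int) b + 3) <= 6;
  }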
The patch also fixes the seg_len1 and seg_len2 negation for cases in
which seg_len is a "negative unsigned" value narrower than 64 bits,
like it is for 32-bit targets. Previously we'd end up with values
like 0xffffffff00000001 instead of 1.
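To see that failure mode in isolation (a minimal sketch, not code from the
patch; the variable names are invented): negating an all-ones 32-bit segment
length after widening it to 64 bits produces the bogus value, whereas
negating it in its original 32-bit precision gives 1, which roughly mirrors
what the patched code does by negating in the value's own precision
(wi::to_poly_wide) before extracting a 64-bit value (force_uhwi):

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    uint32_t seg_len = UINT32_C (0xffffffff);           /* "negative unsigned" -1  */
    uint64_t widened = -(uint64_t) seg_len;             /* widen, then negate: 0xffffffff00000001  */
    uint64_t narrowed = (uint64_t) (uint32_t) -seg_len; /* negate in 32 bits first: 1  */
    printf ("%#" PRIx64 " %#" PRIx64 "\n", widened, narrowed);
    return 0;
  }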
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-data-ref.c (create_intersect_range_checks_index): Rewrite
the index tests to have the form (unsigned T) (B - A + bias) <= limit.
gcc/testsuite/
* gcc.dg/vect/vect-alias-check-18.c: New test.
* gcc.dg/vect/vect-alias-check-19.c: Likewise.
* gcc.dg/vect/vect-alias-check-20.c: Likewise.
From-SVN: r278354
Diffstat (limited to 'gcc/tree-data-ref.c')
-rw-r--r--   gcc/tree-data-ref.c   143
1 file changed, 92 insertions, 51 deletions
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 527cb5e..4dfa334 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1743,7 +1743,9 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
 
    We can create expression based on index rather than address:
 
-     (i_0 + 4 < j_0 || j_0 + 4 < i_0)
+     (unsigned) (i_0 - j_0 + 3) <= 6
+
+   i.e. the indices are less than 4 apart.
 
    Note evolution step of index needs to be considered in comparison.  */
 
@@ -1780,15 +1782,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
   if (neg_step)
     {
       abs_step = -abs_step;
-      seg_len1 = -seg_len1;
-      seg_len2 = -seg_len2;
-    }
-  else
-    {
-      /* Include the access size in the length, so that we only have one
-         tree addition below.  */
-      seg_len1 += dr_a.access_size;
-      seg_len2 += dr_b.access_size;
+      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
+      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
     }
 
   /* Infer the number of iterations with which the memory segment is accessed
@@ -1802,16 +1797,13 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
     return false;
 
-  poly_uint64 niter_access1 = 0, niter_access2 = 0;
-  if (neg_step)
-    {
-      /* Divide each access size by the byte step, rounding up.  */
-      if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
-                            abs_step, &niter_access1)
-          || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
-                               abs_step, &niter_access2))
-        return false;
-    }
+  /* Divide each access size by the byte step, rounding up.  */
+  poly_uint64 niter_access1, niter_access2;
+  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
+                        abs_step, &niter_access1)
+      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
+                           abs_step, &niter_access2))
+    return false;
 
   unsigned int i;
   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
@@ -1851,38 +1843,87 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
          index of data reference.  Like segment length, index length is
          linear function of the number of iterations with index_step as
          the coefficient, i.e, niter_len * idx_step.  */
-      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
-                                   build_int_cst (TREE_TYPE (min1),
-                                                  niter_len1));
-      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
-                                   build_int_cst (TREE_TYPE (min2),
-                                                  niter_len2));
-      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
-      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
-      /* Adjust ranges for negative step.  */
+      offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
+                                                  SIGNED);
       if (neg_step)
-        {
-          /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
-          std::swap (min1, max1);
-          std::swap (min2, max2);
-
-          /* As with the lengths just calculated, we've measured the access
-             sizes in iterations, so multiply them by the index step.  */
-          tree idx_access1
-            = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
-                           build_int_cst (TREE_TYPE (min1), niter_access1));
-          tree idx_access2
-            = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
-                           build_int_cst (TREE_TYPE (min2), niter_access2));
-
-          /* MINUS_EXPR because the above values are negative.  */
-          max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
-          max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
-        }
-      tree part_cond_expr
-        = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-                       fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
-                       fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
+        abs_idx_step = -abs_idx_step;
+      poly_offset_int idx_len1 = abs_idx_step * niter_len1;
+      poly_offset_int idx_len2 = abs_idx_step * niter_len2;
+      poly_offset_int idx_access1 = abs_idx_step * niter_access1;
+      poly_offset_int idx_access2 = abs_idx_step * niter_access2;
+
+      gcc_assert (known_ge (idx_len1, 0)
+                  && known_ge (idx_len2, 0)
+                  && known_ge (idx_access1, 0)
+                  && known_ge (idx_access2, 0));
+
+      /* Each access has the following pattern, with lengths measured
+         in units of INDEX:
+
+              <-- idx_len -->
+              <--- A: -ve step --->
+              +-----+-------+-----+-------+-----+
+              | n-1 | ..... |  0  | ..... | n-1 |
+              +-----+-------+-----+-------+-----+
+                            <--- B: +ve step --->
+                            <-- idx_len -->
+                            |
+                            min
+
+         where "n" is the number of scalar iterations covered by the segment
+         and where each access spans idx_access units.
+
+         A is the range of bytes accessed when the step is negative,
+         B is the range when the step is positive.
+
+         When checking for general overlap, we need to test whether
+         the range:
+
+           [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
+
+         overlaps:
+
+           [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
+
+         where:
+
+            low_offsetN = +ve step ? 0 : -idx_lenN;
+           high_offsetN = +ve step ? idx_lenN : 0;
+
+         This is equivalent to testing whether:
+
+           min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
+           && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
+
+         Converting this into a single test, there is an overlap if:
+
+           0 <= min2 - min1 + bias <= limit
+
+         where  bias = high_offset2 + idx_access2 - 1 - low_offset1
+               limit = (high_offset1 - low_offset1 + idx_access1 - 1)
+                     + (high_offset2 - low_offset2 + idx_access2 - 1)
+          i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
+
+         Combining the tests requires limit to be computable in an unsigned
+         form of the index type; if it isn't, we fall back to the usual
+         pointer-based checks.  */
+      poly_offset_int limit = (idx_len1 + idx_access1 - 1
+                               + idx_len2 + idx_access2 - 1);
+      tree utype = unsigned_type_for (TREE_TYPE (min1));
+      if (!wi::fits_to_tree_p (limit, utype))
+        return false;
+
+      poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
+      poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
+      poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+
+      tree subject = fold_build2 (MINUS_EXPR, utype,
+                                  fold_convert (utype, min2),
+                                  fold_convert (utype, min1));
+      subject = fold_build2 (PLUS_EXPR, utype, subject,
+                             wide_int_to_tree (utype, bias));
+      tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
+                                         wide_int_to_tree (utype, limit));
       if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                                  *cond_expr, part_cond_expr);
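To tie the new bias/limit computation back to the f1 example above (the
concrete numbers below assume a vectorization factor of 4, matching the
quoted AArch64 code; they are not spelled out in the patch itself): each
reference then has idx_len = 3 and idx_access = 1 in index units, and with a
positive step low_offset1 = 0 and high_offset2 = idx_len2, so

  bias  = high_offset2 + idx_access2 - 1 - low_offset1 = 3 + 1 - 1 - 0 = 3
  limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
        = 3 + 1 - 1 + 3 + 1 - 1 = 6

which are exactly the constants in the post-patch sequence
"add w3, w3, 3; cmp w3, 6".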