From 4cf70c20cb10acd6fb1016611d05540728176b60 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Thu, 10 Dec 2020 12:10:00 +0000
Subject: data-ref: Rework integer handling in split_constant_offset [PR98069]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR98069 is about a case in which split_constant_offset miscategorises
an expression of the form:

  int foo;
  … POINTER_PLUS_EXPR <base, (sizetype) (INT_MIN - foo) * size>

as:

  base: base
  offset: (sizetype) (-foo) * size
  init: INT_MIN * size

“-foo” overflows when “foo” is INT_MIN, whereas the original expression
didn't overflow in that case.

As discussed in the PR trail, we could simply ignore the fact that int
overflow is undefined and treat it as a wrapping type, but that is
likely to pessimise quite a few cases.

This patch instead reworks split_constant_offset so that:

- it treats integer operations as having an implicit cast to sizetype
- for integer operations, the returned VAR has type sizetype

In other words, the problem becomes to express:

  (sizetype) (OP0 CODE OP1)

as:

  VAR:sizetype + (sizetype) OFF:ssizetype

The top-level integer split_constant_offset will (usually) be a sizetype
POINTER_PLUS operand, so the extra cast to sizetype disappears.  But
adding the cast allows the conversion handling to defer a lot of the
difficult cases to the recursive split_constant_offset call, which can
detect overflow on individual operations.

The net effect is to analyse the access above as:

  base: base
  offset: -(sizetype) foo * size
  init: INT_MIN * size

See the comments in the patch for more details.

gcc/
	PR tree-optimization/98069
	* tree-data-ref.c (compute_distributive_range): New function.
	(nop_conversion_for_offset_p): Likewise.
	(split_constant_offset): In the internal overload, treat integer
	expressions as having an implicit cast to sizetype and express
	them accordingly.  Pass back the range of the original (uncast)
	expression in a new range parameter.
	(split_constant_offset_1): Likewise.  Rework the handling of
	conversions to account for the implicit sizetype casts.
---
 gcc/tree-data-ref.c | 427 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 330 insertions(+), 97 deletions(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index e8308ce..926553b 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -97,6 +97,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "ssa.h"
 #include "internal-fn.h"
+#include "range-op.h"
+#include "vr-values.h"
 
 static struct datadep_stats
 {
@@ -581,26 +583,196 @@ debug_ddrs (vec<ddr_p> ddrs)
   dump_ddrs (stderr, ddrs);
 }
 
+/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
+   OP0 CODE OP1, where:
+
+   - OP0 CODE OP1 has integral type TYPE
+   - the range of OP0 is given by OP0_RANGE and
+   - the range of OP1 is given by OP1_RANGE.
+
+   Independently of RESULT_RANGE, try to compute:
+
+     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
+	     - (sizetype) (OP0 CODE OP1)
+
+   as a constant and subtract DELTA from the ssizetype constant in *OFF.
+   Return true on success, or false if DELTA is not known at compile time.
+
+   Truncation and sign changes are known to distribute over CODE, i.e.
+
+     (itype) (A CODE B) == (itype) A CODE (itype) B
+
+   for any integral type ITYPE whose precision is no greater than the
+   precision of A and B.  */
+
+static bool
+compute_distributive_range (tree type, value_range &op0_range,
+			    tree_code code, value_range &op1_range,
+			    tree *off, value_range *result_range)
+{
+  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
+  if (result_range)
+    {
+      range_operator *op = range_op_handler (code, type);
+      op->fold_range (*result_range, type, op0_range, op1_range);
+    }
+
+  /* The distributive property guarantees that if TYPE is no narrower
+     than SIZETYPE,
+
+       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
+
+     and so we can treat DELTA as zero.  */
+  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
+    return true;
+
+  /* If overflow is undefined, we can assume that:
+
+       X == (ssizetype) OP0 CODE (ssizetype) OP1
+
+     is within the range of TYPE, i.e.:
+
+       X == (ssizetype) (TYPE) X
+
+     Distributing the (TYPE) truncation over X gives:
+
+       X == (ssizetype) (OP0 CODE OP1)
+
+     Casting both sides to sizetype and distributing the sizetype cast
+     over X gives:
+
+       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
+
+     and so we can treat DELTA as zero.  */
+  if (TYPE_OVERFLOW_UNDEFINED (type))
+    return true;
+
+  /* Compute the range of:
+
+       (ssizetype) OP0 CODE (ssizetype) OP1
+
+     The distributive property guarantees that this has the same bitpattern as:
+
+       (sizetype) OP0 CODE (sizetype) OP1
+
+     but its range is more conducive to analysis.  */
+  range_cast (op0_range, ssizetype);
+  range_cast (op1_range, ssizetype);
+  value_range wide_range;
+  range_operator *op = range_op_handler (code, ssizetype);
+  bool saved_flag_wrapv = flag_wrapv;
+  flag_wrapv = 1;
+  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
+  flag_wrapv = saved_flag_wrapv;
+  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
+    return false;
+
+  wide_int lb = wide_range.lower_bound ();
+  wide_int ub = wide_range.upper_bound ();
+
+  /* Calculate the number of times that each end of the range overflows or
+     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
+  unsigned int precision = TYPE_PRECISION (type);
+  if (!TYPE_UNSIGNED (type))
+    {
+      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
+      lb -= type_min;
+      ub -= type_min;
+    }
+  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
+  lb &= upper_bits;
+  ub &= upper_bits;
+  if (lb != ub)
+    return false;
+
+  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
+     negative values indicating underflow.  The low PRECISION bits of LB
+     are clear, so DELTA is therefore LB (== UB).  */
+  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
+  return true;
+}
+
+/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
+   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
+   FROM_TYPE are integral types.  */
+
+static bool
+nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
+{
+  gcc_assert (INTEGRAL_TYPE_P (to_type)
+	      && INTEGRAL_TYPE_P (from_type)
+	      && !TYPE_OVERFLOW_TRAPS (to_type)
+	      && !TYPE_OVERFLOW_TRAPS (from_type));
+
+  /* Converting to something no narrower than sizetype and then to sizetype
+     is equivalent to converting directly to sizetype.  */
+  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
+    return true;
+
+  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
+  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
+      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
+    return true;
+
+  /* For narrowing conversions, we could in principle test whether
+     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
+     and apply a constant adjustment.
+
+     For other conversions (which involve a sign change) we could
+     check that the signs are always equal, and apply a constant
+     adjustment if the signs are negative.
+
+     However, both cases should be rare.  */
+  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
+			    TYPE_SIGN (to_type));
+}
+
 static void
 split_constant_offset (tree exp, tree *var, tree *off,
+		       value_range *result_range,
 		       hash_map<tree, std::pair<tree, tree> > &cache,
 		       unsigned *limit);
 
-/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
-   (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
-   constant of type ssizetype, and returns true.  If we cannot do this
-   with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
-   is returned.  */
+/* Helper function for split_constant_offset.  If TYPE is a pointer type,
+   try to express OP0 CODE OP1 as:
+
+     POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+   where:
+
+   - *VAR has type TYPE
+   - *OFF is a constant of type ssizetype.
+
+   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
+
+     *VAR + (sizetype) *OFF
+
+   where:
+
+   - *VAR has type sizetype
+   - *OFF is a constant of type ssizetype.
+
+   In both cases, OP0 CODE OP1 has type TYPE.
+
+   Return true on success.  A false return value indicates that we can't
+   do better than set *OFF to zero.
+
+   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
+   if RESULT_RANGE is nonnull and if we can do better than assume
+   VR_VARYING.
+
+   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
+   visited.  LIMIT counts down the number of SSA names that we are
+   allowed to process before giving up.  */
 
 static bool
 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
-			 tree *var, tree *off,
+			 tree *var, tree *off, value_range *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
 {
   tree var0, var1;
   tree off0, off1;
-  enum tree_code ocode = code;
+  value_range op0_range, op1_range;
 
   *var = NULL_TREE;
   *off = NULL_TREE;
 
@@ -608,35 +780,42 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
   switch (code)
     {
     case INTEGER_CST:
-      *var = build_int_cst (type, 0);
+      *var = size_int (0);
       *off = fold_convert (ssizetype, op0);
+      if (result_range)
+	result_range->set (op0, op0);
       return true;
 
     case POINTER_PLUS_EXPR:
-      ocode = PLUS_EXPR;
-      /* FALLTHROUGH */
+      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
+      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
+      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
+      *off = size_binop (PLUS_EXPR, off0, off1);
+      return true;
+
     case PLUS_EXPR:
     case MINUS_EXPR:
-      if (TREE_CODE (op1) == INTEGER_CST)
-	{
-	  split_constant_offset (op0, &var0, &off0, cache, limit);
-	  *var = var0;
-	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
-	  return true;
-	}
-      split_constant_offset (op0, &var0, &off0, cache, limit);
-      split_constant_offset (op1, &var1, &off1, cache, limit);
-      *var = fold_build2 (code, type, var0, var1);
-      *off = size_binop (ocode, off0, off1);
+      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
+      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
+      *off = size_binop (code, off0, off1);
+      if (!compute_distributive_range (type, op0_range, code, op1_range,
+				       off, result_range))
+	return false;
+      *var = fold_build2 (code, sizetype, var0, var1);
       return true;
 
     case MULT_EXPR:
       if (TREE_CODE (op1) != INTEGER_CST)
	return false;
 
-      split_constant_offset (op0, &var0, &off0, cache, limit);
-      *var = fold_build2 (MULT_EXPR, type, var0, op1);
+      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
+      op1_range.set (op1, op1);
       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
+      if (!compute_distributive_range (type, op0_range, code, op1_range,
+				       off, result_range))
+	return false;
+      *var = fold_build2 (MULT_EXPR, sizetype, var0,
+			  fold_convert (sizetype, op1));
       return true;
 
     case ADDR_EXPR:
@@ -658,13 +837,10 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 
	if (poffset)
	  {
-	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
+	    split_constant_offset (poffset, &poffset, &off1, nullptr,
+				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
-	    if (POINTER_TYPE_P (TREE_TYPE (base)))
-	      base = fold_build_pointer_plus (base, poffset);
-	    else
-	      base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
-				  fold_convert (TREE_TYPE (base), poffset));
+	    base = fold_build_pointer_plus (base, poffset);
	  }
 
	var0 = fold_convert (type, base);
@@ -723,6 +899,7 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
	      return false;
	    *var = e.first;
	    *off = e.second;
+	    /* The caller sets the range in this case.  */
	    return true;
	  }
	e = std::make_pair (op0, ssize_int (0));
@@ -736,72 +913,80 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
	var1 = gimple_assign_rhs2 (def_stmt);
 
	bool res = split_constant_offset_1 (type, var0, subcode, var1,
-					    var, off, cache, limit);
+					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
+	/* The caller sets the range in this case.  */
	return res;
       }
 
     CASE_CONVERT:
       {
-	/* We must not introduce undefined overflow, and we must not change
-	   the value.  Hence we're okay if the inner type doesn't overflow
-	   to start with (pointer or signed), the outer type also is an
-	   integer or pointer and the outer precision is at least as large
-	   as the inner.  */
+	/* We can only handle the following conversions:
+
+	   - Conversions from one pointer type to another pointer type.
+
+	   - Conversions from one non-trapping integral type to another
+	     non-trapping integral type.  In this case, the recursive
+	     call makes sure that:
+
+	       (sizetype) OP0
+
+	     can be expressed as a sizetype operation involving VAR and OFF,
+	     and all we need to do is check whether:
+
+	       (sizetype) OP0 == (sizetype) (TYPE) OP0
+
+	   - Conversions from a non-trapping sizetype-size integral type to
+	     a like-sized pointer type.  In this case, the recursive call
+	     makes sure that:
+
+	       (sizetype) OP0 == *VAR + (sizetype) *OFF
+
+	     and we can convert that to:
+
+	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
+
+	   - Conversions from a sizetype-sized pointer type to a like-sized
+	     non-trapping integral type.  In this case, the recursive call
+	     makes sure that:
+
+	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+	     where the POINTER_PLUS and *VAR have the same precision as
+	     TYPE (and the same precision as sizetype).  Then:
+
+	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
-	    && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
-	    && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
+	    && (POINTER_TYPE_P (type)
+		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
+	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
+		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
+		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
-	    if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype)
-		&& (TYPE_PRECISION (type) > TYPE_PRECISION (itype)
-		    || TYPE_UNSIGNED (itype) != TYPE_UNSIGNED (type)))
+	    if (POINTER_TYPE_P (type))
+	      {
+		split_constant_offset (op0, var, off, nullptr, cache, limit);
+		*var = fold_convert (type, *var);
+	      }
+	    else if (POINTER_TYPE_P (itype))
+	      {
+		split_constant_offset (op0, var, off, nullptr, cache, limit);
+		*var = fold_convert (sizetype, *var);
+	      }
+	    else
	      {
-		/* Split the unconverted operand and try to prove that
-		   wrapping isn't a problem.  */
-		tree tmp_var, tmp_off;
-		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
-
-		/* See whether we have an known range [A, B] for tmp_var.  */
-		wide_int var_min, var_max;
-		signop sgn = TYPE_SIGN (itype);
-		if (TREE_CODE (tmp_var) == SSA_NAME)
+		split_constant_offset (op0, var, off, &op0_range,
+				       cache, limit);
+		if (!nop_conversion_for_offset_p (type, itype, op0_range))
+		  return false;
+		if (result_range)
		  {
-		    value_range_kind vr_type
-		      = get_range_info (tmp_var, &var_min, &var_max);
-		    wide_int var_nonzero = get_nonzero_bits (tmp_var);
-		    if (intersect_range_with_nonzero_bits (vr_type, &var_min,
-							   &var_max,
-							   var_nonzero,
-							   sgn) != VR_RANGE)
-		      return false;
+		    *result_range = op0_range;
+		    range_cast (*result_range, type);
		  }
-		else if (determine_value_range (tmp_var, &var_min, &var_max)
-			 != VR_RANGE)
-		  return false;
-
-		/* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
-		   is known to be [A + TMP_OFF, B + TMP_OFF], with all
-		   operations done in ITYPE.  The addition must overflow
-		   at both ends of the range or at neither.  */
-		wi::overflow_type overflow[2];
-		unsigned int prec = TYPE_PRECISION (itype);
-		wide_int woff = wi::to_wide (tmp_off, prec);
-		wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
-		wi::add (var_max, woff, sgn, &overflow[1]);
-		if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
-		  return false;
-
-		/* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
-		widest_int diff = (widest_int::from (op0_min, sgn)
-				   - widest_int::from (var_min, sgn));
-		var0 = tmp_var;
-		*off = wide_int_to_tree (ssizetype, diff);
	      }
-	    else
-	      split_constant_offset (op0, &var0, off, cache, limit);
-	    *var = fold_convert (type, var0);
	    return true;
	  }
	return false;
@@ -812,33 +997,80 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
     }
 }
 
-/* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
-   will be ssizetype.  */
+/* If EXP has pointer type, try to express it as:
+
+     POINTER_PLUS <*VAR, (sizetype) *OFF>
+
+   where:
+
+   - *VAR has the same type as EXP
+   - *OFF is a constant of type ssizetype.
+
+   If EXP has an integral type, try to express (sizetype) EXP as:
+
+     *VAR + (sizetype) *OFF
+
+   where:
+
+   - *VAR has type sizetype
+   - *OFF is a constant of type ssizetype.
+
+   If EXP_RANGE is nonnull, set it to the range of EXP.
+
+   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
+   visited.  LIMIT counts down the number of SSA names that we are
+   allowed to process before giving up.  */
 
 static void
-split_constant_offset (tree exp, tree *var, tree *off,
+split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
 {
-  tree type = TREE_TYPE (exp), op0, op1, e, o;
+  tree type = TREE_TYPE (exp), op0, op1;
   enum tree_code code;
 
-  *var = exp;
-  *off = ssize_int (0);
+  code = TREE_CODE (exp);
+  if (exp_range)
+    {
+      *exp_range = type;
+      if (code == SSA_NAME)
+	{
+	  wide_int var_min, var_max;
+	  value_range_kind vr_kind = get_range_info (exp, &var_min, &var_max);
+	  wide_int var_nonzero = get_nonzero_bits (exp);
+	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
+						       &var_min, &var_max,
+						       var_nonzero,
+						       TYPE_SIGN (type));
+	  if (vr_kind == VR_RANGE)
+	    *exp_range = value_range (type, var_min, var_max);
+	}
+    }
 
-  if (tree_is_chrec (exp)
-      || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
-    return;
+  if (!tree_is_chrec (exp)
+      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
+    {
+      extract_ops_from_tree (exp, &code, &op0, &op1);
+      if (split_constant_offset_1 (type, op0, code, op1, var, off,
+				   exp_range, cache, limit))
+	return;
+    }
 
-  code = TREE_CODE (exp);
-  extract_ops_from_tree (exp, &code, &op0, &op1);
-  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
+  *var = exp;
+  if (INTEGRAL_TYPE_P (type))
+    *var = fold_convert (sizetype, *var);
+  *off = ssize_int (0);
+  if (exp_range && code != SSA_NAME)
     {
-      *var = e;
-      *off = o;
+      wide_int var_min, var_max;
+      if (determine_value_range (exp, &var_min, &var_max) == VR_RANGE)
+	*exp_range = value_range (type, var_min, var_max);
     }
 }
 
+/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
+   type as EXP while OFF has type ssizetype.  */
+
 void
 split_constant_offset (tree exp, tree *var, tree *off)
 {
@@ -846,7 +1078,8 @@ split_constant_offset (tree exp, tree *var, tree *off)
   static hash_map<tree, std::pair<tree, tree> > *cache;
   if (!cache)
     cache = new hash_map<tree, std::pair<tree, tree> > (37);
-  split_constant_offset (exp, var, off, *cache, &limit);
+  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
+  *var = fold_convert (TREE_TYPE (exp), *var);
   cache->empty ();
 }
--
cgit v1.1
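Note: to make the failure mode concrete, the following is a hedged
sketch of the kind of access the commit message describes (invented
names, not the testcase attached to PR98069):

  #include <limits.h>

  extern int a[];

  int
  f (int foo)
  {
    /* With foo == INT_MIN, INT_MIN - foo is 0 and perfectly defined,
       but the old split_constant_offset rewrote the index as -foo plus
       the constant INT_MIN, and -foo overflows for exactly that value.
       The patched code negates only after casting to sizetype.  */
    return a[INT_MIN - foo];
  }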
From 99dee82307f1e163e150c9c810452979994047ce Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Mon, 4 Jan 2021 10:26:59 +0100
Subject: Update copyright years.

---
 gcc/tree-data-ref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 926553b..394470a 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1,5 +1,5 @@
 /* Data references and dependences detectors.
-   Copyright (C) 2003-2020 Free Software Foundation, Inc.
+   Copyright (C) 2003-2021 Free Software Foundation, Inc.
    Contributed by Sebastian Pop
 
 This file is part of GCC.
--
cgit v1.1

From 2182274f510c180ea92a4f826a0f6cf5f1f55b66 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Thu, 14 Jan 2021 14:08:41 +0100
Subject: tree-optimization/98674 - improve dependence analysis

This improves dependence analysis on refs that access the same array
but with differently typed, same-sized accesses.  That's obviously
safe for the case of types that cannot have any access function based
off them.  For the testcase this is signed short vs. unsigned short.

2021-01-14  Richard Biener

	PR tree-optimization/98674
	* tree-data-ref.c (base_supports_access_fn_components_p): New.
	(initialize_data_dependence_relation): For two bases without
	possible access fns resort to type size equality when
	determining shape compatibility.

	* gcc.dg/vect/pr98674.c: New testcase.
---
 gcc/tree-data-ref.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 394470a..65fe6d5 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1291,6 +1291,23 @@ access_fn_component_p (tree op)
     }
 }
 
+/* Returns whether BASE can have an access_fn_component_p with BASE
+   as base.  */
+
+static bool
+base_supports_access_fn_components_p (tree base)
+{
+  switch (TREE_CODE (TREE_TYPE (base)))
+    {
+    case COMPLEX_TYPE:
+    case ARRAY_TYPE:
+    case RECORD_TYPE:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Determines the base object and the list of indices of memory reference
    DR, analyzed in LOOP and instantiated before NEST.  */
 
@@ -3272,8 +3289,13 @@ initialize_data_dependence_relation (struct data_reference *a,
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
-		      && types_compatible_p (TREE_TYPE (base_a),
-					     TREE_TYPE (base_b))
+		      && (types_compatible_p (TREE_TYPE (base_a),
+					      TREE_TYPE (base_b))
+			  || (!base_supports_access_fn_components_p (base_a)
+			      && !base_supports_access_fn_components_p (base_b)
+			      && operand_equal_p
+				   (TYPE_SIZE (TREE_TYPE (base_a)),
+				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));
--
cgit v1.1
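Note: the improvement above is easiest to see on a hedged sketch of the
access pattern the commit message describes (the committed
gcc.dg/vect/pr98674.c may differ; the loop body here is invented):

  /* The same storage is read through signed short and written through
     unsigned short.  The two element types are not compatible, but they
     have equal TYPE_SIZE and neither can be the base of a further
     access function, so the references can now be treated as having a
     compatible shape by dependence analysis.  */
  short a[1024];

  void
  f (void)
  {
    unsigned short *ua = (unsigned short *) a;
    for (int i = 1; i < 1024; ++i)
      ua[i] = a[i - 1] + 1;
  }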
From 34599780d0de72faf5719ea08d11a061722b9d19 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 20 Jan 2021 08:48:34 +0100
Subject: tree-optimization/98758 - fix integer arithmetic in data-ref analysis

This fixes some int arithmetic issues and a bogus truncation.

2021-01-20  Richard Biener

	PR tree-optimization/98758
	* tree-data-ref.c (int_divides_p): Use lambda_int arguments.
	(lambda_matrix_right_hermite): Avoid undefinedness with signed
	integer abs and multiplication.
	(analyze_subscript_affine_affine): Use lambda_int.

	* gcc.dg/torture/pr98758.c: New testcase.
---
 gcc/tree-data-ref.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 65fe6d5..9d07b41 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -143,7 +143,7 @@ tree_fold_divides_p (const_tree a, const_tree b)
 /* Returns true iff A divides B.  */
 
 static inline bool
-int_divides_p (int a, int b)
+int_divides_p (lambda_int a, lambda_int b)
 {
   return ((b % a) == 0);
 }
@@ -4280,10 +4280,10 @@ lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
	      a = S[i-1][j];
	      b = S[i][j];
-	      sigma = (a * b < 0) ? -1: 1;
-	      a = abs_hwi (a);
-	      b = abs_hwi (b);
-	      factor = sigma * (a / b);
+	      sigma = ((a < 0) ^ (b < 0)) ? -1: 1;
+	      unsigned HOST_WIDE_INT abs_a = absu_hwi (a);
+	      unsigned HOST_WIDE_INT abs_b = absu_hwi (b);
+	      factor = sigma * (lambda_int)(abs_a / abs_b);
 
	      lambda_matrix_row_add (S, n, i, i-1, -factor);
	      std::swap (S[i], S[i-1]);
@@ -4309,7 +4309,7 @@ analyze_subscript_affine_affine (tree chrec_a,
				 tree *last_conflicts)
 {
   unsigned nb_vars_a, nb_vars_b, dim;
-  HOST_WIDE_INT gamma, gcd_alpha_beta;
+  lambda_int gamma, gcd_alpha_beta;
   lambda_matrix A, U, S;
   struct obstack scratch_obstack;
--
cgit v1.1
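Note: both hunks above guard against host-side undefined behaviour that
is easy to miss.  A standalone illustration of the new sign-handling
idiom, using plain int64_t rather than HOST_WIDE_INT/lambda_int (an
illustration only, not GCC code):

  #include <stdint.h>

  /* a * b < 0 can itself overflow even though only its sign is wanted,
     and |INT64_MIN| is not representable in int64_t, so take the sign
     from the operands' sign bits and divide unsigned.  (The follow-up
     commit below additionally rules out the INT64_MIN-like input.)  */
  static int64_t
  signed_quotient (int64_t a, int64_t b)
  {
    int64_t sigma = ((a < 0) ^ (b < 0)) ? -1 : 1;
    uint64_t abs_a = a < 0 ? -(uint64_t) a : (uint64_t) a;
    uint64_t abs_b = b < 0 ? -(uint64_t) b : (uint64_t) b;
    return sigma * (int64_t) (abs_a / abs_b);
  }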
From 261cdd23195bc921737fd7a44e34a93aaaaccc44 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 20 Jan 2021 11:28:30 +0100
Subject: Handle overflow in dependence analysis lambda ops gracefully

The following tries to handle overflow in the integer computations done
by lambda ops of dependence analysis by failing instead of silently
continuing with overflowed values.  It also avoids treating large
unsigned CHREC_RIGHT as negative unless the chrec is of pointer type,
and it avoids the most negative integer value so as not to require
excessive overflow checking (with this the fix for PR98758 can be
partly simplified, as seen below).

I've added add_hwi and mul_hwi functions computing the signed
HOST_WIDE_INT sum and product while indicating overflow; they hopefully
get matched to the appropriate internal functions.

I don't have any testcases triggering overflow in any of the guarded
computations.

2021-01-20  Richard Biener

	* hwint.h (add_hwi): New function.
	(mul_hwi): Likewise.
	* tree-data-ref.c (initialize_matrix_A): Properly translate
	tree constants and avoid HOST_WIDE_INT_MIN.
	(lambda_matrix_row_add): Avoid undefined integer overflow
	and return true on such overflow.
	(lambda_matrix_right_hermite): Handle overflow from
	lambda_matrix_row_add gracefully.  Simplify previous fix.
	(analyze_subscript_affine_affine): Likewise.
---
 gcc/tree-data-ref.c | 63 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 14 deletions(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 9d07b41..d19c5eb 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -3924,9 +3924,25 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
   switch (TREE_CODE (chrec))
     {
     case POLYNOMIAL_CHREC:
-      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+      /* CHREC_RIGHT and its negated value should fit in a lambda_int.
+	 Pointer typed chrecs right are to be interpreted signed.  */
+      HOST_WIDE_INT chrec_right;
+      if (POINTER_TYPE_P (chrec_type (chrec)))
+	{
+	  if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+	    return chrec_dont_know;
+	  chrec_right = int_cst_value (CHREC_RIGHT (chrec));
+	}
+      else
+	{
+	  if (!tree_fits_shwi_p (CHREC_RIGHT (chrec)))
+	    return chrec_dont_know;
+	  chrec_right = tree_to_shwi (CHREC_RIGHT (chrec));
+	}
+      /* We want to be able to negate without overflow.  */
+      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
-      A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
+      A[index][0] = mult * chrec_right;
       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
 
     case PLUS_EXPR:
@@ -4193,17 +4209,28 @@ lambda_vector_first_nz (lambda_vector vec1, int n, int start)
 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
    R2 = R2 + CONST1 * R1.  */
 
-static void
+static bool
 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
 {
   int i;
 
   if (const1 == 0)
-    return;
+    return true;
 
   for (i = 0; i < n; i++)
-    mat[r2][i] += const1 * mat[r1][i];
+    {
+      bool ovf;
+      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
+      if (ovf)
+	return false;
+      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
+      if (ovf || tem2 == HOST_WIDE_INT_MIN)
+	return false;
+      mat[r2][i] = tem2;
+    }
+
+  return true;
 }
 
 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
@@ -4258,7 +4285,7 @@ lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
    Ref: Algorithm 2.1 page 33 in "Loop Transformations for Restructuring
   Compilers" Utpal Banerjee.  */
 
-static void
+static bool
 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
 {
@@ -4276,24 +4303,26 @@ lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
	{
	  while (S[i][j] != 0)
	    {
-	      lambda_int sigma, factor, a, b;
+	      lambda_int factor, a, b;
 
	      a = S[i-1][j];
	      b = S[i][j];
-	      sigma = ((a < 0) ^ (b < 0)) ? -1: 1;
-	      unsigned HOST_WIDE_INT abs_a = absu_hwi (a);
-	      unsigned HOST_WIDE_INT abs_b = absu_hwi (b);
-	      factor = sigma * (lambda_int)(abs_a / abs_b);
+	      gcc_assert (a != HOST_WIDE_INT_MIN);
+	      factor = a / b;
 
-	      lambda_matrix_row_add (S, n, i, i-1, -factor);
+	      if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
+		return false;
	      std::swap (S[i], S[i-1]);
 
-	      lambda_matrix_row_add (U, m, i, i-1, -factor);
+	      if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
+		return false;
	      std::swap (U[i], U[i-1]);
	    }
	}
     }
+
+  return true;
 }
 
 /* Determines the overlapping elements due to accesses CHREC_A and
@@ -4410,7 +4439,13 @@ analyze_subscript_affine_affine (tree chrec_a,
     }
 
   /* U.A = S */
-  lambda_matrix_right_hermite (A, dim, 1, S, U);
+  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
+    {
+      *overlaps_a = conflict_fn_not_known ();
+      *overlaps_b = conflict_fn_not_known ();
+      *last_conflicts = chrec_dont_know;
+      goto end_analyze_subs_aa;
+    }
 
   if (S[0][0] < 0)
     {
--
cgit v1.1

From 8bad25eb56bd16f3482f856a75b1c1ae5cfe1c4f Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Fri, 22 Jan 2021 11:29:17 +0100
Subject: middle-end/98773 - always sign extend CHREC_RIGHT

The previous change exposed a miscompile when trying to interpret
CHREC_RIGHT correctly, which in fact it already was to the extent it is
used.  The following reverts this part of the change, only retaining
the singling out of HOST_WIDE_INT_MIN.

2021-01-22  Richard Biener

	PR middle-end/98773
	* tree-data-ref.c (initialize_matrix_A): Revert previous change,
	retaining failing on HOST_WIDE_INT_MIN CHREC_RIGHT.

	* gcc.dg/torture/pr98773.c: New testcase.
---
 gcc/tree-data-ref.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'gcc/tree-data-ref.c')

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index d19c5eb..124a7be 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -3924,21 +3924,10 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
   switch (TREE_CODE (chrec))
     {
     case POLYNOMIAL_CHREC:
-      /* CHREC_RIGHT and its negated value should fit in a lambda_int.
-	 Pointer typed chrecs right are to be interpreted signed.  */
       HOST_WIDE_INT chrec_right;
-      if (POINTER_TYPE_P (chrec_type (chrec)))
-	{
-	  if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
-	    return chrec_dont_know;
-	  chrec_right = int_cst_value (CHREC_RIGHT (chrec));
-	}
-      else
-	{
-	  if (!tree_fits_shwi_p (CHREC_RIGHT (chrec)))
-	    return chrec_dont_know;
-	  chrec_right = tree_to_shwi (CHREC_RIGHT (chrec));
-	}
+      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+	return chrec_dont_know;
+      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
       /* We want to be able to negate without overflow.  */
       if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
--
cgit v1.1
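Note: the add_hwi and mul_hwi helpers live in gcc/hwint.h, so they are
not visible in this extract, which is limited to gcc/tree-data-ref.c.
As a rough idea of their contract, here is a hedged sketch built on the
GNU overflow builtins; the committed versions may well use portable
unsigned arithmetic instead, so treat the bodies as an assumption:

  /* Assumed shape, not the committed code: signed HOST_WIDE_INT sum
     and product that report overflow through *OVERFLOW instead of
     invoking undefined behaviour.  */

  inline HOST_WIDE_INT
  add_hwi (HOST_WIDE_INT a, HOST_WIDE_INT b, bool *overflow)
  {
    HOST_WIDE_INT result;
    *overflow = __builtin_add_overflow (a, b, &result);
    return result;
  }

  inline HOST_WIDE_INT
  mul_hwi (HOST_WIDE_INT a, HOST_WIDE_INT b, bool *overflow)
  {
    HOST_WIDE_INT result;
    *overflow = __builtin_mul_overflow (a, b, &result);
    return result;
  }

This matches how lambda_matrix_row_add consumes them above: the result
is used only when the overflow flag comes back false.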