author | Richard Sandiford <richard.sandiford@arm.com> | 2021-12-06 18:29:30 +0000
committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-12-06 18:29:30 +0000
commit | d27b7e69872b34890077e3dff291b4bcbc52e4cd (patch)
tree | 702b9a2a1b93e6efa6a83ebe07dc9e7c9d3526f9 /gcc
parent | 14dc5b71d7e845be4ac21b16e849d6689df81b67 (diff)
ranger: Optimise irange_union
When compiling an optabs.ii at -O2 with a release-checking build,
the hottest function in the profile was irange_union. This patch
tries to optimise it a bit. The specific changes are:
- Use quick_push rather than safe_push, since the final number
of entries is known in advance (see the first sketch after this list).
- Avoid assigning wi::to_wide & co. to a temporary wide_int,
such as in:
    wide_int val_j = wi::to_wide (res[j]);
wi::to_wide returns a wide_int "view" of the in-place INTEGER_CST
storage. Assigning the result to a wide_int forces an unnecessary
copy to temporary storage.
This is one area where "auto" helps a lot. In the end though,
it seemed more readable to inline the wi::to_*s rather than
use auto. (The second sketch after this list illustrates the
view-versus-copy distinction.)
- Use wi::to_widest rather than wi::to_wide. Both are functionally
correct, but wi::to_widest is more efficient, for three reasons:
- wi::to_wide returns a wide-int representation in which the most
significant element might not be canonically sign-extended.
This is because we want to allow the storage of an INTEGER_CST
like 0x1U << 31 to be accessed directly with both a wide_int view
(where only 32 bits matter) and a widest_int view (where many more
bits matter, and where the 32 bits are zero-extended to match the
unsigned type). However, operating on uncanonicalised wide_int
forms is less efficient than operating on canonicalised forms.
- wi::to_widest has a constant rather than variable precision and
there are never any redundant upper bits to worry about.
- Using widest_int avoids the need for an overflow check, since
there is enough precision to add 1 to any IL constant without
wrap-around (the third sketch after this list shows the idea).
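As a rough illustration of the first point, here is a minimal stand-in for the vec.h push interface (the toy_vec type is invented for this sketch, not the real GCC API): quick_push can skip the capacity check and possible reallocation that safe_push must perform, which is safe whenever the caller has reserved the final size up front.

```cpp
// Minimal stand-in for GCC's vec.h push interface; toy_vec is invented
// for this sketch and is not the real API.
#include <cassert>
#include <vector>

template <typename T>
struct toy_vec
{
  std::vector<T> storage;

  // safe_push: checks capacity and grows on demand, so every call pays
  // for a branch and a possible reallocation.
  void safe_push (const T &x) { storage.push_back (x); }

  // quick_push: the caller guarantees enough space was reserved, so the
  // push is just a store plus a size bump.
  void quick_push (const T &x)
  {
    assert (storage.size () < storage.capacity ());
    storage.push_back (x);  // never reallocates under the assertion above
  }
};

int main ()
{
  toy_vec<int> res;
  // Mirrors: auto_vec<tree, 20> res (m_num_ranges * 2 + r.m_num_ranges * 2);
  res.storage.reserve (8);
  for (int i = 0; i < 8; ++i)
    res.quick_push (i);  // final number of entries known in advance
}
```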
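For the second point, the following toy sketch mimics the view-versus-copy distinction (big_num, big_num_view, big_num_copy and to_view are invented stand-ins, not the real wide-int.h types): returning a view only repackages a pointer and a length, while assigning it to a value type copies the underlying storage.

```cpp
// Toy illustration of a "view" type versus a copying assignment; all the
// types here are invented stand-ins, not the real wide-int.h API.
#include <cstdio>

struct big_num           // stand-in for in-place INTEGER_CST storage
{
  unsigned long elts[4];
  unsigned len;
};

struct big_num_view      // stand-in for the wide_int "view": no copy
{
  const unsigned long *elts;
  unsigned len;
};

struct big_num_copy      // stand-in for a wide_int temporary: copies
{
  unsigned long elts[4];
  unsigned len;

  big_num_copy (big_num_view v) : len (v.len)
  {
    for (unsigned i = 0; i < len; ++i)
      elts[i] = v.elts[i];  // the per-element copy the patch avoids
  }
};

// Like wi::to_wide: repackages a pointer and a length, which is cheap.
static big_num_view to_view (const big_num &n)
{
  return { n.elts, n.len };
}

int main ()
{
  big_num n = { { 42, 0, 0, 0 }, 1 };
  big_num_view v = to_view (n);   // cheap: no element copies
  big_num_copy c = to_view (n);   // forces a copy to temporary storage
  std::printf ("%lu %lu\n", v.elts[0], c.elts[0]);
}
```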
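For the third point, this sketch uses int64_t as a stand-in for widest_int over 32-bit IL constants (an assumption made purely for illustration): at the constant's own precision, upper + 1 can wrap and needs an overflow check, whereas in a type with spare precision it cannot.

```cpp
// Sketch of why spare precision removes the overflow check, using int64_t
// as a stand-in for widest_int over 32-bit IL constants.
#include <cstdint>
#include <cstdio>
#include <limits>

int main ()
{
  uint32_t upper = std::numeric_limits<uint32_t>::max ();
  uint32_t lower = 0;

  // At the constant's own precision, adding 1 can wrap, which is why the
  // old code needed the max_val and wi::OVF_OVERFLOW checks.
  uint32_t wrapped = upper + 1;   // wraps to 0

  // With spare precision the value is extended canonically first, so
  // upper + 1 can never wrap and the check disappears.
  int64_t widest_upper = (int64_t) upper;
  bool merge = widest_upper + 1 >= (int64_t) lower;

  std::printf ("wrapped=%u merge=%d\n", (unsigned) wrapped, (int) merge);
}
```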
This gives a ~2% compile-time speed-up with the test above.
I also tried adding a path for two single-pair ranges, but it
wasn't a win.
gcc/
* value-range.cc (irange::irange_union): Use quick_push rather
than safe_push. Use widest_int rather than wide_int. Avoid
assigning wi::to_* results to wide*_int temporaries.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/value-range.cc | 46
1 file changed, 13 insertions, 33 deletions
```diff
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 82509fa..d38d078 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1550,70 +1550,50 @@ irange::irange_union (const irange &r)
   // the merge is performed.
   //
   // [Xi,Yi]..[Xn,Yn] U [Xj,Yj]..[Xm,Ym] --> [Xk,Yk]..[Xp,Yp]
-  tree ttype = r.type ();
-  signop sign = TYPE_SIGN (ttype);
-
-  auto_vec<tree, 20> res;
-  wide_int u1 ;
-  wi::overflow_type ovf;
+  auto_vec<tree, 20> res (m_num_ranges * 2 + r.m_num_ranges * 2);
   unsigned i = 0, j = 0, k = 0;
 
   while (i < m_num_ranges * 2 && j < r.m_num_ranges * 2)
     {
       // lower of Xi and Xj is the lowest point.
-      if (wi::le_p (wi::to_wide (m_base[i]), wi::to_wide (r.m_base[j]), sign))
+      if (wi::to_widest (m_base[i]) <= wi::to_widest (r.m_base[j]))
 	{
-	  res.safe_push (m_base[i]);
-	  res.safe_push (m_base[i + 1]);
+	  res.quick_push (m_base[i]);
+	  res.quick_push (m_base[i + 1]);
 	  k += 2;
 	  i += 2;
 	}
       else
 	{
-	  res.safe_push (r.m_base[j]);
-	  res.safe_push (r.m_base[j + 1]);
+	  res.quick_push (r.m_base[j]);
+	  res.quick_push (r.m_base[j + 1]);
 	  k += 2;
 	  j += 2;
 	}
     }
   for ( ; i < m_num_ranges * 2; i += 2)
     {
-      res.safe_push (m_base[i]);
-      res.safe_push (m_base[i + 1]);
+      res.quick_push (m_base[i]);
+      res.quick_push (m_base[i + 1]);
       k += 2;
     }
   for ( ; j < r.m_num_ranges * 2; j += 2)
     {
-      res.safe_push (r.m_base[j]);
-      res.safe_push (r.m_base[j + 1]);
+      res.quick_push (r.m_base[j]);
+      res.quick_push (r.m_base[j + 1]);
       k += 2;
     }
 
   // Now normalize the vector removing any overlaps.
   i = 2;
-  int prec = TYPE_PRECISION (ttype);
-  wide_int max_val = wi::max_value (prec, sign);
   for (j = 2; j < k ; j += 2)
     {
-      wide_int val_im1 = wi::to_wide (res[i - 1]);
-      if (val_im1 == max_val)
-	break;
-      u1 = wi::add (val_im1, 1, sign, &ovf);
-
-      // Overflow indicates we are at MAX already.
-      // A wide int bug requires the previous max_val check
-      // trigger: gcc.c-torture/compile/pr80443.c with -O3
-      if (ovf == wi::OVF_OVERFLOW)
-	break;
-
-      wide_int val_j = wi::to_wide (res[j]);
-      wide_int val_jp1 = wi::to_wide (res[j+1]);
       // Current upper+1 is >= lower bound next pair, then we merge ranges.
-      if (wi::ge_p (u1, val_j, sign))
+      if (wi::to_widest (res[i - 1]) + 1 >= wi::to_widest (res[j]))
 	{
 	  // New upper bounds is greater of current or the next one.
-	  if (wi::gt_p (val_jp1, val_im1, sign))
-	    res [i - 1] = res[j + 1];
+	  if (wi::to_widest (res[j + 1]) > wi::to_widest (res[i - 1]))
+	    res[i - 1] = res[j + 1];
 	}
       else
 	{
```