vect: Hookize better_loop_vinfo_p

One of the things we want to do on AArch64 is compare vector loops side-by-side and pick the best one. For some targets, we want this to be based on issue rates as well as the usual latency-based costs (at least for loops with relatively high iteration counts). The current approach to doing this is: when costing vectorisation candidate A, try to guess what the other main candidate B will look like and adjust A's latency-based cost up or down based on the likely difference between A and B's issue rates. This effectively means that we try to cost parts of B at the same time as A, without actually being able to see B. This is needlessly indirect and complex. It was a compromise due to the code being added (too) late in the GCC 11 cycle, so that target-independent changes weren't possible. The target-independent code already compares two candidate loop_vec_infos side-by-side, so that information about A and B above are available directly. This patch creates a way for targets to hook into this comparison. The AArch64 code can therefore hook into better_main_loop_than_p to compare issue rates. If the issue rate comparison isn't decisive, the code can fall back to the normal latency-based comparison instead. gcc/ * tree-vectorizer.h (vector_costs::better_main_loop_than_p) (vector_costs::better_epilogue_loop_than_p) (vector_costs::compare_inside_loop_cost) (vector_costs::compare_outside_loop_cost): Likewise. * tree-vectorizer.c (vector_costs::better_main_loop_than_p) (vector_costs::better_epilogue_loop_than_p) (vector_costs::compare_inside_loop_cost) (vector_costs::compare_outside_loop_cost): New functions, containing code moved from... * tree-vect-loop.c (vect_better_loop_vinfo_p): ...here.
author: Richard Sandiford <richard.sandiford@arm.com> 2021-11-10 12:31:01 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2021-11-10 12:31:01 +0000
commit: 5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf (patch)
tree: 3f728ad5f83db3e244ed18b3efb52e90f059de6b /gcc/tree-vect-loop.c
parent: 772d76acb5aead98eb3c47a78363d867287d5e77 (diff)
download: gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.zip
gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.tar.gz
gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.tar.bz2
1 files changed, 5 insertions, 137 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 11ffc59..3d9033f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2784,144 +2784,12 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
 	return new_simdlen_p;
     }
 
-  loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo);
-  if (main_loop)
-    {
-      poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
-      unsigned HOST_WIDE_INT main_vf;
-      unsigned HOST_WIDE_INT old_factor, new_factor, old_cost, new_cost;
-      /* If we can determine how many iterations are left for the epilogue
-	 loop, that is if both the main loop's vectorization factor and number
-	 of iterations are constant, then we use them to calculate the cost of
-	 the epilogue loop together with a 'likely value' for the epilogues
-	 vectorization factor.  Otherwise we use the main loop's vectorization
-	 factor and the maximum poly value for the epilogue's.  If the target
-	 has not provided with a sensible upper bound poly vectorization
-	 factors are likely to be favored over constant ones.  */
-      if (main_poly_vf.is_constant (&main_vf)
-	  && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
-	{
-	  unsigned HOST_WIDE_INT niters
-	    = LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
-	  HOST_WIDE_INT old_likely_vf
-	    = estimated_poly_value (old_vf, POLY_VALUE_LIKELY);
-	  HOST_WIDE_INT new_likely_vf
-	    = estimated_poly_value (new_vf, POLY_VALUE_LIKELY);
-
-	  /* If the epilogue is using partial vectors we account for the
-	     partial iteration here too.  */
-	  old_factor = niters / old_likely_vf;
-	  if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (old_loop_vinfo)
-	      && niters % old_likely_vf != 0)
-	    old_factor++;
-
-	  new_factor = niters / new_likely_vf;
-	  if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (new_loop_vinfo)
-	      && niters % new_likely_vf != 0)
-	    new_factor++;
-	}
-      else
-	{
-	  unsigned HOST_WIDE_INT main_vf_max
-	    = estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
-
-	  old_factor = main_vf_max / estimated_poly_value (old_vf,
-							   POLY_VALUE_MAX);
-	  new_factor = main_vf_max / estimated_poly_value (new_vf,
-							   POLY_VALUE_MAX);
-
-	  /* If the loop is not using partial vectors then it will iterate one
-	     time less than one that does.  It is safe to subtract one here,
-	     because the main loop's vf is always at least 2x bigger than that
-	     of an epilogue.  */
-	  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (old_loop_vinfo))
-	    old_factor -= 1;
-	  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (new_loop_vinfo))
-	    new_factor -= 1;
-	}
-
-      /* Compute the costs by multiplying the inside costs with the factor and
-	 add the outside costs for a more complete picture.  The factor is the
-	 amount of times we are expecting to iterate this epilogue.  */
-      old_cost = old_loop_vinfo->vector_costs->body_cost () * old_factor;
-      new_cost = new_loop_vinfo->vector_costs->body_cost () * new_factor;
-      old_cost += old_loop_vinfo->vector_costs->outside_cost ();
-      new_cost += new_loop_vinfo->vector_costs->outside_cost ();
-      return new_cost < old_cost;
-    }
-
-  /* Limit the VFs to what is likely to be the maximum number of iterations,
-     to handle cases in which at least one loop_vinfo is fully-masked.  */
-  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
-  if (estimated_max_niter != -1)
-    {
-      if (known_le (estimated_max_niter, new_vf))
-	new_vf = estimated_max_niter;
-      if (known_le (estimated_max_niter, old_vf))
-	old_vf = estimated_max_niter;
-    }
-
-  /* Check whether the (fractional) cost per scalar iteration is lower
-     or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf.  */
-  poly_int64 rel_new = new_loop_vinfo->vector_costs->body_cost () * old_vf;
-  poly_int64 rel_old = old_loop_vinfo->vector_costs->body_cost () * new_vf;
-
-  HOST_WIDE_INT est_rel_new_min
-    = estimated_poly_value (rel_new, POLY_VALUE_MIN);
-  HOST_WIDE_INT est_rel_new_max
-    = estimated_poly_value (rel_new, POLY_VALUE_MAX);
-
-  HOST_WIDE_INT est_rel_old_min
-    = estimated_poly_value (rel_old, POLY_VALUE_MIN);
-  HOST_WIDE_INT est_rel_old_max
-    = estimated_poly_value (rel_old, POLY_VALUE_MAX);
-
-  /* Check first if we can make out an unambigous total order from the minimum
-     and maximum estimates.  */
-  if (est_rel_new_min < est_rel_old_min
-      && est_rel_new_max < est_rel_old_max)
-    return true;
-  else if (est_rel_old_min < est_rel_new_min
-	   && est_rel_old_max < est_rel_new_max)
-    return false;
-  /* When old_loop_vinfo uses a variable vectorization factor,
-     we know that it has a lower cost for at least one runtime VF.
-     However, we don't know how likely that VF is.
-
-     One option would be to compare the costs for the estimated VFs.
-     The problem is that that can put too much pressure on the cost
-     model.  E.g. if the estimated VF is also the lowest possible VF,
-     and if old_loop_vinfo is 1 unit worse than new_loop_vinfo
-     for the estimated VF, we'd then choose new_loop_vinfo even
-     though (a) new_loop_vinfo might not actually be better than
-     old_loop_vinfo for that VF and (b) it would be significantly
-     worse at larger VFs.
-
-     Here we go for a hacky compromise: pick new_loop_vinfo if it is
-     no more expensive than old_loop_vinfo even after doubling the
-     estimated old_loop_vinfo VF.  For all but trivial loops, this
-     ensures that we only pick new_loop_vinfo if it is significantly
-     better than old_loop_vinfo at the estimated VF.  */
-
-  if (est_rel_old_min != est_rel_new_min
-      || est_rel_old_max != est_rel_new_max)
-    {
-      HOST_WIDE_INT est_rel_new_likely
-	= estimated_poly_value (rel_new, POLY_VALUE_LIKELY);
-      HOST_WIDE_INT est_rel_old_likely
-	= estimated_poly_value (rel_old, POLY_VALUE_LIKELY);
-
-      return est_rel_new_likely * 2 <= est_rel_old_likely;
-    }
-
-  /* If there's nothing to choose between the loop bodies, see whether
-     there's a difference in the prologue and epilogue costs.  */
-  auto old_outside_cost = old_loop_vinfo->vector_costs->outside_cost ();
-  auto new_outside_cost = new_loop_vinfo->vector_costs->outside_cost ();
-  if (new_outside_cost != old_outside_cost)
-    return new_outside_cost < old_outside_cost;
+  const auto *old_costs = old_loop_vinfo->vector_costs;
+  const auto *new_costs = new_loop_vinfo->vector_costs;
+  if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo))
+    return new_costs->better_epilogue_loop_than_p (old_costs, main_loop);
 
-  return false;
+  return new_costs->better_main_loop_than_p (old_costs);
 }
 
 /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO.  Return
author	Richard Sandiford <richard.sandiford@arm.com>	2021-11-10 12:31:01 +0000
committer	Richard Sandiford <richard.sandiford@arm.com>	2021-11-10 12:31:01 +0000
commit	5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf (patch)
tree	3f728ad5f83db3e244ed18b3efb52e90f059de6b /gcc/tree-vect-loop.c
parent	772d76acb5aead98eb3c47a78363d867287d5e77 (diff)
download	gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.zip gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.tar.gz gcc-5720a9d5beacb558c1ddccbbfef9f9e4f91b14cf.tar.bz2