Allow the number of iterations to be smaller than VF

Fully-masked loops can be profitable even if the iteration count is smaller than the vectorisation factor. In this case we're effectively doing a complete unroll followed by SLP. The documentation for min-vect-loop-bound says that the default value was 0, but actually the default and minimum were 1. We need it to be 0 for this case since the parameter counts a whole number of vector iterations. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/sourcebuild.texi (vect_fully_masked): Document. * params.def (PARAM_MIN_VECT_LOOP_BOUND): Change minimum and default value to 0. * tree-vect-loop.c (vect_analyze_loop_costing): New function, split out from... (vect_analyze_loop_2): ...here. Don't check the vectorization factor against the number of loop iterations if the loop is fully-masked. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_fully_masked): New proc. * gcc.dg/vect/slp-3.c: Expect all loops to be vectorized if vect_fully_masked. * gcc.target/aarch64/sve/loop_add_4.c: New test. * gcc.target/aarch64/sve/loop_add_4_run.c: Likewise. * gcc.target/aarch64/sve/loop_add_5.c: Likewise. * gcc.target/aarch64/sve/loop_add_5_run.c: Likewise. * gcc.target/aarch64/sve/miniloop_1.c: Likewise. * gcc.target/aarch64/sve/miniloop_2.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256629
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-01-13 17:59:23 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-13 17:59:23 +0000
commit: c2700f7466bac153def05a0e070aa78cd2ffc0ae (patch)
tree: 50556d047879e76735dd263eb9a0712f9066545e /gcc/tree-vect-loop.c
parent: 8277ddf9eeae431d432855e41537df1c3a4fa323 (diff)
download: gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.zip
gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.gz
gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.bz2
1 files changed, 103 insertions, 71 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 15d36b2..1666332 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1896,6 +1896,101 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
   return true;
 }
 
+/* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
+   is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
+   definitely no, or -1 if it's worth retrying.  */
+
+static int
+vect_analyze_loop_costing (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+  /* Only fully-masked loops can have iteration counts less than the
+     vectorization factor.  */
+  if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+    {
+      HOST_WIDE_INT max_niter;
+
+      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+	max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+      else
+	max_niter = max_stmt_executions_int (loop);
+
+      if (max_niter != -1
+	  && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "not vectorized: iteration count smaller than "
+			     "vectorization factor.\n");
+	  return 0;
+	}
+    }
+
+  int min_profitable_iters, min_profitable_estimate;
+  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
+				      &min_profitable_estimate);
+
+  if (min_profitable_iters < 0)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not vectorized: vectorization not profitable.\n");
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not vectorized: vector version will never be "
+			 "profitable.\n");
+      return -1;
+    }
+
+  int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+			       * assumed_vf);
+
+  /* Use the cost model only if it is more conservative than user specified
+     threshold.  */
+  unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
+				    min_profitable_iters);
+
+  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
+
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not vectorized: vectorization not profitable.\n");
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "not vectorized: iteration count smaller than user "
+			 "specified loop bound parameter or minimum profitable "
+			 "iterations (whichever is more conservative).\n");
+      return 0;
+    }
+
+  HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
+  if (estimated_niter == -1)
+    estimated_niter = likely_max_stmt_executions_int (loop);
+  if (estimated_niter != -1
+      && ((unsigned HOST_WIDE_INT) estimated_niter
+	  < MAX (th, (unsigned) min_profitable_estimate)))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not vectorized: estimated iteration count too "
+			 "small.\n");
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "not vectorized: estimated iteration count smaller "
+			 "than specified loop bound parameter or minimum "
+			 "profitable iterations (whichever is more "
+			 "conservative).\n");
+      return -1;
+    }
+
+  return 1;
+}
+
 
 /* Function vect_analyze_loop_2.
 
@@ -1906,6 +2001,7 @@ static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
 {
   bool ok;
+  int res;
   unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
   poly_uint64 min_vf = 2;
   unsigned int n_stmts = 0;
@@ -2063,9 +2159,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
   vect_compute_single_scalar_iteration_cost (loop_vinfo);
 
   poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  HOST_WIDE_INT estimated_niter;
   unsigned th;
-  int min_scalar_loop_bound;
 
   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
   ok = vect_analyze_slp (loop_vinfo, n_stmts);
@@ -2095,7 +2189,6 @@ start_over:
   /* Now the vectorization factor is final.  */
   poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   gcc_assert (known_ne (vectorization_factor, 0U));
-  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
 
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
     {
@@ -2108,17 +2201,6 @@ start_over:
 
   HOST_WIDE_INT max_niter
     = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-  if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-       && (LOOP_VINFO_INT_NITERS (loop_vinfo) < assumed_vf))
-      || (max_niter != -1
-	  && (unsigned HOST_WIDE_INT) max_niter < assumed_vf))
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "not vectorized: iteration count smaller than "
-			 "vectorization factor.\n");
-      return false;
-    }
 
   /* Analyze the alignment of the data-refs in the loop.
      Fail if a data reference is found that cannot be vectorized.  */
@@ -2232,65 +2314,16 @@ start_over:
 	}
     }
 
-  /* Analyze cost.  Decide if worth while to vectorize.  */
-  int min_profitable_estimate, min_profitable_iters;
-  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
-				      &min_profitable_estimate);
-
-  if (min_profitable_iters < 0)
+  /* Check the costings of the loop make vectorizing worthwhile.  */
+  res = vect_analyze_loop_costing (loop_vinfo);
+  if (res < 0)
+    goto again;
+  if (!res)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "not vectorized: vectorization not profitable.\n");
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "not vectorized: vector version will never be "
-			 "profitable.\n");
-      goto again;
-    }
-
-  min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
-			   * assumed_vf);
-
-  /* Use the cost model only if it is more conservative than user specified
-     threshold.  */
-  th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters);
-
-  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
-
-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "not vectorized: vectorization not profitable.\n");
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-			 "not vectorized: iteration count smaller than user "
-			 "specified loop bound parameter or minimum profitable "
-			 "iterations (whichever is more conservative).\n");
-      goto again;
-    }
-
-  estimated_niter
-    = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-  if (estimated_niter == -1)
-    estimated_niter = max_niter;
-  if (estimated_niter != -1
-      && ((unsigned HOST_WIDE_INT) estimated_niter
-          < MAX (th, (unsigned) min_profitable_estimate)))
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "not vectorized: estimated iteration count too "
-                         "small.\n");
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-			 "not vectorized: estimated iteration count smaller "
-                         "than specified loop bound parameter or minimum "
-                         "profitable iterations (whichever is more "
-                         "conservative).\n");
-      goto again;
+			 "Loop costings not worthwhile.\n");
+      return false;
     }
 
   /* Decide whether we need to create an epilogue loop to handle
@@ -3881,7 +3914,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 			      * assumed_vf
 			      - vec_inside_cost * peel_iters_prologue
 			      - vec_inside_cost * peel_iters_epilogue);
-
       if (min_profitable_iters <= 0)
         min_profitable_iters = 0;
       else
author	Richard Sandiford <richard.sandiford@linaro.org>	2018-01-13 17:59:23 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2018-01-13 17:59:23 +0000
commit	c2700f7466bac153def05a0e070aa78cd2ffc0ae (patch)
tree	50556d047879e76735dd263eb9a0712f9066545e /gcc/tree-vect-loop.c
parent	8277ddf9eeae431d432855e41537df1c3a4fa323 (diff)
download	gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.zip gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.gz gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.bz2