aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2018-01-13 17:59:23 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-13 17:59:23 +0000
commit c2700f7466bac153def05a0e070aa78cd2ffc0ae (patch)
tree 50556d047879e76735dd263eb9a0712f9066545e /gcc/tree-vect-loop.c
parent 8277ddf9eeae431d432855e41537df1c3a4fa323 (diff)
download gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.zip
gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.gz
gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.bz2
Allow the number of iterations to be smaller than VF
Fully-masked loops can be profitable even if the iteration count is smaller than the vectorisation factor. In this case we're effectively doing a complete unroll followed by SLP. The documentation for min-vect-loop-bound says that the default value was 0, but actually the default and minimum were 1. We need it to be 0 for this case since the parameter counts a whole number of vector iterations. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/sourcebuild.texi (vect_fully_masked): Document. * params.def (PARAM_MIN_VECT_LOOP_BOUND): Change minimum and default value to 0. * tree-vect-loop.c (vect_analyze_loop_costing): New function, split out from... (vect_analyze_loop_2): ...here. Don't check the vectorization factor against the number of loop iterations if the loop is fully-masked. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_fully_masked): New proc. * gcc.dg/vect/slp-3.c: Expect all loops to be vectorized if vect_fully_masked. * gcc.target/aarch64/sve/loop_add_4.c: New test. * gcc.target/aarch64/sve/loop_add_4_run.c: Likewise. * gcc.target/aarch64/sve/loop_add_5.c: Likewise. * gcc.target/aarch64/sve/loop_add_5_run.c: Likewise. * gcc.target/aarch64/sve/miniloop_1.c: Likewise. * gcc.target/aarch64/sve/miniloop_2.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256629
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- gcc/tree-vect-loop.c 174
1 files changed, 103 insertions, 71 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 15d36b2..1666332 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1896,6 +1896,101 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
return true;
}
+/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
+ is worthwhile to vectorize. Return 1 if definitely yes, 0 if
+ definitely no, or -1 if it's worth retrying.
+
+ The checks are made in this order:
+
+ - If the loop is not fully-masked and its known iteration count (or,
+ when that is not known, the value of max_stmt_executions_int) is
+ below the vectorization factor assumed for costing, return 0.
+
+ - If vect_estimate_min_profitable_iters reports a negative minimum,
+ return -1.
+
+ - If the iteration count is known and is below the cost-model
+ threshold, return 0.
+
+ - If the estimated iteration count is below the larger of the
+ threshold and the minimum profitable estimate, return -1.
+
+ Otherwise return 1.
+
+ As a side effect, LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO) is set
+ to the computed threshold, unless one of the first two checks has
+ already returned. */
+
+static int
+vect_analyze_loop_costing (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+ /* Only fully-masked loops can have iteration counts less than the
+ vectorization factor. For any other loop, compare the known
+ iteration count, or failing that the result of
+ max_stmt_executions_int, against ASSUMED_VF. A value of -1 is
+ excluded from the comparison, so in that case the loop is not
+ rejected here. */
+ if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ HOST_WIDE_INT max_niter;
+
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ else
+ max_niter = max_stmt_executions_int (loop);
+
+ if (max_niter != -1
+ && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: iteration count smaller than "
+ "vectorization factor.\n");
+ return 0;
+ }
+ }
+
+ int min_profitable_iters, min_profitable_estimate;
+ vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
+ &min_profitable_estimate);
+
+ if (min_profitable_iters < 0)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: vectorization not profitable.\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: vector version will never be "
+ "profitable.\n");
+ return -1;
+ }
+
+ int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+ * assumed_vf);
+
+ /* Use the cost model only if it is more conservative than user specified
+ threshold. MIN_SCALAR_LOOP_BOUND is PARAM_MIN_VECT_LOOP_BOUND scaled
+ by ASSUMED_VF, and the threshold TH is the larger of that value and
+ MIN_PROFITABLE_ITERS. MIN_PROFITABLE_ITERS is known to be
+ non-negative here because of the early return above. */
+ unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
+ min_profitable_iters);
+
+ LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
+
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: vectorization not profitable.\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not vectorized: iteration count smaller than user "
+ "specified loop bound parameter or minimum profitable "
+ "iterations (whichever is more conservative).\n");
+ return 0;
+ }
+
+ HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
+ if (estimated_niter == -1)
+ estimated_niter = likely_max_stmt_executions_int (loop);
+ if (estimated_niter != -1
+ && ((unsigned HOST_WIDE_INT) estimated_niter
+ < MAX (th, (unsigned) min_profitable_estimate)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: estimated iteration count too "
+ "small.\n");
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not vectorized: estimated iteration count smaller "
+ "than specified loop bound parameter or minimum "
+ "profitable iterations (whichever is more "
+ "conservative).\n");
+ return -1;
+ }
+
+ return 1;
+}
+
/* Function vect_analyze_loop_2.
@@ -1906,6 +2001,7 @@ static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
{
bool ok;
+ int res;
unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
poly_uint64 min_vf = 2;
unsigned int n_stmts = 0;
@@ -2063,9 +2159,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
vect_compute_single_scalar_iteration_cost (loop_vinfo);
poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- HOST_WIDE_INT estimated_niter;
unsigned th;
- int min_scalar_loop_bound;
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
ok = vect_analyze_slp (loop_vinfo, n_stmts);
@@ -2095,7 +2189,6 @@ start_over:
/* Now the vectorization factor is final. */
poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
gcc_assert (known_ne (vectorization_factor, 0U));
- unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
{
@@ -2108,17 +2201,6 @@ start_over:
HOST_WIDE_INT max_niter
= likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
- if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && (LOOP_VINFO_INT_NITERS (loop_vinfo) < assumed_vf))
- || (max_niter != -1
- && (unsigned HOST_WIDE_INT) max_niter < assumed_vf))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: iteration count smaller than "
- "vectorization factor.\n");
- return false;
- }
/* Analyze the alignment of the data-refs in the loop.
Fail if a data reference is found that cannot be vectorized. */
@@ -2232,65 +2314,16 @@ start_over:
}
}
- /* Analyze cost. Decide if worth while to vectorize. */
- int min_profitable_estimate, min_profitable_iters;
- vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
- &min_profitable_estimate);
-
- if (min_profitable_iters < 0)
+ /* Check the costings of the loop make vectorizing worthwhile. */
+ res = vect_analyze_loop_costing (loop_vinfo);
+ if (res < 0)
+ goto again;
+ if (!res)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: vectorization not profitable.\n");
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: vector version will never be "
- "profitable.\n");
- goto again;
- }
-
- min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
- * assumed_vf);
-
- /* Use the cost model only if it is more conservative than user specified
- threshold. */
- th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters);
-
- LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
-
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: vectorization not profitable.\n");
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "not vectorized: iteration count smaller than user "
- "specified loop bound parameter or minimum profitable "
- "iterations (whichever is more conservative).\n");
- goto again;
- }
-
- estimated_niter
- = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
- if (estimated_niter == -1)
- estimated_niter = max_niter;
- if (estimated_niter != -1
- && ((unsigned HOST_WIDE_INT) estimated_niter
- < MAX (th, (unsigned) min_profitable_estimate)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: estimated iteration count too "
- "small.\n");
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "not vectorized: estimated iteration count smaller "
- "than specified loop bound parameter or minimum "
- "profitable iterations (whichever is more "
- "conservative).\n");
- goto again;
+ "Loop costings not worthwhile.\n");
+ return false;
}
/* Decide whether we need to create an epilogue loop to handle
@@ -3881,7 +3914,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
* assumed_vf
- vec_inside_cost * peel_iters_prologue
- vec_inside_cost * peel_iters_epilogue);
-
if (min_profitable_iters <= 0)
min_profitable_iters = 0;
else