diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-13 17:59:23 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-13 17:59:23 +0000 |
commit | c2700f7466bac153def05a0e070aa78cd2ffc0ae (patch) | |
tree | 50556d047879e76735dd263eb9a0712f9066545e /gcc/tree-vect-loop.c | |
parent | 8277ddf9eeae431d432855e41537df1c3a4fa323 (diff) | |
download | gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.zip gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.gz gcc-c2700f7466bac153def05a0e070aa78cd2ffc0ae.tar.bz2 |
Allow the number of iterations to be smaller than VF
Fully-masked loops can be profitable even if the iteration
count is smaller than the vectorisation factor. In this case
we're effectively doing a complete unroll followed by SLP.
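The documentation for min-vect-loop-bound says that the
default value was 0, but actually the default and minimum
were 1. We need it to be 0 for this case since the parameter
counts a whole number of vector iterations: it is multiplied
by the vectorisation factor to form the scalar iteration
threshold, so any nonzero value would reject a loop whose
iteration count is smaller than the vectorisation factor.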
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/sourcebuild.texi (vect_fully_masked): Document.
* params.def (PARAM_MIN_VECT_LOOP_BOUND): Change minimum and
default value to 0.
* tree-vect-loop.c (vect_analyze_loop_costing): New function,
split out from...
(vect_analyze_loop_2): ...here. Don't check the vectorization
factor against the number of loop iterations if the loop is
fully-masked.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_fully_masked):
New proc.
* gcc.dg/vect/slp-3.c: Expect all loops to be vectorized if
vect_fully_masked.
* gcc.target/aarch64/sve/loop_add_4.c: New test.
* gcc.target/aarch64/sve/loop_add_4_run.c: Likewise.
* gcc.target/aarch64/sve/loop_add_5.c: Likewise.
* gcc.target/aarch64/sve/loop_add_5_run.c: Likewise.
* gcc.target/aarch64/sve/miniloop_1.c: Likewise.
* gcc.target/aarch64/sve/miniloop_2.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256629
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 174 |
1 files changed, 103 insertions, 71 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 15d36b2..1666332 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1896,6 +1896,101 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) return true; } +/* Analyze the cost of the loop described by LOOP_VINFO. Decide if it + is worthwhile to vectorize. Return 1 if definitely yes, 0 if + definitely no, or -1 if it's worth retrying. */ + +static int +vect_analyze_loop_costing (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); + + /* Only fully-masked loops can have iteration counts less than the + vectorization factor. */ + if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + { + HOST_WIDE_INT max_niter; + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); + else + max_niter = max_stmt_executions_int (loop); + + if (max_niter != -1 + && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: iteration count smaller than " + "vectorization factor.\n"); + return 0; + } + } + + int min_profitable_iters, min_profitable_estimate; + vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, + &min_profitable_estimate); + + if (min_profitable_iters < 0) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: vectorization not profitable.\n"); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: vector version will never be " + "profitable.\n"); + return -1; + } + + int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) + * assumed_vf); + + /* Use the cost model only if it is more conservative than user specified + threshold. 
*/ + unsigned int th = (unsigned) MAX (min_scalar_loop_bound, + min_profitable_iters); + + LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th; + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_INT_NITERS (loop_vinfo) < th) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: vectorization not profitable.\n"); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "not vectorized: iteration count smaller than user " + "specified loop bound parameter or minimum profitable " + "iterations (whichever is more conservative).\n"); + return 0; + } + + HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop); + if (estimated_niter == -1) + estimated_niter = likely_max_stmt_executions_int (loop); + if (estimated_niter != -1 + && ((unsigned HOST_WIDE_INT) estimated_niter + < MAX (th, (unsigned) min_profitable_estimate))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: estimated iteration count too " + "small.\n"); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "not vectorized: estimated iteration count smaller " + "than specified loop bound parameter or minimum " + "profitable iterations (whichever is more " + "conservative).\n"); + return -1; + } + + return 1; +} + /* Function vect_analyze_loop_2. @@ -1906,6 +2001,7 @@ static bool vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal) { bool ok; + int res; unsigned int max_vf = MAX_VECTORIZATION_FACTOR; poly_uint64 min_vf = 2; unsigned int n_stmts = 0; @@ -2063,9 +2159,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal) vect_compute_single_scalar_iteration_cost (loop_vinfo); poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - HOST_WIDE_INT estimated_niter; unsigned th; - int min_scalar_loop_bound; /* Check the SLP opportunities in the loop, analyze and build SLP trees. 
*/ ok = vect_analyze_slp (loop_vinfo, n_stmts); @@ -2095,7 +2189,6 @@ start_over: /* Now the vectorization factor is final. */ poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); gcc_assert (known_ne (vectorization_factor, 0U)); - unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) { @@ -2108,17 +2201,6 @@ start_over: HOST_WIDE_INT max_niter = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); - if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && (LOOP_VINFO_INT_NITERS (loop_vinfo) < assumed_vf)) - || (max_niter != -1 - && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: iteration count smaller than " - "vectorization factor.\n"); - return false; - } /* Analyze the alignment of the data-refs in the loop. Fail if a data reference is found that cannot be vectorized. */ @@ -2232,65 +2314,16 @@ start_over: } } - /* Analyze cost. Decide if worth while to vectorize. */ - int min_profitable_estimate, min_profitable_iters; - vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, - &min_profitable_estimate); - - if (min_profitable_iters < 0) + /* Check the costings of the loop make vectorizing worthwhile. */ + res = vect_analyze_loop_costing (loop_vinfo); + if (res < 0) + goto again; + if (!res) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: vectorization not profitable.\n"); - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: vector version will never be " - "profitable.\n"); - goto again; - } - - min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) - * assumed_vf); - - /* Use the cost model only if it is more conservative than user specified - threshold. 
*/ - th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters); - - LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th; - - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_INT_NITERS (loop_vinfo) < th) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: vectorization not profitable.\n"); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "not vectorized: iteration count smaller than user " - "specified loop bound parameter or minimum profitable " - "iterations (whichever is more conservative).\n"); - goto again; - } - - estimated_niter - = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); - if (estimated_niter == -1) - estimated_niter = max_niter; - if (estimated_niter != -1 - && ((unsigned HOST_WIDE_INT) estimated_niter - < MAX (th, (unsigned) min_profitable_estimate))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: estimated iteration count too " - "small.\n"); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "not vectorized: estimated iteration count smaller " - "than specified loop bound parameter or minimum " - "profitable iterations (whichever is more " - "conservative).\n"); - goto again; + "Loop costings not worthwhile.\n"); + return false; } /* Decide whether we need to create an epilogue loop to handle @@ -3881,7 +3914,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, * assumed_vf - vec_inside_cost * peel_iters_prologue - vec_inside_cost * peel_iters_epilogue); - if (min_profitable_iters <= 0) min_profitable_iters = 0; else |