diff options
author | Richard Biener <rguenther@suse.de> | 2023-07-03 13:59:33 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2023-07-04 09:04:51 +0200 |
commit | 0682a32c026f1e246eb07bb8066abca4636f01d8 (patch) | |
tree | 7398ac782ac4989b785cb1b387c8f9feb6b77658 /gcc/tree-vect-loop.cc | |
parent | eed9eeaab30fd7b9e509ec3cf78f5f3c881b0abf (diff) | |
download | gcc-0682a32c026f1e246eb07bb8066abca4636f01d8.zip gcc-0682a32c026f1e246eb07bb8066abca4636f01d8.tar.gz gcc-0682a32c026f1e246eb07bb8066abca4636f01d8.tar.bz2 |
tree-optimization/110310 - move vector epilogue disabling to analysis phase
The following moves the late decision to elide vectorized epilogues to
the analysis phase and also avoids altering the epilogue's niter.
The costing part from vect_determine_partial_vectors_and_peeling is
moved to vect_analyze_loop_costing where we use the main loop
analysis to constrain the epilogue scalar iterations.
I have not tried to integrate this with vect_known_niters_smaller_than_vf.
It seems the for_epilogue_p parameter in
vect_determine_partial_vectors_and_peeling is largely useless and
we could compute that in the function itself.
PR tree-optimization/110310
* tree-vect-loop.cc (vect_determine_partial_vectors_and_peeling):
Move costing part ...
(vect_analyze_loop_costing): ... here. Integrate better
estimate for epilogues from ...
(vect_analyze_loop_2): Call vect_determine_partial_vectors_and_peeling
with actual epilogue status.
* tree-vect-loop-manip.cc (vect_do_peeling): ... here and
avoid cancelling epilogue vectorization.
(vect_update_epilogue_niters): Remove. No longer update
epilogue LOOP_VINFO_NITERS.
* gcc.target/i386/pr110310.c: New testcase.
* gcc.dg/vect/slp-perm-12.c: Disable epilogue vectorization.
Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 98 |
1 files changed, 68 insertions, 30 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0a03f56..f39a1ec 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2144,14 +2144,76 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo, /* Only loops that can handle partially-populated vectors can have iteration counts less than the vectorization factor. */ - if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) + && vect_known_niters_smaller_than_vf (loop_vinfo)) { - if (vect_known_niters_smaller_than_vf (loop_vinfo)) + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: iteration count smaller than " + "vectorization factor.\n"); + return 0; + } + + /* If we know the number of iterations we can do better, for the + epilogue we can also decide whether the main loop leaves us + with enough iterations, prefering a smaller vector epilog then + also possibly used for the case we skip the vector loop. */ + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) + && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + widest_int scalar_niters + = wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1; + if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + { + loop_vec_info orig_loop_vinfo + = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); + unsigned lowest_vf + = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)); + int prolog_peeling = 0; + if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) + prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo); + if (prolog_peeling >= 0 + && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), + lowest_vf)) + { + unsigned gap + = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 
1 : 0; + scalar_niters = ((scalar_niters - gap - prolog_peeling) + % lowest_vf + gap); + if (scalar_niters == 0) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: loop never entered\n"); + return 0; + } + } + } + + /* Check that the loop processes at least one full vector. */ + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + if (known_lt (scalar_niters, vf)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: iteration count smaller than " - "vectorization factor.\n"); + "loop does not have enough iterations " + "to support vectorization.\n"); + return 0; + } + + /* If we need to peel an extra epilogue iteration to handle data + accesses with gaps, check that there are enough scalar iterations + available. + + The check above is redundant with this one when peeling for gaps, + but the distinction is useful for diagnostics. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + && known_le (scalar_niters, vf)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "loop does not have enough iterations " + "to support peeling for gaps.\n"); return 0; } } @@ -2502,31 +2564,6 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))); } - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) - { - /* Check that the loop processes at least one full vector. */ - poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo); - if (known_lt (wi::to_widest (scalar_niters), vf)) - return opt_result::failure_at (vect_location, - "loop does not have enough iterations" - " to support vectorization.\n"); - - /* If we need to peel an extra epilogue iteration to handle data - accesses with gaps, check that there are enough scalar iterations - available. 
- - The check above is redundant with this one when peeling for gaps, - but the distinction is useful for diagnostics. */ - tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo); - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - && known_lt (wi::to_widest (scalar_nitersm1), vf)) - return opt_result::failure_at (vect_location, - "loop does not have enough iterations" - " to support peeling for gaps.\n"); - } - LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) && need_peeling_or_partial_vectors_p); @@ -3002,7 +3039,8 @@ start_over: assuming that the loop will be used as a main loop. We will redo this analysis later if we instead decide to use the loop as an epilogue loop. */ - ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); + ok = vect_determine_partial_vectors_and_peeling + (loop_vinfo, LOOP_VINFO_EPILOGUE_P (loop_vinfo)); if (!ok) return ok; |