aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKewen Lin <linkw@linux.ibm.com>2023-07-24 01:20:30 -0500
committerKewen Lin <linkw@linux.ibm.com>2023-07-24 01:20:30 -0500
commitd07504725973ccdec78929a09dc13e5ebd9472f6 (patch)
treeeea5e0232b3e05ebb8c7d5cf0af11160e3d52397
parent73ff915a169bf3f4b15c75fa3b6e658f7fe86b46 (diff)
downloadgcc-d07504725973ccdec78929a09dc13e5ebd9472f6.zip
gcc-d07504725973ccdec78929a09dc13e5ebd9472f6.tar.gz
gcc-d07504725973ccdec78929a09dc13e5ebd9472f6.tar.bz2
vect: Don't vectorize a single scalar iteration loop [PR110740]
The function vect_update_epilogue_niters, which was removed by r14-2281, contained code ensuring that if only one scalar iteration is left for the epilogue, we do not try to vectorize it any more. Although costing should eventually be able to take care of this, I think we still want this special casing when costing is not enabled, so this patch adds it back in function vect_analyze_loop_costing and, as Richi suggested, makes it more general so that it applies to both main and epilogue loops. It fixes some exposed failures on Power10:

  - gcc.target/powerpc/p9-vec-length-epil-{1,8}.c
  - gcc.dg/vect/slp-perm-{1,5,6,7}.c

	PR tree-optimization/110740

gcc/ChangeLog:

	* tree-vect-loop.cc (vect_analyze_loop_costing): Do not vectorize a loop with a single scalar iteration.
-rw-r--r-- gcc/tree-vect-loop.cc 55
1 files changed, 34 insertions, 21 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d036a7d..71589b2 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2158,8 +2158,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
epilogue we can also decide whether the main loop leaves us
with enough iterations, prefering a smaller vector epilog then
also possibly used for the case we skip the vector loop. */
- if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
widest_int scalar_niters
= wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1;
@@ -2182,32 +2181,46 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
% lowest_vf + gap);
}
}
-
- /* Check that the loop processes at least one full vector. */
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- if (known_lt (scalar_niters, vf))
+ /* Reject vectorizing for a single scalar iteration, even if
+ we could in principle implement that using partial vectors. */
+ unsigned peeling_gap = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
+ if (scalar_niters <= peeling_gap + 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "loop does not have enough iterations "
- "to support vectorization.\n");
+ "not vectorized: loop only has a single "
+ "scalar iteration.\n");
return 0;
}
- /* If we need to peel an extra epilogue iteration to handle data
- accesses with gaps, check that there are enough scalar iterations
- available.
-
- The check above is redundant with this one when peeling for gaps,
- but the distinction is useful for diagnostics. */
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && known_le (scalar_niters, vf))
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "loop does not have enough iterations "
- "to support peeling for gaps.\n");
- return 0;
+ /* Check that the loop processes at least one full vector. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (known_lt (scalar_niters, vf))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "loop does not have enough iterations "
+ "to support vectorization.\n");
+ return 0;
+ }
+
+ /* If we need to peel an extra epilogue iteration to handle data
+ accesses with gaps, check that there are enough scalar iterations
+ available.
+
+ The check above is redundant with this one when peeling for gaps,
+ but the distinction is useful for diagnostics. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && known_le (scalar_niters, vf))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "loop does not have enough iterations "
+ "to support peeling for gaps.\n");
+ return 0;
+ }
}
}