aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2023-12-15 10:32:29 +0100
committer: Richard Biener <rguenther@suse.de> 2024-01-08 14:45:56 +0100
commit: b3cc5a1efead520bc977b4ba51f1328d01b3e516 (patch)
tree: 7695ab4fd594ff8ec83b51a549513d47588d267e
parent: 8c0dd8a6ff85d6e7b38957f2da400f5cfa8fef6b (diff)
download: gcc-b3cc5a1efead520bc977b4ba51f1328d01b3e516.zip
gcc-b3cc5a1efead520bc977b4ba51f1328d01b3e516.tar.gz
gcc-b3cc5a1efead520bc977b4ba51f1328d01b3e516.tar.bz2
tree-optimization/113026 - avoid vector epilog in more cases

The following avoids creating a niter peeling epilog more consistently,
matching what peeling later uses for the skip_vector condition, in
particular when versioning is required which then also ensures the
vector loop is entered unless the epilog is vectorized.  This should
ideally match LOOP_VINFO_VERSIONING_THRESHOLD which is only computed
later; some refactoring could make that better matching.

The patch also makes sure to adjust the upper bound of the epilogues
when we do not have a skip edge around the vector loop.

	PR tree-optimization/113026
	* tree-vect-loop.cc (vect_need_peeling_or_partial_vectors_p):
	Avoid an epilog in more cases.
	* tree-vect-loop-manip.cc (vect_do_peeling): Adjust the
	epilogues niter upper bounds and estimates.

	* gcc.dg/torture/pr113026-1.c: New testcase.
	* gcc.dg/torture/pr113026-2.c: Likewise.

-rw-r--r--  gcc/testsuite/gcc.dg/torture/pr113026-1.c  11
-rw-r--r--  gcc/testsuite/gcc.dg/torture/pr113026-2.c  18
-rw-r--r--  gcc/tree-vect-loop-manip.cc  32
-rw-r--r--  gcc/tree-vect-loop.cc  6
4 files changed, 66 insertions, 1 deletion
diff --git a/gcc/testsuite/gcc.dg/torture/pr113026-1.c b/gcc/testsuite/gcc.dg/torture/pr113026-1.c
new file mode 100644
index 0000000..56dfef3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113026-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wall" } */
+
+char dst[16];
+
+void
+foo (char *src, long n)
+{
+ for (long i = 0; i < n; i++)
+ dst[i] = src[i]; /* { dg-bogus "" } */
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr113026-2.c b/gcc/testsuite/gcc.dg/torture/pr113026-2.c
new file mode 100644
index 0000000..b9d5857a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113026-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wall" } */
+
+char dst1[17];
+void
+foo1 (char *src, long n)
+{
+ for (long i = 0; i < n; i++)
+ dst1[i] = src[i]; /* { dg-bogus "" } */
+}
+
+char dst2[18];
+void
+foo2 (char *src, long n)
+{
+ for (long i = 0; i < n; i++)
+ dst2[i] = src[i]; /* { dg-bogus "" } */
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9330183..927f76a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3364,6 +3364,38 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
bb_before_epilog = loop_preheader_edge (epilog)->src;
}
+ else
+ {
+ /* When we do not have a loop-around edge to the epilog we know
+ the vector loop covered at least VF scalar iterations unless
+ we have early breaks and the epilog will cover at most
+ VF - 1 + gap peeling iterations.
+ Update any known upper bound with this knowledge. */
+ if (! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ {
+ if (epilog->any_upper_bound)
+ epilog->nb_iterations_upper_bound -= lowest_vf;
+ if (epilog->any_likely_upper_bound)
+ epilog->nb_iterations_likely_upper_bound -= lowest_vf;
+ if (epilog->any_estimate)
+ epilog->nb_iterations_estimate -= lowest_vf;
+ }
+ unsigned HOST_WIDE_INT const_vf;
+ if (vf.is_constant (&const_vf))
+ {
+ const_vf += LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - 1;
+ if (epilog->any_upper_bound)
+ epilog->nb_iterations_upper_bound
+ = wi::umin (epilog->nb_iterations_upper_bound, const_vf);
+ if (epilog->any_likely_upper_bound)
+ epilog->nb_iterations_likely_upper_bound
+ = wi::umin (epilog->nb_iterations_likely_upper_bound,
+ const_vf);
+ if (epilog->any_estimate)
+ epilog->nb_iterations_estimate
+ = wi::umin (epilog->nb_iterations_estimate, const_vf);
+ }
+ }
/* If loop is peeled for non-zero constant times, now niters refers to
orig_niters - prolog_peeling, it won't overflow even the orig_niters
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index a067716..9dd573e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1261,7 +1261,11 @@ vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
the epilogue is unnecessary. */
&& (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
|| ((unsigned HOST_WIDE_INT) max_niter
- > (th / const_vf) * const_vf))))
+ /* We'd like to use LOOP_VINFO_VERSIONING_THRESHOLD
+ but that's only computed later based on our result.
+ The following is the most conservative approximation. */
+ > (std::max ((unsigned HOST_WIDE_INT) th,
+ const_vf) / const_vf) * const_vf))))
return true;
return false;