aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hao Liu <hliu@os.amperecomputing.com> 2023-07-06 10:03:47 +0800
committer Hao Liu <hliu@os.amperecomputing.com> 2023-07-06 10:06:01 +0800
commit 7339e725b995912747c01c3ec80ce602512f45df (patch)
tree aa48dddc73b417862209f342bf1d7d765a25ec83
parent 5158918aa211ee85176c058831707dbb3eaf0fb4 (diff)
download gcc-7339e725b995912747c01c3ec80ce602512f45df.zip
gcc-7339e725b995912747c01c3ec80ce602512f45df.tar.gz
gcc-7339e725b995912747c01c3ec80ce602512f45df.tar.bz2
tree-optimization/110474 - Vect: select small VF for epilog of unrolled loop
If a loop is unrolled during vectorization (i.e. suggested_unroll_factor > 1), the VFs of both the main and epilog loops are enlarged. The epilog vect loop is specific to loops with small iteration counts, so a large VF may hurt performance.

This patch unscales the main loop VF by suggested_unroll_factor while selecting the epilog loop VF, so that it will be the same as for a vectorized loop without unrolling (i.e. suggested_unroll_factor = 1).

gcc/ChangeLog:

PR tree-optimization/110474
* tree-vect-loop.cc (vect_analyze_loop_2): Unscale the VF by the suggested unroll factor while selecting the epilog vect loop VF.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr110474.c: New testcase.
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/pr110474.c 37
-rw-r--r-- gcc/tree-vect-loop.cc 16
2 files changed, 47 insertions, 6 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/pr110474.c b/gcc/testsuite/gcc.target/aarch64/pr110474.c
new file mode 100644
index 0000000..e548416
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr110474.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mtune=neoverse-n2 -mcpu=neoverse-n1 -fdump-tree-vect-details --param aarch64-vect-unroll-limit=2" } */
+/* { dg-final { scan-tree-dump "Choosing vector mode V8HI" "vect" } } */
+/* { dg-final { scan-tree-dump "Choosing epilogue vector mode V8QI" "vect" } } */
+
+/* Do not increase the vectorization factor of the epilog vectorized loop
+ for a loop with suggested_unroll_factor > 1.
+
+ before (suggested_unroll_factor=1):
+ if N >= 16:
+ main vect loop
+ if N >= 8:
+ epilog vect loop
+ scalar code
+
+ before (suggested_unroll_factor=2):
+ if N >= 32:
+ main vect loop
+ if N >= 16: // May fail to execute vectorized code (e.g. N is 8)
+ epilog vect loop
+ scalar code
+
+ after (suggested_unroll_factor=2):
+ if N >= 32:
+ main vect loop
+ if N >= 8: // The same VF as suggested_unroll_factor=1
+ epilog vect loop
+ scalar code */
+
+int
+foo (short *A, char *B, int N)
+{
+ int sum = 0;
+ for (int i = 0; i < N; ++i)
+ sum += A[i] * B[i];
+ return sum;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 3b46c58..4d9abd0 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3021,12 +3021,16 @@ start_over:
to be able to handle fewer than VF scalars, or needs to have a lower VF
than the main loop. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
- && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
- LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
- return opt_result::failure_at (vect_location,
- "Vectorization factor too high for"
- " epilogue loop.\n");
+ && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ {
+ poly_uint64 unscaled_vf
+ = exact_div (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo),
+ orig_loop_vinfo->suggested_unroll_factor);
+ if (maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), unscaled_vf))
+ return opt_result::failure_at (vect_location,
+ "Vectorization factor too high for"
+ " epilogue loop.\n");
+ }
/* Decide whether this loop_vinfo should use partial vectors or peeling,
assuming that the loop will be used as a main loop. We will redo