aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/testsuite/gcc.target/aarch64/pr110449.c40
-rw-r--r--gcc/tree-vect-loop.cc21
2 files changed, 58 insertions, 3 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c b/gcc/testsuite/gcc.target/aarch64/pr110449.c
new file mode 100644
index 0000000..bb3b6dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mcpu=neoverse-n2 --param aarch64-vect-unroll-limit=2" } */
+/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
+
+/* Calculate the vectorized induction with a smaller step for an unrolled loop.
+
+ before (suggested_unroll_factor=2):
+ fmov s30, 8.0e+0
+ fmov s31, 4.0e+0
+ dup v27.4s, v30.s[0]
+ dup v28.4s, v31.s[0]
+ .L6:
+ mov v30.16b, v31.16b
+ fadd v31.4s, v31.4s, v27.4s
+ fadd v29.4s, v30.4s, v28.4s
+ stp q30, q29, [x0]
+ add x0, x0, 32
+ cmp x1, x0
+ bne .L6
+
+ after:
+ fmov s31, 4.0e+0
+ dup v29.4s, v31.s[0]
+ .L6:
+ fadd v30.4s, v31.4s, v29.4s
+ stp q31, q30, [x0]
+ add x0, x0, 32
+ fadd v31.4s, v29.4s, v30.4s
+ cmp x0, x1
+ bne .L6 */
+
+void
+foo2 (float *arr, float freq, float step) /* Fill arr[0..1023] with freq, freq + step, ...  */
+{
+ for (int i = 0; i < 1024; i++) /* Constant trip count of 1024.  */
+ {
+ arr[i] = freq; /* Store the current value of the float induction.  */
+ freq += step; /* Advance the induction by step on every iteration.  */
+ }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 36d19a5..7d917bf 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10098,7 +10098,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
new_vec, step_vectype, NULL);
vec_def = induc_def;
- for (i = 1; i < ncopies; i++)
+ for (i = 1; i < ncopies + 1; i++)
{
/* vec_i = vec_prev + vec_step */
gimple_seq stmts = NULL;
@@ -10108,8 +10108,23 @@ vectorizable_induction (loop_vec_info loop_vinfo,
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- new_stmt = SSA_NAME_DEF_STMT (vec_def);
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ if (i < ncopies)
+ {
+ new_stmt = SSA_NAME_DEF_STMT (vec_def);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ }
+ else
+ {
+ /* vec_1 = vec_iv + (VF/n * S)
+ vec_2 = vec_1 + (VF/n * S)
+ ...
+ vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop
+
+ vec_n is used as vec_loop to save the large step register and
+ related operations. */
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
+ }
}
}