about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/pr110449.c 25
-rw-r--r-- gcc/tree-ssa-loop-manip.cc 62
-rw-r--r-- gcc/tree-ssa-loop-manip.h 1
-rw-r--r-- gcc/tree-vect-loop.cc 6
4 files changed, 57 insertions, 37 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/pr110449.c b/gcc/testsuite/gcc.target/aarch64/pr110449.c
index bb3b6dc..51ca3f4 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr110449.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr110449.c
@@ -1,8 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-Ofast -mcpu=neoverse-n2 --param aarch64-vect-unroll-limit=2" } */
-/* { dg-final { scan-assembler-not "8.0e\\+0" } } */
+/* { dg-final { scan-assembler {, #?8.0e\+0} } } */
+/* { dg-final { scan-assembler-not {\tmov\tv} } } */
-/* Calcualte the vectorized induction with smaller step for an unrolled loop.
+/* Insert the induction IV updates before the exit condition, rather than
+ at the start of the loop body.
before (suggested_unroll_factor=2):
fmov s30, 8.0e+0
@@ -19,15 +21,16 @@
bne .L6
after:
- fmov s31, 4.0e+0
- dup v29.4s, v31.s[0]
- .L6:
- fadd v30.4s, v31.4s, v29.4s
- stp q31, q30, [x0]
- add x0, x0, 32
- fadd v31.4s, v29.4s, v30.4s
- cmp x0, x1
- bne .L6 */
+ fmov s31, 8.0e+0
+ fmov s29, 4.0e+0
+ dup v31.4s, v31.s[0]
+ dup v29.4s, v29.s[0]
+ .L2:
+ fadd v30.4s, v0.4s, v29.4s
+ stp q0, q30, [x0], 32
+ fadd v0.4s, v0.4s, v31.4s
+ cmp x1, x0
+ bne .L2 */
void
foo2 (float *arr, float freq, float step)
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
index 6ceb9df..2907fa6 100644
--- a/gcc/tree-ssa-loop-manip.cc
+++ b/gcc/tree-ssa-loop-manip.cc
@@ -47,6 +47,39 @@ along with GCC; see the file COPYING3. If not see
so that we can free them all at once. */
static bitmap_obstack loop_renamer_obstack;
+/* Insert IV increment statements STMTS before or after INCR_POS;
+ AFTER selects which. INCR_POS and AFTER can be computed using
+ standard_iv_increment_position. */
+
+void
+insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after,
+ gimple_seq stmts)
+{
+ /* Prevent the increment from inheriting a bogus location if it is not put
+ immediately after a statement whose location is known. */
+ if (after)
+ {
+ gimple_stmt_iterator gsi = *incr_pos;
+ if (!gsi_end_p (gsi))
+ gsi_next_nondebug (&gsi);
+ if (gsi_end_p (gsi))
+ {
+ edge e = single_succ_edge (gsi_bb (*incr_pos));
+ gimple_seq_set_location (stmts, e->goto_locus);
+ }
+ gsi_insert_seq_after (incr_pos, stmts, GSI_NEW_STMT);
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = *incr_pos;
+ if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
+ gsi_next_nondebug (&gsi);
+ if (!gsi_end_p (gsi))
+ gimple_seq_set_location (stmts, gimple_location (gsi_stmt (gsi)));
+ gsi_insert_seq_before (incr_pos, stmts, GSI_NEW_STMT);
+ }
+}
+
/* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP.
If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration.
If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration.
@@ -63,7 +96,6 @@ create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
gimple_stmt_iterator *incr_pos, bool after, tree *var_before,
tree *var_after)
{
- gassign *stmt;
gphi *phi;
tree initial, step1;
gimple_seq stmts;
@@ -126,30 +158,10 @@ create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
if (stmts)
gsi_insert_seq_on_edge_immediate (pe, stmts);
- stmt = gimple_build_assign (va, incr_op, vb, step);
- /* Prevent the increment from inheriting a bogus location if it is not put
- immediately after a statement whose location is known. */
- if (after)
- {
- gimple_stmt_iterator gsi = *incr_pos;
- if (!gsi_end_p (gsi))
- gsi_next_nondebug (&gsi);
- if (gsi_end_p (gsi))
- {
- edge e = single_succ_edge (gsi_bb (*incr_pos));
- gimple_set_location (stmt, e->goto_locus);
- }
- gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
- }
- else
- {
- gimple_stmt_iterator gsi = *incr_pos;
- if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
- gsi_next_nondebug (&gsi);
- if (!gsi_end_p (gsi))
- gimple_set_location (stmt, gimple_location (gsi_stmt (gsi)));
- gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
- }
+ gimple_seq incr_stmts = nullptr;
+ gimple_seq_add_stmt (&incr_stmts,
+ gimple_build_assign (va, incr_op, vb, step));
+ insert_iv_increment (incr_pos, after, incr_stmts);
initial = force_gimple_operand (base, &stmts, true, var);
if (stmts)
diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h
index b1f65e3..80f6805 100644
--- a/gcc/tree-ssa-loop-manip.h
+++ b/gcc/tree-ssa-loop-manip.h
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
typedef void (*transform_callback)(class loop *, void *);
+extern void insert_iv_increment (gimple_stmt_iterator *, bool, gimple_seq);
extern void create_iv (tree, tree_code, tree, tree, class loop *,
gimple_stmt_iterator *, bool, tree *, tree *);
extern void rewrite_into_loop_closed_ssa (bitmap, unsigned);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0342620..eea0b89 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10580,6 +10580,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
+ gimple_stmt_iterator incr_si;
+ bool insert_after;
+ standard_iv_increment_position (iv_loop, &incr_si, &insert_after);
+
/* The initial values are vectorized, but any lanes > group_size
need adjustment. */
slp_tree init_node
@@ -10810,7 +10814,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
vec_def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, vec_def, up);
vec_def = gimple_convert (&stmts, vectype, vec_def);
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ insert_iv_increment (&incr_si, insert_after, stmts);
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);