aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2024-01-12 15:24:49 +0000
committerTamar Christina <tamar.christina@arm.com>2024-01-12 15:24:49 +0000
commit6cb155a6cf314232248a12bdd395ed4151ae5a28 (patch)
tree778cb185e087339d488e24b9737fe13568cfe391
parenta0e3d2ff6219d860c5108b3d1ff25a05a9a5559f (diff)
downloadgcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.zip
gcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.tar.gz
gcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.tar.bz2
middle-end: make memory analysis for early break more deterministic [PR113135]
Instead of searching for where to move stores to, they should always be in the exit belonging to the latch. We can only ever delay stores and even if we pick a different exit than the latch one as the main one, effects still happen in program order when vectorized. If we don't move the stores to the latch exit but instead to whatever we pick as the "main" exit then we can perform incorrect memory accesses (luckily these are trapped by verify_ssa). We used to iterate over the conds and check the loads and stores inside them. However, this relies on the conds being ordered in program order. Additionally, if there is a basic block between two conds we would not have analyzed it. Instead this now walks from the preds of the destination basic block up to the loop header and analyzes every block along the way. As a later optimization we could stop as soon as we've seen all the BBs we have conds for. For now the header will always contain the first cond, but this can change when we support arbitrary control flow. gcc/ChangeLog: PR tree-optimization/113135 * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Rework dependency analysis. gcc/testsuite/ChangeLog: PR tree-optimization/113135 * gcc.dg/vect/vect-early-break_103-pr113135.c: New test.
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c14
-rw-r--r--gcc/tree-vect-data-refs.cc43
2 files changed, 32 insertions, 25 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c
new file mode 100644
index 0000000..bbad7ee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-w" } */
+
+char UnpackReadTables_BitLength[20];
+int UnpackReadTables_ZeroCount;
+void UnpackReadTables() {
+ for (unsigned I = 0; I < 20;)
+ while (UnpackReadTables_ZeroCount-- &&
+ I < sizeof(UnpackReadTables_BitLength))
+ UnpackReadTables_BitLength[I++] = 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 752c34c..5e86da3 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -671,13 +671,20 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"loop contains multiple exits, analyzing"
" statement dependencies.\n");
- for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+ /* Since we don't support general control flow, the location we'll move the
+ side-effects to is always the latch connected exit. When we support
+ general control flow we can do better but for now this is fine. */
+ dest_bb = single_pred (loop->latch);
+ basic_block bb = dest_bb;
+
+ do
{
- stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
- if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
+ /* If the destination block is also the header then we have nothing to do. */
+ if (!single_pred_p (bb))
continue;
- gimple_stmt_iterator gsi = gsi_for_stmt (c);
+ bb = single_pred (bb);
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
/* Now analyze all the remaining statements and try to determine which
instructions are allowed/needed to be moved. */
@@ -705,10 +712,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"early breaks only supported on statically"
" allocated objects.\n");
- return opt_result::failure_at (c,
+ return opt_result::failure_at (stmt,
"can't safely apply code motion to "
"dependencies of %G to vectorize "
- "the early exit.\n", c);
+ "the early exit.\n", stmt);
}
tree refop = TREE_OPERAND (obj, 0);
@@ -720,10 +727,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"early breaks only supported on"
" statically allocated objects.\n");
- return opt_result::failure_at (c,
+ return opt_result::failure_at (stmt,
"can't safely apply code motion to "
"dependencies of %G to vectorize "
- "the early exit.\n", c);
+ "the early exit.\n", stmt);
}
/* Check if vector accesses to the object will be within bounds.
@@ -736,10 +743,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"early breaks not supported: vectorization "
"would %s beyond size of obj.",
DR_IS_READ (dr_ref) ? "read" : "write");
- return opt_result::failure_at (c,
+ return opt_result::failure_at (stmt,
"can't safely apply code motion to "
"dependencies of %G to vectorize "
- "the early exit.\n", c);
+ "the early exit.\n", stmt);
}
if (DR_IS_READ (dr_ref))
@@ -801,27 +808,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
"marked statement for vUSE update: %G", stmt);
}
}
-
- /* Save destination as we go, BB are visited in order and the last one
- is where statements should be moved to. */
- if (!dest_bb)
- dest_bb = gimple_bb (c);
- else
- {
- basic_block curr_bb = gimple_bb (c);
- if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
- dest_bb = curr_bb;
- }
}
+ while (bb != loop->header);
- basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest;
- basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest;
- dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1;
/* We don't allow outer -> inner loop transitions which should have been
trapped already during loop form analysis. */
gcc_assert (dest_bb->loop_father == loop);
- gcc_assert (dest_bb);
LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())