diff options
author | Tamar Christina <tamar.christina@arm.com> | 2024-01-12 15:24:49 +0000 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2024-01-12 15:24:49 +0000 |
commit | 6cb155a6cf314232248a12bdd395ed4151ae5a28 (patch) | |
tree | 778cb185e087339d488e24b9737fe13568cfe391 | |
parent | a0e3d2ff6219d860c5108b3d1ff25a05a9a5559f (diff) | |
download | gcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.zip gcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.tar.gz gcc-6cb155a6cf314232248a12bdd395ed4151ae5a28.tar.bz2 |
middle-end: make memory analysis for early break more deterministic [PR113135]
Instead of searching for where to move stores to, they should always be in
exit belonging to the latch. We can only ever delay stores and even if we
pick a different exit than the latch one as the main one, effects still
happen in program order when vectorized. If we don't move the stores to the
latch exit but instead to whever we pick as the "main" exit then we can
perform incorrect memory accesses (luckily these are trapped by verify_ssa).
We used to iterate over the conds and check the loads and stores inside them.
However this relies on the conds being ordered in program order. Additionally
if there is a basic block between two conds we would not have analyzed it.
Instead this now walks from the preds of the destination basic block up to the
loop header and analyzes every block along the way. As a later optimization we
could stop as soon as we've seen all the BBs we have conds for. For now the
header will always contain the first cond, but this can change when we support
arbitrary control flow.
gcc/ChangeLog:
PR tree-optimization/113135
* tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Rework
dependency analysis.
gcc/testsuite/ChangeLog:
PR tree-optimization/113135
* gcc.dg/vect/vect-early-break_103-pr113135.c: New test.
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c | 14 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.cc | 43 |
2 files changed, 32 insertions, 25 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c new file mode 100644 index 0000000..bbad7ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-w" } */ + +char UnpackReadTables_BitLength[20]; +int UnpackReadTables_ZeroCount; +void UnpackReadTables() { + for (unsigned I = 0; I < 20;) + while (UnpackReadTables_ZeroCount-- && + I < sizeof(UnpackReadTables_BitLength)) + UnpackReadTables_BitLength[I++] = 0; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 752c34c..5e86da3 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -671,13 +671,20 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) "loop contains multiple exits, analyzing" " statement dependencies.\n"); - for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) + /* Since we don't support general control flow, the location we'll move the + side-effects to is always the latch connected exit. When we support + general control flow we can do better but for now this is fine. */ + dest_bb = single_pred (loop->latch); + basic_block bb = dest_bb; + + do { - stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c); - if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type) + /* If the destination block is also the header then we have nothing to do. */ + if (!single_pred_p (bb)) continue; - gimple_stmt_iterator gsi = gsi_for_stmt (c); + bb = single_pred (bb); + gimple_stmt_iterator gsi = gsi_last_bb (bb); /* Now analyze all the remaining statements and try to determine which instructions are allowed/needed to be moved. */ @@ -705,10 +712,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "early breaks only supported on statically" " allocated objects.\n"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } tree refop = TREE_OPERAND (obj, 0); @@ -720,10 +727,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "early breaks only supported on" " statically allocated objects.\n"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } /* Check if vector accesses to the object will be within bounds. @@ -736,10 +743,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) "early breaks not supported: vectorization " "would %s beyond size of obj.", DR_IS_READ (dr_ref) ? "read" : "write"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } if (DR_IS_READ (dr_ref)) @@ -801,27 +808,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) "marked statement for vUSE update: %G", stmt); } } - - /* Save destination as we go, BB are visited in order and the last one - is where statements should be moved to. */ - if (!dest_bb) - dest_bb = gimple_bb (c); - else - { - basic_block curr_bb = gimple_bb (c); - if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) - dest_bb = curr_bb; - } } + while (bb != loop->header); - basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest; - basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest; - dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1; /* We don't allow outer -> inner loop transitions which should have been trapped already during loop form analysis. */ gcc_assert (dest_bb->loop_father == loop); - gcc_assert (dest_bb); LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ()) |