aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2022-04-13 13:49:45 +0200
committer Richard Biener <rguenther@suse.de> 2022-04-19 16:42:04 +0200
commit 353434b65ef7972172597d232ae17022d9a57244 (patch)
tree 49896352786db10b425a1fc134d52b776f9b1696
parent cd3964ebd3e94ed0df4ecaadb7fd34e991cec753 (diff)
download gcc-353434b65ef7972172597d232ae17022d9a57244.zip
gcc-353434b65ef7972172597d232ae17022d9a57244.tar.gz
gcc-353434b65ef7972172597d232ae17022d9a57244.tar.bz2
tree-optimization/104010 - fix SLP scalar costing with patterns
When doing BB vectorization, the scalar cost computation is derailed by
patterns, causing lanes to be considered live and thus not costed on the
scalar side.  For the testcase in PR104010 this prevents vectorization
which was done by GCC 11.  PR103941 shows similar cases of missed
optimizations that are fixed by this patch.

2022-04-13  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/104010
	PR tree-optimization/103941
	* tree-vect-slp.cc (vect_bb_slp_scalar_cost): When we run into
	stmts in patterns continue walking those for uses outside of
	the vectorized region instead of marking the lane live.

	* gcc.target/i386/pr103941-1.c: New testcase.
	* gcc.target/i386/pr103941-2.c: Likewise.
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr103941-1.c 14
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr103941-2.c 12
-rw-r--r-- gcc/tree-vect-slp.cc 48
3 files changed, 62 insertions, 12 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr103941-1.c b/gcc/testsuite/gcc.target/i386/pr103941-1.c
new file mode 100644
index 0000000..524fdd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+unsigned char ur[16], ua[16], ub[16];
+
+void avgu_v2qi (void) /* Store (ua[i] + ub[i] + 1) >> 1 into ur[i] for i = 0 and 1.  */
+{
+ int i;
+
+ for (i = 0; i < 2; i++) /* Only the first two of the 16 elements are processed.  */
+ ur[i] = (ua[i] + ub[i] + 1) >> 1; /* The dg-final scan expects this to be emitted as pavgb.  */
+}
+
+/* { dg-final { scan-assembler "pavgb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr103941-2.c b/gcc/testsuite/gcc.target/i386/pr103941-2.c
new file mode 100644
index 0000000..972a32b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103941-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void foo (int *c, float *x, float *y) /* Store the int result of x[k] < y[k] into c[k] for k = 0..3.  */
+{
+ c[0] = x[0] < y[0]; /* Four unrolled lanes; the dg-final scan expects a cmpltps.  */
+ c[1] = x[1] < y[1];
+ c[2] = x[2] < y[2];
+ c[3] = x[3] < y[3];
+}
+
+/* { dg-final { scan-assembler "cmpltps" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4ac2b70..805dd7e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5185,22 +5185,46 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
the scalar cost. */
if (!STMT_VINFO_LIVE_P (stmt_info))
{
- FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+ auto_vec<gimple *, 8> worklist;
+ hash_set<gimple *> *worklist_visited = NULL;
+ worklist.quick_push (orig_stmt);
+ do
{
- imm_use_iterator use_iter;
- gimple *use_stmt;
- FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
- if (!is_gimple_debug (use_stmt))
- {
- stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
- if (!use_stmt_info
- || !vectorized_scalar_stmts.contains (use_stmt_info))
+ gimple *work_stmt = worklist.pop ();
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, work_stmt, op_iter, SSA_OP_DEF)
+ {
+ imm_use_iterator use_iter;
+ gimple *use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter,
+ DEF_FROM_PTR (def_p))
+ if (!is_gimple_debug (use_stmt))
{
- (*life)[i] = true;
- break;
+ stmt_vec_info use_stmt_info
+ = vinfo->lookup_stmt (use_stmt);
+ if (!use_stmt_info
+ || !vectorized_scalar_stmts.contains (use_stmt_info))
+ {
+ if (use_stmt_info
+ && STMT_VINFO_IN_PATTERN_P (use_stmt_info))
+ {
+ /* For stmts participating in patterns we have
+ to check their uses recursively. */
+ if (!worklist_visited)
+ worklist_visited = new hash_set<gimple *> ();
+ if (!worklist_visited->add (use_stmt))
+ worklist.safe_push (use_stmt);
+ continue;
+ }
+ (*life)[i] = true;
+ goto next_lane;
+ }
}
- }
+ }
}
+ while (!worklist.is_empty ());
+next_lane:
+ if (worklist_visited)
+ delete worklist_visited;
if ((*life)[i])
continue;
}