aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2023-07-27 15:34:12 +0200
committerRichard Biener <rguenther@suse.de>2023-08-02 10:45:12 +0200
commit399c8dd44ff44f4b496223c7cc980651c4d6f6a0 (patch)
tree9a88c939fd53b01c26014f219191d46251408406
parentee20be8325f7f257ba91a0201cfb3bd6bfbceba9 (diff)
downloadgcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.zip
gcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.tar.gz
gcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.tar.bz2
tree-optimization/92335 - Improve sinking heuristics for vectorization
The following delays sinking of loads within the same innermost loop when they were unconditional before. That's a not uncommon issue preventing vectorization when masked loads are not available. PR tree-optimization/92335 * tree-ssa-sink.cc (select_best_block): Before loop optimizations avoid sinking unconditional loads/stores in innermost loops to conditionally executed places. * gcc.dg/tree-ssa/ssa-sink-10.c: Disable vectorizing. * gcc.dg/tree-ssa/predcom-9.c: Clone from ssa-sink-10.c, expect predictive commoning to happen instead of sinking. * gcc.dg/vect/pr65947-3.c: Adjust.
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c20
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr65947-3.c6
-rw-r--r--gcc/tree-ssa-sink.cc12
4 files changed, 34 insertions, 6 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c
new file mode 100644
index 0000000..b0fb0e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-details -fdump-tree-pcom-details" } */
+
+int x[1024], y[1024], z[1024], w[1024];
+void foo (void)
+{
+ int i;
+ for (i = 1; i < 1024; ++i)
+ {
+ int a = x[i];
+ int b = y[i];
+ int c = x[i-1];
+ int d = y[i-1];
+ if (w[i])
+ z[i] = (a + b) + (c + d);
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "Sinking # VUSE" "sink1" } } */
+/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
index 535cb32..a35014b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sink-details -fno-tree-pre" } */
+/* { dg-options "-O2 -fdump-tree-sink-details -fno-tree-vectorize -fno-tree-pre" } */
int x[1024], y[1024], z[1024], w[1024];
void foo (void)
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index f1bfad6..6b4077e 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -51,10 +51,6 @@ main (void)
return 0;
}
-/* Since the fix for PR97307 which sinks the load of a[i], preventing
- if-conversion to happen, targets that cannot do masked loads only
- vectorize the inline copy. */
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_masked_load } } } */
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_masked_load } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index cf0a32a..d83d7be 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -220,6 +220,18 @@ select_best_block (basic_block early_bb,
if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
return best_bb;
+ /* Avoid turning an unconditional read into a conditional one when we
+ still might want to perform vectorization. */
+ if (best_bb->loop_father == early_bb->loop_father
+ && loop_outer (best_bb->loop_father)
+ && !best_bb->loop_father->inner
+ && gimple_vuse (stmt)
+ && flag_tree_loop_vectorize
+ && !(cfun->curr_properties & PROP_loop_opts_done)
+ && dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, early_bb)
+ && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
+ return early_bb;
+
/* Get the sinking threshold. If the statement to be moved has memory
operands, then increase the threshold by 7% as those are even more
profitable to avoid, clamping at 100%. */