diff options
author | Richard Biener <rguenther@suse.de> | 2023-07-27 15:34:12 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2023-08-02 10:45:12 +0200 |
commit | 399c8dd44ff44f4b496223c7cc980651c4d6f6a0 (patch) | |
tree | 9a88c939fd53b01c26014f219191d46251408406 /gcc | |
parent | ee20be8325f7f257ba91a0201cfb3bd6bfbceba9 (diff) | |
download | gcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.zip gcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.tar.gz gcc-399c8dd44ff44f4b496223c7cc980651c4d6f6a0.tar.bz2 |
tree-optimization/92335 - Improve sinking heuristics for vectorization
The following delays sinking of loads within the same innermost
loop when they were unconditional before. Sinking them is a fairly
common issue preventing vectorization when masked loads are not available.
PR tree-optimization/92335
* tree-ssa-sink.cc (select_best_block): Before loop
optimizations avoid sinking unconditional loads/stores
in innermost loops to conditionally executed places.
* gcc.dg/tree-ssa/ssa-sink-10.c: Disable vectorizing.
* gcc.dg/tree-ssa/predcom-9.c: Clone from ssa-sink-10.c,
expect predictive commoning to happen instead of sinking.
* gcc.dg/vect/pr65947-3.c: Adjust.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr65947-3.c | 6 | ||||
-rw-r--r-- | gcc/tree-ssa-sink.cc | 12 |
4 files changed, 34 insertions, 6 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c new file mode 100644 index 0000000..b0fb0e2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-sink-details -fdump-tree-pcom-details" } */ + +int x[1024], y[1024], z[1024], w[1024]; +void foo (void) +{ + int i; + for (i = 1; i < 1024; ++i) + { + int a = x[i]; + int b = y[i]; + int c = x[i-1]; + int d = y[i-1]; + if (w[i]) + z[i] = (a + b) + (c + d); + } +} + +/* { dg-final { scan-tree-dump-not "Sinking # VUSE" "sink1" } } */ +/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c index 535cb32..a35014b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-sink-details -fno-tree-pre" } */ +/* { dg-options "-O2 -fdump-tree-sink-details -fno-tree-vectorize -fno-tree-pre" } */ int x[1024], y[1024], z[1024], w[1024]; void foo (void) diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index f1bfad6..6b4077e 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -51,10 +51,6 @@ main (void) return 0; } -/* Since the fix for PR97307 which sinks the load of a[i], preventing - if-conversion to happen, targets that cannot do masked loads only - vectorize the inline copy. */ -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_masked_load } } } */ -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! 
vect_masked_load } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc index cf0a32a..d83d7be 100644 --- a/gcc/tree-ssa-sink.cc +++ b/gcc/tree-ssa-sink.cc @@ -220,6 +220,18 @@ select_best_block (basic_block early_bb, if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb)) return best_bb; + /* Avoid turning an unconditional read into a conditional one when we + still might want to perform vectorization. */ + if (best_bb->loop_father == early_bb->loop_father + && loop_outer (best_bb->loop_father) + && !best_bb->loop_father->inner + && gimple_vuse (stmt) + && flag_tree_loop_vectorize + && !(cfun->curr_properties & PROP_loop_opts_done) + && dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, early_bb) + && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb)) + return early_bb; + /* Get the sinking threshold. If the statement to be moved has memory operands, then increase the threshold by 7% as those are even more profitable to avoid, clamping at 100%. */ |