aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2023-08-03 10:59:52 +0200
committer Richard Biener <rguenther@suse.de> 2023-08-03 13:20:00 +0200
commit 3d48c11ad082def8ee237e5778d8a5d569bff96d (patch)
tree 9b254147c9b6be872a17208f62d62bf38b24935c
parent fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1 (diff)
download gcc-3d48c11ad082def8ee237e5778d8a5d569bff96d.zip
gcc-3d48c11ad082def8ee237e5778d8a5d569bff96d.tar.gz
gcc-3d48c11ad082def8ee237e5778d8a5d569bff96d.tar.bz2
Swap loop splitting and final value replacement
The following swaps the loop splitting pass and the final value replacement pass to avoid keeping the IV of the earlier loop live when not necessary. The existing gcc.target/i386/pr87007-5.c testcase shows that we otherwise fail to elide an empty loop later.

I don't see any good reason why loop splitting would need final value replacement; all exit values honor the constraints we place on loop header PHIs automatically.

	* passes.def: Exchange loop splitting and final value replacement passes.

	* gcc.target/i386/pr87007-5.c: Make sure we split the loop and eliminate both in the end.
-rw-r--r-- gcc/passes.def 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr87007-5.c 4
2 files changed, 4 insertions, 2 deletions
diff --git a/gcc/passes.def b/gcc/passes.def
index f2893ae..ef5a21a 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -282,8 +282,8 @@ along with GCC; see the file COPYING3. If not see
form if possible. */
NEXT_PASS (pass_tree_loop_init);
NEXT_PASS (pass_tree_unswitch);
- NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
+ NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_versioning);
NEXT_PASS (pass_loop_jam);
/* All unswitching, final value replacement and splitting can expose
diff --git a/gcc/testsuite/gcc.target/i386/pr87007-5.c b/gcc/testsuite/gcc.target/i386/pr87007-5.c
index b36e81c..a6cdf11 100644
--- a/gcc/testsuite/gcc.target/i386/pr87007-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr87007-5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse -fno-tree-vectorize" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mfpmath=sse -fno-tree-vectorize -fdump-tree-cddce3-details -fdump-tree-lsplit-optimized" } */
/* Load of d2/d3 is hoisted out, vrndscalesd will reuse loades register to avoid partial dependence. */
#include<math.h>
@@ -15,4 +15,6 @@ foo (int n, int k)
d1 = sqrt (d3);
}
+/* { dg-final { scan-tree-dump "optimized: loop split" "lsplit" } } */
+/* { dg-final { scan-tree-dump-times "removing loop" 2 "cddce3" } } */
/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 0 } } */