author     Jakub Jelinek <jakub@redhat.com>  2023-12-06 09:59:12 +0100
committer  Jakub Jelinek <jakub@redhat.com>  2023-12-06 09:59:12 +0100
commit     e44ed92dbbe9d4e5c23f486cd2f77a6f9ee513c5 (patch)
tree       25d5ab78e493c8fe40a5fc3749380df85a94ea29
parent     0ca64f846edce3c7b7f26bcc5978118e560e65b1 (diff)
i386: Move vzeroupper pass from after reload pass to after postreload_cse [PR112760]
Regardless of the outcome of the REG_UNUSED discussions, I think it is a good idea to move the vzeroupper pass one pass later.  As can be seen in the multiple PRs and as postreload.cc documents, reload/LRA is known to create dead statements quite often, which is the reason why we have the postreload_cse pass at all.  Doing the vzeroupper pass before such cleanup means the pass, including the df_analyze run for it, needs to process more instructions than necessary, and, because mode switching adds the df note problem, it also has a higher chance of encountering stale REG_UNUSED notes.  And I really don't see why vzeroupper can't wait until those cleanups are done.

2023-12-06  Jakub Jelinek  <jakub@redhat.com>

        PR rtl-optimization/112760
        * config/i386/i386-passes.def (pass_insert_vzeroupper): Insert
        after pass_postreload_cse rather than pass_reload.
        * config/i386/i386-features.cc (rest_of_handle_insert_vzeroupper):
        Adjust comment for it.

        * gcc.dg/pr112760.c: New test.
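For background: vzeroupper zeroes the upper halves of the YMM/ZMM registers so that later legacy-SSE code does not pay the AVX-SSE transition penalty, and the pass must run after register allocation because reload/LRA can spill 256-bit or 512-bit registers.  A minimal sketch of the kind of function that gets a vzeroupper inserted before its return (assuming -O2 -mavx; the function itself is illustrative and not part of the patch):

  #include <immintrin.h>

  /* Touches the 256-bit YMM registers; when compiled with -O2 -mavx,
     the vzeroupper pass emits a vzeroupper before the return so that a
     legacy-SSE caller avoids the AVX-SSE transition penalty.  */
  double
  sum4 (const double *p)
  {
    __m256d v = _mm256_loadu_pd (p);             /* dirties upper YMM halves */
    __m128d lo = _mm256_castpd256_pd128 (v);     /* low 128 bits */
    __m128d hi = _mm256_extractf128_pd (v, 1);   /* high 128 bits */
    __m128d s = _mm_add_pd (lo, hi);
    s = _mm_add_pd (s, _mm_unpackhi_pd (s, s));  /* horizontal add */
    return _mm_cvtsd_f64 (s);
  }

Moving the pass after postreload_cse means this analysis sees the insn stream only after reload's dead statements have been cleaned up.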
 gcc/config/i386/i386-features.cc |  9 +++++----
 gcc/config/i386/i386-passes.def  |  2 +-
 gcc/testsuite/gcc.dg/pr112760.c  | 22 ++++++++++++++++++++++
 3 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 6fac67e..e6fc135 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -2627,10 +2627,11 @@ convert_scalars_to_vector (bool timode_p)
 static unsigned int
 rest_of_handle_insert_vzeroupper (void)
 {
-  /* vzeroupper instructions are inserted immediately after reload to
-     account for possible spills from 256bit or 512bit registers.  The pass
-     reuses mode switching infrastructure by re-running mode insertion
-     pass, so disable entities that have already been processed.  */
+  /* vzeroupper instructions are inserted immediately after reload and
+     postreload_cse to clean up after it a little bit to account for possible
+     spills from 256bit or 512bit registers.  The pass reuses mode switching
+     infrastructure by re-running mode insertion pass, so disable entities
+     that have already been processed.  */
   for (int i = 0; i < MAX_386_ENTITIES; i++)
     ix86_optimize_mode_switching[i] = 0;
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 90f2234..2d18981 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -24,7 +24,7 @@ along with GCC; see the file COPYING3. If not see
    REPLACE_PASS (PASS, INSTANCE, TGT_PASS)
  */

-  INSERT_PASS_AFTER (pass_reload, 1, pass_insert_vzeroupper);
+  INSERT_PASS_AFTER (pass_postreload_cse, 1, pass_insert_vzeroupper);
   INSERT_PASS_AFTER (pass_combine, 1, pass_stv, false /* timode_p */);
   /* Run the 64-bit STV pass before the CSE pass so that CONST0_RTX and
      CONSTM1_RTX generated by the STV pass can be CSEed.  */
diff --git a/gcc/testsuite/gcc.dg/pr112760.c b/gcc/testsuite/gcc.dg/pr112760.c
new file mode 100644
index 0000000..b4ec70e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112760.c
@@ -0,0 +1,22 @@
+/* PR rtl-optimization/112760 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-dce -fno-guess-branch-probability --param=max-cse-insns=0" } */
+/* { dg-additional-options "-m8bit-idiv -mavx" { target i?86-*-* x86_64-*-* } } */
+
+unsigned g;
+
+__attribute__((__noipa__)) unsigned short
+foo (unsigned short a, unsigned short b)
+{
+  unsigned short x = __builtin_add_overflow_p (a, g, (unsigned short) 0);
+  g -= g / b;
+  return x;
+}
+
+int
+main ()
+{
+  unsigned short x = foo (40, 6);
+  if (x != 0)
+    __builtin_abort ();
+}
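To run just the new test from a built GCC tree, a DejaGnu invocation along these lines should work (assuming a standard build; the exact check target can vary):

  make check-gcc RUNTESTFLAGS="dg.exp=pr112760.c"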