author     Jakub Jelinek <jakub@redhat.com>  2023-11-29 09:14:03 +0100
committer  Jakub Jelinek <jakub@redhat.com>  2023-11-29 09:14:03 +0100
commit     9582538cf07d83d7e80553827de8b0f91e4705d8 (patch)
tree       65bd835e1ff3a8d743cf95c270258949c89d95f1
parent     3f9eb37fb765c23ff3af8185570c56cfb6c037f3 (diff)
fold-mem-offsets: Fix powerpc64le-linux profiledbootstrap [PR111601]
The introduction of the fold-mem-offsets pass breaks profiledbootstrap
on powerpc64le-linux.
From what I can see, the pass works one basic block at a time and
will punt on any non-DEBUG_INSN uses outside of the current block
(I believe because of the
  /* This use affects instructions outside of CAN_FOLD_INSNS.  */
  if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
    return 0;
test, with can_fold_insns only being set in do_analysis (when processing
insns in the current bb, cleared at the end) or from the results of
get_single_def_in_bb (which are checked to be in the same bb)).
But, while get_single_def_in_bb checks for
  if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
    return NULL;
there is no corresponding ordering check on the use side in get_uses,
so a use that appears before its definition in the block (i.e. a use
reached across a loop backedge) is not rejected.
The basic block in the PR in question has:
...
(insn 212 210 215 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) [2 *last_viable_336+0 S8 A64])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":50:17 683 {*movdi_internal64}
     (expr_list:REG_DEAD (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (nil)))
(insn 215 212 484 25 (set (reg:DI 5 5 [226])
        (const_int 0 [0])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_EQUIV (const_int 0 [0])
        (nil)))
(insn 484 215 218 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (nil))
...
(insn 564 214 216 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (plus:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
            (const_int 96 [0x60]))) "pr111601.ii":52:12 66 {*adddi3}
     (nil))
(insn 216 564 219 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) [2 _343->next+0 S8 A64])
        (reg:DI 5 5 [226])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 5 5 [226])
        (nil)))
...
and when asking for all uses of %r10 from def 564, it will see uses
in 216 and 212; the former comes after the += 96 addition and gets
changed to a store to %r10+96 with the addition being dropped, but the
latter store is a use across the backedge, and when the block is
entered from its other edges %r10 certainly doesn't have the + 96
addition applied anywhere, so the pass doesn't actually change that
location and the dropped addition leaves it storing through a stale
pointer.
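At the source level this is the classic tail-pointer list-building
idiom.  The sketch below is an illustrative distillation, not the PR's
code (the real reproducer is the splice_viable testcase the patch adds);
the node layout putting next at offset 96 is an assumption chosen only
to mirror the + 96 in insn 564:

  #include <cstddef>

  struct node
  {
    char pad[96];   // stand-in for the other members; puts next at offset 96
    node *next;
  };

  node *
  build_chain (node *pool, int n)
  {
    node *head = nullptr;
    node **last = &head;    // like last_viable (%r10)
    for (int i = 0; i < n; ++i)
      {
        node *c = &pool[i];
        *last = c;          // store through the tail pointer (cf. insn 212);
                            // on the backedge this uses %r10 from insn 564
        c->next = nullptr;  // store at c + 96 (cf. insn 216)
        last = &c->next;    // the += 96 addition (cf. insns 484/564)
      }
    return head;
  }

  int
  main ()
  {
    node pool[4] = {};
    return build_chain (pool, 4) == &pool[0] ? 0 : 1;
  }

Folding the + 96 into the c->next store and deleting the addition is
only safe for uses after the addition; the *last store in the next
iteration still needs the adjusted pointer.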
This patch adds the checks from get_single_def_in_bb to get_uses as
well; in particular, it checks that each (regular, non-debug) use
appears in the same basic block as the definition and that it doesn't
appear before the definition (i.e. is not a use across a backedge).
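In effect, get_uses now enforces the same intra-block ordering
discipline that get_single_def_in_bb already had.  Below is a
standalone toy model of the new punt conditions, using made-up insn
records instead of GCC's df machinery (the struct and function names
are invented for illustration):

  #include <vector>

  // Made-up stand-ins for what df provides: each insn knows its basic
  // block (BLOCK_FOR_INSN) and its position in it (DF_INSN_LUID).
  struct toy_insn
  {
    int bb;
    int luid;
    bool is_debug;
  };

  // Mirrors the checks get_uses performs after the fix: debug insns
  // are ignored; any remaining use must be in the def's block and must
  // not precede the def (a same-block use with a smaller luid can only
  // be reached across a backedge -- the PR111601 situation).
  static bool
  uses_ok_to_fold (const toy_insn &def, const std::vector<toy_insn> &uses)
  {
    for (const toy_insn &use : uses)
      {
        if (use.is_debug)
          continue;
        if (use.bb != def.bb)
          return false;
        if (use.luid < def.luid)
          return false;
      }
    return true;
  }

  int
  main ()
  {
    toy_insn def {25, 10, false};     // plays the role of insn 564
    toy_insn later {25, 11, false};   // like insn 216: after the def, OK
    toy_insn earlier {25, 3, false};  // like insn 212: backedge use, punt
    bool ok = uses_ok_to_fold (def, {later})
              && !uses_ok_to_fold (def, {later, earlier});
    return ok ? 0 : 1;
  }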
2023-11-29 Jakub Jelinek <jakub@redhat.com>
PR bootstrap/111601
* fold-mem-offsets.cc (get_uses): Ignore DEBUG_INSN uses. Otherwise,
punt if use is in a different basic block from INSN or appears before
INSN in the same basic block. Formatting fixes.
(get_single_def_in_bb): Formatting fixes.
(fold_offsets_1, pass_fold_mem_offsets::execute): Comment formatting
fixes.
* g++.dg/opt/pr111601.C: New test.
-rw-r--r--  gcc/fold-mem-offsets.cc             | 28
-rw-r--r--  gcc/testsuite/g++.dg/opt/pr111601.C | 86
2 files changed, 105 insertions, 9 deletions
diff --git a/gcc/fold-mem-offsets.cc b/gcc/fold-mem-offsets.cc
index 6263fc7..7ba5600 100644
--- a/gcc/fold-mem-offsets.cc
+++ b/gcc/fold-mem-offsets.cc
@@ -154,7 +154,7 @@ static int stats_fold_count;
    The definition is desired for REG used in INSN.
    Return the definition insn or NULL if there's no definition with
    the desired criteria.  */
-static rtx_insn*
+static rtx_insn *
 get_single_def_in_bb (rtx_insn *insn, rtx reg)
 {
   df_ref use;
@@ -205,11 +205,10 @@ get_single_def_in_bb (rtx_insn *insn, rtx reg)
 /* Get all uses of REG which is set in INSN.
    Return the use list or NULL if a use is missing / irregular.
    If SUCCESS is not NULL then set it to false if there are missing / irregular
    uses and true otherwise.  */
-static struct df_link*
+static df_link *
 get_uses (rtx_insn *insn, rtx reg, bool *success)
 {
   df_ref def;
-  struct df_link *ref_chain, *ref_link;
 
   if (success)
     *success = false;
@@ -221,18 +220,30 @@ get_uses (rtx_insn *insn, rtx reg, bool *success)
   if (!def)
     return NULL;
 
-  ref_chain = DF_REF_CHAIN (def);
+  df_link *ref_chain = DF_REF_CHAIN (def);
+  int insn_luid = DF_INSN_LUID (insn);
+  basic_block insn_bb = BLOCK_FOR_INSN (insn);
 
-  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+  for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
     {
       /* Problem getting a use for this instruction.  */
       if (ref_link->ref == NULL)
         return NULL;
       if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
         return NULL;
+
+      rtx_insn *use = DF_REF_INSN (ref_link->ref);
+      if (DEBUG_INSN_P (use))
+        continue;
+
       /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
       if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
         return NULL;
+      if (BLOCK_FOR_INSN (use) != insn_bb)
+        return NULL;
+      /* Punt if use appears before def in the basic block.  See PR111601.  */
+      if (DF_INSN_LUID (use) < insn_luid)
+        return NULL;
     }
 
   if (success)
@@ -255,8 +266,7 @@ fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);
    If DO_RECURSION is true and ANALYZE is false then offset that would result
    from folding is computed and is returned through the pointer OFFSET_OUT.
-   The instructions that can be folded are recorded in FOLDABLE_INSNS.
-*/
+   The instructions that can be folded are recorded in FOLDABLE_INSNS.  */
 static bool
 fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
                 HOST_WIDE_INT *offset_out, bitmap foldable_insns)
@@ -846,8 +856,8 @@ pass_fold_mem_offsets::execute (function *fn)
   FOR_ALL_BB_FN (bb, fn)
     {
       /* There is a conflict between this pass and RISCV's shorten-memrefs
-          pass.  For now disable folding if optimizing for size because
-          otherwise this cancels the effects of shorten-memrefs.  */
+         pass.  For now disable folding if optimizing for size because
+         otherwise this cancels the effects of shorten-memrefs.  */
       if (optimize_bb_for_size_p (bb))
         continue;
diff --git a/gcc/testsuite/g++.dg/opt/pr111601.C b/gcc/testsuite/g++.dg/opt/pr111601.C
new file mode 100644
index 0000000..a5019e9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr111601.C
@@ -0,0 +1,86 @@
+// PR bootstrap/111601
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" }
+// { dg-require-profiling "-fprofile-generate" }
+// { dg-final { cleanup-coverage-files } }
+
+struct tree_base
+{
+  int code:16;
+};
+struct saved_scope
+{
+  void *pad[14];
+  int x_processing_template_decl;
+};
+struct saved_scope *scope_chain;
+struct z_candidate
+{
+  tree_base *fn;
+  void *pad[11];
+  z_candidate *next;
+  int viable;
+  int flags;
+};
+
+__attribute__((noipa)) struct z_candidate *
+splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
+{
+  struct z_candidate *viable;
+  struct z_candidate **last_viable;
+  struct z_candidate **cand;
+  bool found_strictly_viable = false;
+  if (scope_chain->x_processing_template_decl)
+    strict_p = true;
+  viable = (z_candidate *) 0;
+  last_viable = &viable;
+  *any_viable_p = false;
+  cand = &cands;
+  while (*cand)
+    {
+      struct z_candidate *c = *cand;
+      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
+        {
+          strict_p = true;
+          if (viable && !found_strictly_viable)
+            {
+              *any_viable_p = false;
+              *last_viable = cands;
+              cands = viable;
+              viable = (z_candidate *) 0;
+              last_viable = &viable;
+            }
+        }
+      if (strict_p ? c->viable == 1 : c->viable)
+        {
+          *last_viable = c;
+          *cand = c->next;
+          c->next = (z_candidate *) 0;
+          last_viable = &c->next;
+          *any_viable_p = true;
+          if (c->viable == 1)
+            found_strictly_viable = true;
+        }
+      else
+        cand = &c->next;
+    }
+  return viable ? viable : cands;
+}
+
+int
+main ()
+{
+  saved_scope s{};
+  scope_chain = &s;
+  z_candidate z[4] = {};
+  z[0].next = &z[1];
+  z[1].viable = 1;
+  z[1].next = &z[2];
+  z[2].viable = 1;
+  z[2].next = &z[3];
+  bool b;
+  z_candidate *c = splice_viable (&z[0], true, &b);
+  if (c != &z[1] || z[1].next != &z[2] || z[2].next)
+    __builtin_abort ();
+  return 0;
+}