author		Jakub Jelinek <jakub@redhat.com>	2023-11-29 09:14:03 +0100
committer	Jakub Jelinek <jakub@redhat.com>	2023-11-29 09:14:03 +0100
commit		9582538cf07d83d7e80553827de8b0f91e4705d8 (patch)
tree		65bd835e1ff3a8d743cf95c270258949c89d95f1
parent		3f9eb37fb765c23ff3af8185570c56cfb6c037f3 (diff)
fold-mem-offsets: Fix powerpc64le-linux profiledbootstrap [PR111601]
The introduction of the fold-mem-offsets pass breaks profiledbootstrap
on powerpc64le-linux.
From what I can see, the pass works one basic block at a time and
will punt on any non-DEBUG_INSN uses outside of the current block,
I believe because of the
      /* This use affects instructions outside of CAN_FOLD_INSNS.  */
      if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
	return 0;
test, with can_fold_insns only set in do_analysis (when processing insns
in the current bb, cleared at the end) or to the results of
get_single_def_in_bb (which are checked to be in the same bb).
But, while get_single_def_in_bb checks for
  if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
    return NULL;
get_uses performs no such check.  The basic block in the PR in question
has:
...
(insn 212 210 215 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) [2 *last_viable_336+0 S8 A64])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":50:17 683 {*movdi_internal64}
     (expr_list:REG_DEAD (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (nil)))
(insn 215 212 484 25 (set (reg:DI 5 5 [226])
        (const_int 0 [0])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_EQUIV (const_int 0 [0])
        (nil)))
(insn 484 215 218 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (nil))
...
(insn 564 214 216 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (plus:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
            (const_int 96 [0x60]))) "pr111601.ii":52:12 66 {*adddi3}
     (nil))
(insn 216 564 219 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152]) [2 _343->next+0 S8 A64])
        (reg:DI 5 5 [226])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 5 5 [226])
        (nil)))
...
and when asking for all uses of %r10 from the def in insn 564, the pass
sees uses in insns 216 and 212; the former comes after the += 96
addition and gets changed to a store at %r10+96 with the addition being
dropped, but the latter store is a use across the backedge, and when it
is reached from other edges there certainly is no + 96 addition
anywhere, so the pass doesn't actually change that location, even
though dropping the addition changes the value %r10 carries around the
backedge.
This patch adds the checks from get_single_def_in_bb to get_uses as
well; in particular, it verifies that each (regular, non-debug) use
appears in the same basic block as the definition and that it doesn't
appear before it (i.e. is not a use across the backedge).

2023-11-29  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/111601
	* fold-mem-offsets.cc (get_uses): Ignore DEBUG_INSN uses.  Otherwise,
	punt if use is in a different basic block from INSN or appears before
	INSN in the same basic block.  Formatting fixes.
	(get_single_def_in_bb): Formatting fixes.
	(fold_offsets_1, pass_fold_mem_offsets::execute): Comment formatting
	fixes.

	* g++.dg/opt/pr111601.C: New test.
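To make the hazard concrete, the same shape can be written down in a few
lines of C.  This is an illustrative sketch with made-up names, not the
PR's preprocessed source; it only mirrors the insn pattern quoted above
(with "next" placed at byte offset 96 so that "last = &c->next" is the
reg += 96 of insn 564):

  struct node { void *pad[12]; struct node *next; };	/* next at offset 96 on LP64 */

  void
  splice (struct node *c, struct node **last)
  {
    while (c)
      {
	struct node *n = c->next;
	*last = c;	  /* like insn 212: reads the reg's value from the back edge */
	c->next = 0;	  /* like insn 216: address is reg + 96 once the add runs */
	last = &c->next;  /* like insn 564: the reg += 96 */
	c = n;
      }
  }

Folding the + 96 into the c->next store and deleting the addition would
be fine for that store alone, but the *last store would then observe a
stale register value on the next trip around the loop.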
-rw-r--r--	gcc/fold-mem-offsets.cc             | 28
-rw-r--r--	gcc/testsuite/g++.dg/opt/pr111601.C | 86
2 files changed, 105 insertions(+), 9 deletions(-)
diff --git a/gcc/fold-mem-offsets.cc b/gcc/fold-mem-offsets.cc
index 6263fc7..7ba5600 100644
--- a/gcc/fold-mem-offsets.cc
+++ b/gcc/fold-mem-offsets.cc
@@ -154,7 +154,7 @@ static int stats_fold_count;
    The definition is desired for REG used in INSN.
    Return the definition insn or NULL if there's no definition with
    the desired criteria.  */
-static rtx_insn*
+static rtx_insn *
 get_single_def_in_bb (rtx_insn *insn, rtx reg)
 {
   df_ref use;
@@ -205,11 +205,10 @@ get_single_def_in_bb (rtx_insn *insn, rtx reg)
 /* Get all uses of REG which is set in INSN.  Return the use list or NULL if a
    use is missing / irregular.  If SUCCESS is not NULL then set it to false if
    there are missing / irregular uses and true otherwise.  */
-static struct df_link*
+static df_link *
 get_uses (rtx_insn *insn, rtx reg, bool *success)
 {
   df_ref def;
-  struct df_link *ref_chain, *ref_link;
 
   if (success)
     *success = false;
@@ -221,18 +220,30 @@ get_uses (rtx_insn *insn, rtx reg, bool *success)
   if (!def)
     return NULL;
 
-  ref_chain = DF_REF_CHAIN (def);
+  df_link *ref_chain = DF_REF_CHAIN (def);
+  int insn_luid = DF_INSN_LUID (insn);
+  basic_block insn_bb = BLOCK_FOR_INSN (insn);
 
-  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+  for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
     {
       /* Problem getting a use for this instruction.  */
       if (ref_link->ref == NULL)
	return NULL;
       if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
	return NULL;
+
+      rtx_insn *use = DF_REF_INSN (ref_link->ref);
+      if (DEBUG_INSN_P (use))
+	continue;
+
       /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
       if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
	return NULL;
+      if (BLOCK_FOR_INSN (use) != insn_bb)
+	return NULL;
+      /* Punt if use appears before def in the basic block.  See PR111601.  */
+      if (DF_INSN_LUID (use) < insn_luid)
+	return NULL;
     }
 
   if (success)
@@ -255,8 +266,7 @@ fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns);
    If DO_RECURSION is true and ANALYZE is false then offset that would result
    from folding is computed and is returned through the pointer OFFSET_OUT.
 
-   The instructions that can be folded are recorded in FOLDABLE_INSNS.
-*/
+   The instructions that can be folded are recorded in FOLDABLE_INSNS.  */
 static bool
 fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
		HOST_WIDE_INT *offset_out, bitmap foldable_insns)
@@ -846,8 +856,8 @@ pass_fold_mem_offsets::execute (function *fn)
   FOR_ALL_BB_FN (bb, fn)
     {
       /* There is a conflict between this pass and RISCV's shorten-memrefs
-	  pass.  For now disable folding if optimizing for size because
-	  otherwise this cancels the effects of shorten-memrefs.  */
+	 pass.  For now disable folding if optimizing for size because
+	 otherwise this cancels the effects of shorten-memrefs.  */
       if (optimize_bb_for_size_p (bb))
	continue;
 
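Detached from GCC's df machinery, the essence of the new guard in
get_uses can be modeled as below.  This is a toy sketch with invented
types, not GCC's API; the only real concepts borrowed are that
DF_INSN_LUID numbers insns by their position within a block and that
debug insns never constrain optimization:

  #include <vector>

  /* Toy model: an insn is (bb, luid, is_debug).  Folding is allowed only
     if every non-debug use sits in the def's block and not before it in
     the block's linear order; an earlier use can only be reached through
     a back edge (PR111601).  */
  struct toy_insn { int bb; int luid; bool is_debug; };

  static bool
  uses_ok (const toy_insn &def, const std::vector<toy_insn> &uses)
  {
    for (const toy_insn &use : uses)
      {
	if (use.is_debug)
	  continue;		/* debug uses are ignored  */
	if (use.bb != def.bb)
	  return false;		/* use escapes the basic block  */
	if (use.luid < def.luid)
	  return false;		/* use precedes def: back-edge use  */
      }
    return true;
  }

  int
  main ()
  {
    toy_insn def = { 25, 8, false };	/* stands in for insn 564 */
    toy_insn use216 = { 25, 9, false };	/* after the addition: fine */
    toy_insn use212 = { 25, 3, false };	/* before it: must punt */
    return uses_ok (def, { use216, use212 }) ? 1 : 0;	/* exits 0: punt */
  }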
diff --git a/gcc/testsuite/g++.dg/opt/pr111601.C b/gcc/testsuite/g++.dg/opt/pr111601.C
new file mode 100644
index 0000000..a5019e9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr111601.C
@@ -0,0 +1,86 @@
+// PR bootstrap/111601
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" }
+// { dg-require-profiling "-fprofile-generate" }
+// { dg-final { cleanup-coverage-files } }
+
+struct tree_base
+{
+  int code:16;
+};
+struct saved_scope
+{
+  void *pad[14];
+  int x_processing_template_decl;
+};
+struct saved_scope *scope_chain;
+struct z_candidate
+{
+  tree_base *fn;
+  void *pad[11];
+  z_candidate *next;
+  int viable;
+  int flags;
+};
+
+__attribute__((noipa)) struct z_candidate *
+splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
+{
+  struct z_candidate *viable;
+  struct z_candidate **last_viable;
+  struct z_candidate **cand;
+  bool found_strictly_viable = false;
+  if (scope_chain->x_processing_template_decl)
+    strict_p = true;
+  viable = (z_candidate *) 0;
+  last_viable = &viable;
+  *any_viable_p = false;
+  cand = &cands;
+  while (*cand)
+    {
+      struct z_candidate *c = *cand;
+      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
+	{
+	  strict_p = true;
+	  if (viable && !found_strictly_viable)
+	    {
+	      *any_viable_p = false;
+	      *last_viable = cands;
+	      cands = viable;
+	      viable = (z_candidate *) 0;
+	      last_viable = &viable;
+	    }
+	}
+      if (strict_p ? c->viable == 1 : c->viable)
+	{
+	  *last_viable = c;
+	  *cand = c->next;
+	  c->next = (z_candidate *) 0;
+	  last_viable = &c->next;
+	  *any_viable_p = true;
+	  if (c->viable == 1)
+	    found_strictly_viable = true;
+	}
+      else
+	cand = &c->next;
+    }
+  return viable ? viable : cands;
+}
+
+int
+main ()
+{
+  saved_scope s{};
+  scope_chain = &s;
+  z_candidate z[4] = {};
+  z[0].next = &z[1];
+  z[1].viable = 1;
+  z[1].next = &z[2];
+  z[2].viable = 1;
+  z[2].next = &z[3];
+  bool b;
+  z_candidate *c = splice_viable (&z[0], true, &b);
+  if (c != &z[1] || z[1].next != &z[2] || z[2].next)
+    __builtin_abort ();
+  return 0;
+}
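(For reference: assuming a configured and built GCC object directory,
the new test can be run on its own from the gcc subdirectory with
something like make check-g++ RUNTESTFLAGS="dg.exp=pr111601.C"; on an
affected target such as powerpc64le-linux, an unfixed compiler
miscompiles splice_viable and the test aborts.)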