From 4aded535ea6ad7c362ab62d99af70e53c186d582 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 12 Mar 2020 16:09:27 -0600 Subject: Remove no-op register to register copies in CSE just like we remove no-op memory to memory copies. PR rtl-optimization/90275 * cse.c (cse_insn): Delete no-op register moves too. PR rtl-optimization/90275 * gcc.c-torture/compile/pr90275.c: New test. --- gcc/cse.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'gcc/cse.c') diff --git a/gcc/cse.c b/gcc/cse.c index 79ee0ce..08984c1 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -4625,7 +4625,7 @@ cse_insn (rtx_insn *insn) for (i = 0; i < n_sets; i++) { bool repeat = false; - bool mem_noop_insn = false; + bool noop_insn = false; rtx src, dest; rtx src_folded; struct table_elt *elt = 0, *p; @@ -5324,9 +5324,11 @@ cse_insn (rtx_insn *insn) } /* Similarly, lots of targets don't allow no-op - (set (mem x) (mem x)) moves. */ + (set (mem x) (mem x)) moves. Even (set (reg x) (reg x)) + might be impossible for certain registers (like CC registers). */ else if (n_sets == 1 - && MEM_P (trial) + && !CALL_P (insn) + && (MEM_P (trial) || REG_P (trial)) && MEM_P (dest) && rtx_equal_p (trial, dest) && !side_effects_p (dest) @@ -5334,7 +5336,7 @@ cse_insn (rtx_insn *insn) || insn_nothrow_p (insn))) { SET_SRC (sets[i].rtl) = trial; - mem_noop_insn = true; + noop_insn = true; break; } @@ -5562,8 +5564,8 @@ cse_insn (rtx_insn *insn) sets[i].rtl = 0; } - /* Similarly for no-op MEM moves. */ - else if (mem_noop_insn) + /* Similarly for no-op moves. */ + else if (noop_insn) { if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) cse_cfg_altered = true; -- cgit v1.1 From 529ea7d9596b26ba103578eeab448e9862a2d2c5 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Wed, 18 Mar 2020 16:07:28 -0600 Subject: Complete change to resolve pr90275. PR rtl-optimization/90275 * cse.c (cse_insn): Delete no-op register moves too. --- gcc/cse.c | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc/cse.c') diff --git a/gcc/cse.c b/gcc/cse.c index 08984c1..3e8724b 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -5329,7 +5329,6 @@ cse_insn (rtx_insn *insn) else if (n_sets == 1 && !CALL_P (insn) && (MEM_P (trial) || REG_P (trial)) - && MEM_P (dest) && rtx_equal_p (trial, dest) && !side_effects_p (dest) && (cfun->can_delete_dead_exceptions -- cgit v1.1 From dd9ca9d770a18ce4b16d867f49fef3293b483ff5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 8 Apr 2020 14:04:35 +0200 Subject: rtl-optimization/93946 - fix TBAA for redundant store removal in CSE It turns out RTL CSE tries to remove redundant stores but fails to do the usual validity check what such a change is TBAA neutral to later loads. This now triggers with the PR93946 testcases on nios2. 2020-04-08 Richard Biener PR rtl-optimization/93946 * cse.c (cse_insn): Record the tabled expression in src_related. Verify a redundant store removal is valid. --- gcc/cse.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'gcc/cse.c') diff --git a/gcc/cse.c b/gcc/cse.c index 3e8724b..f07bbdb 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -5074,7 +5074,7 @@ cse_insn (rtx_insn *insn) to prefer it. Copy it to src_related. The code below will then give it a negative cost. */ if (GET_CODE (dest) == code && rtx_equal_p (p->exp, dest)) - src_related = dest; + src_related = p->exp; } /* Find the cheapest valid equivalent, trying all the available @@ -5332,7 +5332,16 @@ cse_insn (rtx_insn *insn) && rtx_equal_p (trial, dest) && !side_effects_p (dest) && (cfun->can_delete_dead_exceptions - || insn_nothrow_p (insn))) + || insn_nothrow_p (insn)) + /* We can only remove the later store if the earlier aliases + at least all accesses the later one. */ + && (!MEM_P (trial) + || ((MEM_ALIAS_SET (dest) == MEM_ALIAS_SET (trial) + || alias_set_subset_of (MEM_ALIAS_SET (dest), + MEM_ALIAS_SET (trial))) + && (!MEM_EXPR (trial) + || refs_same_for_tbaa_p (MEM_EXPR (trial), + MEM_EXPR (dest)))))) { SET_SRC (sets[i].rtl) = trial; noop_insn = true; -- cgit v1.1 From 3737ccc424c56a2cecff202dd79f88d28850eeb2 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Fri, 17 Apr 2020 15:38:13 -0600 Subject: [committed] [PR rtl-optimization/90275] Another 90275 related cse.c fix This time instead of having a NOP copy insn that we can completely ignore and ultimately remove, we have a NOP set within a multi-set PARALLEL. It triggers, the same failure when the source of such a set is a hard register for the same reasons as we've already noted in the BZ and patches-to-date. For prior cases we've been able to mark the insn as a nop set and ignore it for the rest of cse_insn, ultimately removing it. That's not really an option here as there are other sets that we have to preserve. We might be able to fix this instance by splitting the multi-set insn, but I'm not keen to introduce splitting into cse. Furthermore, the target may not be able to split the insn. So I considered this is non-starter. What I finally settled on was to use the existing do_not_record machinery to ignore the nop set within the parallel (and only that set within the parallel). One might argue that we should always ignore a REG_UNUSED set. But I rejected that idea -- we could have cse-able divmod insns where the first had a REG_UNUSED note for a destination, but the second did not. One might also argue that we could have a nop set without a REG_UNUSED in a multi-set parallel and thus we could trigger yet another insert_regs ICE at some point. I tend to think this is a possibility. If we see this happen, we'll have to revisit. PR rtl-optimization/90275 * cse.c (cse_insn): Avoid recording nop sets in multi-set parallels when the destination has a REG_UNUSED note. --- gcc/cse.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'gcc/cse.c') diff --git a/gcc/cse.c b/gcc/cse.c index f07bbdb..5aaba8d 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -4715,8 +4715,20 @@ cse_insn (rtx_insn *insn) /* Compute SRC's hash code, and also notice if it should not be recorded at all. In that case, - prevent any further processing of this assignment. */ - do_not_record = 0; + prevent any further processing of this assignment. + + We set DO_NOT_RECORD if the destination has a REG_UNUSED note. + This avoids getting the source register into the tables, where it + may be invalidated later (via REG_QTY), then trigger an ICE upon + re-insertion. + + This is only a problem in multi-set insns. If it were a single + set the dead copy would have been removed. If the RHS were anything + but a simple REG, then we won't call insert_regs and thus there's + no potential for triggering the ICE. */ + do_not_record = (REG_P (dest) + && REG_P (src) + && find_reg_note (insn, REG_UNUSED, dest)); hash_arg_in_memory = 0; sets[i].src = src; -- cgit v1.1 From 66ec22b0d3feb96049283abe5c6c9a05ecef8b86 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 30 Apr 2020 20:00:52 +0100 Subject: cse: Use simplify_replace_fn_rtx to process notes [PR94740] cse_process_notes did a very simple substitution, which in the wrong circumstances could create non-canonical RTL and invalid MEMs. Various sticking plasters have been applied to cse_process_notes_1 to handle cases like ZERO_EXTEND, SIGN_EXTEND and UNSIGNED_FLOAT, but I think this PR is a plaster too far. The code is trying hard to avoid creating unnecessary rtl, which of course is a good thing. If we continue to do that, then we can end up changing subexpressions while keeping the containing rtx. This in turn means that validate_change will be a no-op on the containing rtx, even if its contents have changed. So in these cases we have to apply validate_change to the individual subexpressions. On the other hand, if we always apply validate_change to the individual subexpressions, we'll end up calling validate_change on something before it has been simplified and canonicalised. And that's one of the situations we're trying to avoid. There might be a middle ground in which we queue the validate_changes as part of a group, and so can cancel the pending validate_changes for subexpressions if there's a change in the outer expression. But that seems even more ad-hoc than the current code. It would also be quite an invasive change. I think the best thing is just to hook into the existing simplify_replace_fn_rtx function, keeping the REG and MEM handling from cse_process_notes_1 essentially unchanged. It can generate more redundant rtl when a simplification takes place, but it has the advantage of being relative well-used code (both directly and via simplify_replace_rtx). 2020-04-30 Richard Sandiford gcc/ PR rtl-optimization/94740 * cse.c (cse_process_notes_1): Replace with... (cse_process_note_1): ...this new function, acting as a simplify_replace_fn_rtx callback to process_note. Handle only REGs and MEMs directly. Validate the MEM if cse_process_note changes its address. (cse_process_notes): Replace with... (cse_process_note): ...this new function. (cse_extended_basic_block): Update accordingly, iterating over the register notes and passing individual notes to cse_process_note. --- gcc/cse.c | 118 ++++++++++++++++++-------------------------------------------- 1 file changed, 34 insertions(+), 84 deletions(-) (limited to 'gcc/cse.c') diff --git a/gcc/cse.c b/gcc/cse.c index 5aaba8d..36bcfc3 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -585,7 +585,6 @@ static void cse_insn (rtx_insn *); static void cse_prescan_path (struct cse_basic_block_data *); static void invalidate_from_clobbers (rtx_insn *); static void invalidate_from_sets_and_clobbers (rtx_insn *); -static rtx cse_process_notes (rtx, rtx, bool *); static void cse_extended_basic_block (struct cse_basic_block_data *); extern void dump_class (struct table_elt*); static void get_cse_reg_info_1 (unsigned int regno); @@ -6222,75 +6221,28 @@ invalidate_from_sets_and_clobbers (rtx_insn *insn) } } -/* Process X, part of the REG_NOTES of an insn. Look at any REG_EQUAL notes - and replace any registers in them with either an equivalent constant - or the canonical form of the register. If we are inside an address, - only do this if the address remains valid. +static rtx cse_process_note (rtx); - OBJECT is 0 except when within a MEM in which case it is the MEM. +/* A simplify_replace_fn_rtx callback for cse_process_note. Process X, + part of the REG_NOTES of an insn. Replace any registers with either + an equivalent constant or the canonical form of the register. + Only replace addresses if the containing MEM remains valid. - Return the replacement for X. */ + Return the replacement for X, or null if it should be simplified + recursively. */ static rtx -cse_process_notes_1 (rtx x, rtx object, bool *changed) +cse_process_note_1 (rtx x, const_rtx, void *) { - enum rtx_code code = GET_CODE (x); - const char *fmt = GET_RTX_FORMAT (code); - int i; - - switch (code) + if (MEM_P (x)) { - case CONST: - case SYMBOL_REF: - case LABEL_REF: - CASE_CONST_ANY: - case PC: - case CC0: - case LO_SUM: + validate_change (x, &XEXP (x, 0), cse_process_note (XEXP (x, 0)), false); return x; + } - case MEM: - validate_change (x, &XEXP (x, 0), - cse_process_notes (XEXP (x, 0), x, changed), 0); - return x; - - case EXPR_LIST: - if (REG_NOTE_KIND (x) == REG_EQUAL) - XEXP (x, 0) = cse_process_notes (XEXP (x, 0), NULL_RTX, changed); - /* Fall through. */ - - case INSN_LIST: - case INT_LIST: - if (XEXP (x, 1)) - XEXP (x, 1) = cse_process_notes (XEXP (x, 1), NULL_RTX, changed); - return x; - - case SIGN_EXTEND: - case ZERO_EXTEND: - case SUBREG: - { - rtx new_rtx = cse_process_notes (XEXP (x, 0), object, changed); - /* We don't substitute VOIDmode constants into these rtx, - since they would impede folding. */ - if (GET_MODE (new_rtx) != VOIDmode) - validate_change (object, &XEXP (x, 0), new_rtx, 0); - return x; - } - - case UNSIGNED_FLOAT: - { - rtx new_rtx = cse_process_notes (XEXP (x, 0), object, changed); - /* We don't substitute negative VOIDmode constants into these rtx, - since they would impede folding. */ - if (GET_MODE (new_rtx) != VOIDmode - || (CONST_INT_P (new_rtx) && INTVAL (new_rtx) >= 0) - || (CONST_DOUBLE_P (new_rtx) && CONST_DOUBLE_HIGH (new_rtx) >= 0)) - validate_change (object, &XEXP (x, 0), new_rtx, 0); - return x; - } - - case REG: - i = REG_QTY (REGNO (x)); + if (REG_P (x)) + { + int i = REG_QTY (REGNO (x)); /* Return a constant or a constant register. */ if (REGNO_QTY_VALID_P (REGNO (x))) @@ -6309,26 +6261,19 @@ cse_process_notes_1 (rtx x, rtx object, bool *changed) /* Otherwise, canonicalize this register. */ return canon_reg (x, NULL); - - default: - break; } - for (i = 0; i < GET_RTX_LENGTH (code); i++) - if (fmt[i] == 'e') - validate_change (object, &XEXP (x, i), - cse_process_notes (XEXP (x, i), object, changed), 0); - - return x; + return NULL_RTX; } +/* Process X, part of the REG_NOTES of an insn. Replace any registers in it + with either an equivalent constant or the canonical form of the register. + Only replace addresses if the containing MEM remains valid. */ + static rtx -cse_process_notes (rtx x, rtx object, bool *changed) +cse_process_note (rtx x) { - rtx new_rtx = cse_process_notes_1 (x, object, changed); - if (new_rtx != x) - *changed = true; - return new_rtx; + return simplify_replace_fn_rtx (x, NULL_RTX, cse_process_note_1, NULL); } @@ -6623,14 +6568,19 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data) { /* Process notes first so we have all notes in canonical forms when looking for duplicate operations. */ - if (REG_NOTES (insn)) - { - bool changed = false; - REG_NOTES (insn) = cse_process_notes (REG_NOTES (insn), - NULL_RTX, &changed); - if (changed) - df_notes_rescan (insn); - } + bool changed = false; + for (rtx note = REG_NOTES (insn); note; note = XEXP (note, 1)) + if (REG_NOTE_KIND (note) == REG_EQUAL) + { + rtx newval = cse_process_note (XEXP (note, 0)); + if (newval != XEXP (note, 0)) + { + XEXP (note, 0) = newval; + changed = true; + } + } + if (changed) + df_notes_rescan (insn); cse_insn (insn); -- cgit v1.1