author     Hans-Peter Nilsson <hp@axis.com>        2023-05-22 04:12:15 +0200
committer  Hans-Peter Nilsson <hp@bitrange.com>    2023-06-04 03:13:03 +0200
commit     3991b2f623d22dea19c2558852a96e313a521a44 (patch)
tree       b0c8f2c3410512aaac9ddae37bdfdb624a247b71
parent     8bdfa8a67a54545be1d90c46fb88a676695f25dc (diff)
reload_cse_move2add: Handle trivial single_set:s

The reload_cse_move2add part of "postreload" handled only insns whose
PATTERN was a SET.  That excludes insns that e.g. clobber a flags
register, which they often do only for "simplicity".  This patch extends
the "simplicity" to most single_set insns.

For a subset of those insns there's still an assumption: that the
single_set of a PARALLEL insn is the first element in the PARALLEL.
If the assumption fails, it's no biggie; the optimization just isn't
performed.

Don't let the name deceive you: this optimization doesn't hit often,
but as often (or as rarely) for LRA as for reload, at least on
e.g. cris-elf, where the biggest effect was seen in reducing repeated
addresses in copies from fixed-address arrays, like in
gcc.c-torture/compile/pr78694.c.

	* postreload.cc (move2add_use_add2_insn): Handle trivial
	single_sets.  Rename variable PAT to SET.
	(move2add_use_add3_insn, reload_cse_move2add): Similar.
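
As an illustrative aside (not part of the commit), the check the patch adds
can be pictured as the sketch below.  It uses GCC's existing RTL accessors
(single_set, PATTERN, GET_CODE, XVECEXP), but the helper name
find_trivial_single_set_loc is made up here for illustration.  It yields the
location of the insn's single_set only when that SET is either the whole
pattern or the first element of a PARALLEL, e.g. the common
(parallel [(set ...) (clobber (reg:CC ...))]) shape on targets whose moves
clobber a flags register.

    /* Illustrative sketch only, not code from the patch.  Locate the
       single_set of INSN where the simplified postreload code expects
       it: either as the whole PATTERN, or as the first element of a
       PARALLEL such as
	   (parallel [(set (reg:SI 0) (const_int 42))
		      (clobber (reg:CC 19))])
       Return the address of that SET (suitable for validate_change),
       or NULL when the assumption does not hold.  */

    static rtx *
    find_trivial_single_set_loc (rtx_insn *insn)
    {
      rtx set = single_set (insn);
      if (!set)
	return NULL;

      rtx *setloc = GET_CODE (PATTERN (insn)) == PARALLEL
	? &XVECEXP (PATTERN (insn), 0, 0) : &PATTERN (insn);

      /* If the single_set sits anywhere else in the PARALLEL, give up;
	 the optimization is simply skipped.  */
      return *setloc == set ? setloc : NULL;
    }

If the SET is not where it is expected, the sketch (like the patch) gives up
rather than searching the PARALLEL, which preserves the "simplicity" the old
code relied on.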
-rw-r--r--   gcc/postreload.cc   65
1 file changed, 36 insertions, 29 deletions
diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index fb39265..b479d4b 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -1744,8 +1744,8 @@ static bool
move2add_use_add2_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
rtx_insn *insn)
{
- rtx pat = PATTERN (insn);
- rtx src = SET_SRC (pat);
+ rtx set = single_set (insn);
+ rtx src = SET_SRC (set);
int regno = REGNO (reg);
rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[regno], mode);
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
@@ -1764,21 +1764,21 @@ move2add_use_add2_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
(reg)), would be discarded. Maybe we should
try a truncMN pattern? */
if (INTVAL (off) == reg_offset [regno])
- changed = validate_change (insn, &SET_SRC (pat), reg, 0);
+ changed = validate_change (insn, &SET_SRC (set), reg, 0);
}
else
{
struct full_rtx_costs oldcst, newcst;
rtx tem = gen_rtx_PLUS (mode, reg, new_src);
- get_full_set_rtx_cost (pat, &oldcst);
- SET_SRC (pat) = tem;
- get_full_set_rtx_cost (pat, &newcst);
- SET_SRC (pat) = src;
+ get_full_set_rtx_cost (set, &oldcst);
+ SET_SRC (set) = tem;
+ get_full_set_rtx_cost (set, &newcst);
+ SET_SRC (set) = src;
if (costs_lt_p (&newcst, &oldcst, speed)
&& have_add2_insn (reg, new_src))
- changed = validate_change (insn, &SET_SRC (pat), tem, 0);
+ changed = validate_change (insn, &SET_SRC (set), tem, 0);
else if (sym == NULL_RTX && mode != BImode)
{
scalar_int_mode narrow_mode;
@@ -1796,10 +1796,15 @@ move2add_use_add2_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
narrow_reg),
narrow_src);
get_full_set_rtx_cost (new_set, &newcst);
- if (costs_lt_p (&newcst, &oldcst, speed))
+
+ /* We perform this replacement only if INSN is either a
+ naked SET, or else its single_set is the first element
+ in a PARALLEL.  */
+ rtx *setloc = GET_CODE (PATTERN (insn)) == PARALLEL
+ ? &XVECEXP (PATTERN (insn), 0, 0) : &PATTERN (insn);
+ if (*setloc == set && costs_lt_p (&newcst, &oldcst, speed))
{
- changed = validate_change (insn, &PATTERN (insn),
- new_set, 0);
+ changed = validate_change (insn, setloc, new_set, 0);
if (changed)
break;
}
@@ -1825,8 +1830,8 @@ static bool
move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
rtx_insn *insn)
{
- rtx pat = PATTERN (insn);
- rtx src = SET_SRC (pat);
+ rtx set = single_set (insn);
+ rtx src = SET_SRC (set);
int regno = REGNO (reg);
int min_regno = 0;
bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
@@ -1836,10 +1841,10 @@ move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
rtx plus_expr;
init_costs_to_max (&mincst);
- get_full_set_rtx_cost (pat, &oldcst);
+ get_full_set_rtx_cost (set, &oldcst);
plus_expr = gen_rtx_PLUS (GET_MODE (reg), reg, const0_rtx);
- SET_SRC (pat) = plus_expr;
+ SET_SRC (set) = plus_expr;
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (move2add_valid_value_p (i, mode)
@@ -1864,7 +1869,7 @@ move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
else
{
XEXP (plus_expr, 1) = new_src;
- get_full_set_rtx_cost (pat, &newcst);
+ get_full_set_rtx_cost (set, &newcst);
if (costs_lt_p (&newcst, &mincst, speed))
{
@@ -1873,7 +1878,7 @@ move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
}
}
}
- SET_SRC (pat) = src;
+ SET_SRC (set) = src;
if (costs_lt_p (&mincst, &oldcst, speed))
{
@@ -1886,7 +1891,7 @@ move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
GET_MODE (reg));
tem = gen_rtx_PLUS (GET_MODE (reg), tem, new_src);
}
- if (validate_change (insn, &SET_SRC (pat), tem, 0))
+ if (validate_change (insn, &SET_SRC (set), tem, 0))
changed = true;
}
reg_set_luid[regno] = move2add_luid;
@@ -1916,7 +1921,7 @@ reload_cse_move2add (rtx_insn *first)
move2add_luid = 2;
for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
{
- rtx pat, note;
+ rtx set, note;
if (LABEL_P (insn))
{
@@ -1929,17 +1934,17 @@ reload_cse_move2add (rtx_insn *first)
}
if (! INSN_P (insn))
continue;
- pat = PATTERN (insn);
+ set = single_set (insn);
/* For simplicity, we only perform this optimization on
- straightforward SETs. */
+ single-sets. */
scalar_int_mode mode;
- if (GET_CODE (pat) == SET
- && REG_P (SET_DEST (pat))
- && is_a <scalar_int_mode> (GET_MODE (SET_DEST (pat)), &mode))
+ if (set
+ && REG_P (SET_DEST (set))
+ && is_a <scalar_int_mode> (GET_MODE (SET_DEST (set)), &mode))
{
- rtx reg = SET_DEST (pat);
+ rtx reg = SET_DEST (set);
int regno = REGNO (reg);
- rtx src = SET_SRC (pat);
+ rtx src = SET_SRC (set);
/* Check if we have valid information on the contents of this
register in the mode of REG. */
@@ -2021,13 +2026,15 @@ reload_cse_move2add (rtx_insn *first)
SET_SRC (set) = old_src;
costs_add_n_insns (&oldcst, 1);
- if (costs_lt_p (&newcst, &oldcst, speed)
+ rtx *setloc = GET_CODE (PATTERN (next)) == PARALLEL
+ ? &XVECEXP (PATTERN (next), 0, 0) : &PATTERN (next);
+ if (*setloc == set
+ && costs_lt_p (&newcst, &oldcst, speed)
&& have_add2_insn (reg, new_src))
{
rtx newpat = gen_rtx_SET (reg, tem);
success
- = validate_change (next, &PATTERN (next),
- newpat, 0);
+ = validate_change (next, setloc, newpat, 0);
}
}
if (success)