aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-01-04 10:49:33 +0000
committerRoger Sayle <roger@nextmovesoftware.com>2024-01-04 10:49:33 +0000
commit3ac58063114cf491891072be6205d32a42c6707d (patch)
tree2b7ed16c12612dff778b37514385466d90d18379 /gcc
parent4831ad982c08a7d926263666c62cd6e52674f885 (diff)
downloadgcc-3ac58063114cf491891072be6205d32a42c6707d.zip
gcc-3ac58063114cf491891072be6205d32a42c6707d.tar.gz
gcc-3ac58063114cf491891072be6205d32a42c6707d.tar.bz2
Improved RTL expansion of field assignments into promoted registers.
This patch fixes PR rtl-optimization/104914 by tweaking/improving the way the fields are written into a pseudo register that needs to be kept sign-extended. The motivating example from the bugzilla PR is: extern void ext(int); void foo(const unsigned char *buf) { int val; ((unsigned char*)&val)[0] = *buf++; ((unsigned char*)&val)[1] = *buf++; ((unsigned char*)&val)[2] = *buf++; ((unsigned char*)&val)[3] = *buf++; if(val > 0) ext(1); else ext(0); } which at the end of the tree optimization passes looks like: void foo (const unsigned char * buf) { int val; unsigned char _1; unsigned char _2; unsigned char _3; unsigned char _4; int val.5_5; <bb 2> [local count: 1073741824]: _1 = *buf_7(D); MEM[(unsigned char *)&val] = _1; _2 = MEM[(const unsigned char *)buf_7(D) + 1B]; MEM[(unsigned char *)&val + 1B] = _2; _3 = MEM[(const unsigned char *)buf_7(D) + 2B]; MEM[(unsigned char *)&val + 2B] = _3; _4 = MEM[(const unsigned char *)buf_7(D) + 3B]; MEM[(unsigned char *)&val + 3B] = _4; val.5_5 = val; if (val.5_5 > 0) goto <bb 3>; [59.00%] else goto <bb 4>; [41.00%] <bb 3> [local count: 633507681]: ext (1); goto <bb 5>; [100.00%] <bb 4> [local count: 440234144]: ext (0); <bb 5> [local count: 1073741824]: val ={v} {CLOBBER(eol)}; return; } Here four bytes are being sequentially written into the SImode value val. On some platforms, such as MIPS64, this SImode value is kept in a 64-bit register, suitably sign-extended. The function expand_assignment contains logic to handle this via SUBREG_PROMOTED_VAR_P (around line 6264 in expr.cc) which outputs an explicit extension operation after each store_field (typically insv) to such promoted/extended pseudos. The first observation is that there's no need to perform sign extension after each byte in the example above; the extension is only required after changes to the most significant byte (i.e. to a field that overlaps the most significant bit). 
The bug fix is actually a bit more subtle: at this point during code expansion it's not safe to use a SUBREG when sign-extending this field. Currently, GCC generates (sign_extend:DI (subreg:SI (reg:DI) 0)) but combine (and other RTL optimizers) later realize that, because SImode values are always sign-extended in their 64-bit hard registers, this is a no-op and eliminate it. The trouble is that it's unsafe to refer to the SImode lowpart of a 64-bit register using SUBREG at those critical points when temporarily the value isn't correctly sign-extended, and the usual backend invariants don't hold. At these critical points, the middle-end needs to use an explicit TRUNCATE rtx (as this isn't a TRULY_NOOP_TRUNCATION), so that the explicit sign-extension looks like (sign_extend:DI (truncate:SI (reg:DI))), which avoids the problem. 2024-01-04 Roger Sayle <roger@nextmovesoftware.com> Jeff Law <jlaw@ventanamicro.com> gcc/ChangeLog PR rtl-optimization/104914 * expr.cc (expand_assignment): When target is SUBREG_PROMOTED_VAR_P a sign or zero extension is only required if the modified field overlaps the SUBREG's most significant bit. On MODE_REP_EXTENDED targets, don't refer to the temporarily incorrectly extended value using a SUBREG, but instead generate an explicit TRUNCATE rtx.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/expr.cc23
1 files changed, 18 insertions, 5 deletions
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 4f42c0f..dc816bc 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -6272,19 +6272,32 @@ expand_assignment (tree to, tree from, bool nontemporal)
&& known_eq (bitpos, 0)
&& known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (to_rtx))))
result = store_expr (from, to_rtx, 0, nontemporal, false);
- else
+ /* Check if the field overlaps the MSB, requiring extension. */
+ else if (maybe_eq (bitpos + bitsize,
+ GET_MODE_BITSIZE (GET_MODE (to_rtx))))
{
- rtx to_rtx1
- = lowpart_subreg (subreg_unpromoted_mode (to_rtx),
- SUBREG_REG (to_rtx),
- subreg_promoted_mode (to_rtx));
+ scalar_int_mode imode = subreg_unpromoted_mode (to_rtx);
+ scalar_int_mode omode = subreg_promoted_mode (to_rtx);
+ rtx to_rtx1 = lowpart_subreg (imode, SUBREG_REG (to_rtx),
+ omode);
result = store_field (to_rtx1, bitsize, bitpos,
bitregion_start, bitregion_end,
mode1, from, get_alias_set (to),
nontemporal, reversep);
+ /* If the target usually keeps IMODE appropriately
+ extended in OMODE it's unsafe to refer to it using
+ a SUBREG whilst this invariant doesn't hold. */
+ if (targetm.mode_rep_extended (imode, omode) != UNKNOWN)
+ to_rtx1 = simplify_gen_unary (TRUNCATE, imode,
+ SUBREG_REG (to_rtx), omode);
convert_move (SUBREG_REG (to_rtx), to_rtx1,
SUBREG_PROMOTED_SIGN (to_rtx));
}
+ else
+ result = store_field (to_rtx, bitsize, bitpos,
+ bitregion_start, bitregion_end,
+ mode1, from, get_alias_set (to),
+ nontemporal, reversep);
}
else
result = store_field (to_rtx, bitsize, bitpos,