aboutsummaryrefslogtreecommitdiff
path: root/gcc/combine.cc
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2023-10-04 17:11:23 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2023-10-04 17:11:23 +0100
commit263369b2f7f726a3d4b269678d2c13a9d06a041e (patch)
treefe23117341406e09d3501e1779a1926a656ca12c /gcc/combine.cc
parentd342c9de6a1534cbce324bcc3c7c0898ff74d386 (diff)
downloadgcc-263369b2f7f726a3d4b269678d2c13a9d06a041e.zip
gcc-263369b2f7f726a3d4b269678d2c13a9d06a041e.tar.gz
gcc-263369b2f7f726a3d4b269678d2c13a9d06a041e.tar.bz2
PR rtl-optimization/110701: Fix SUBREG SET_DEST handling in combine.
This patch is my proposed fix to PR rtl-optimization 110701, a latent bug in combine's record_dead_and_set_regs_1 exposed by recent improvements to simplify_subreg. The issue involves the handling of (normal) SUBREG SET_DESTs as in the instruction: (set (subreg:HI (reg:SI x) 0) (expr:HI y)) The semantics of this are that the bits specified by the SUBREG are set to the SET_SRC, y, and that the other bits of the SET_DEST are left/become undefined. To simplify explanation, we'll only consider lowpart SUBREGs (though in theory non-lowpart SUBREGS could be handled), and the fact that bits outside of the lowpart WORD retain their original values (treating these as undefined is a missed optimization rather than incorrect code bug, that only affects targets with less than 64-bit words). The bug is that combine simulates the behaviour of the above instruction, for calculating nonzero_bits and set_sign_bit_copies, in the function record_value_for_reg, by using the equivalent of: (set (reg:SI x) (subreg:SI (expr:HI y)) by calling gen_lowpart on the SET_SRC. Alas, the semantics of this revised instruction aren't always equivalent to the original. In the test case for PR110701, the original instruction (set (subreg:HI (reg:SI x), 0) (and:HI (subreg:HI (reg:SI y) 0) (const_int 340))) which (by definition) leaves the top bits of x undefined, is mistakenly considered to be equivalent to (set (reg:SI x) (and:SI (reg:SI y) (const_int 340))) where gen_lowpart's freedom to do anything with paradoxical SUBREG bits, has now cleared the high bits. The same bug also triggers when the SET_SRC is say (subreg:HI (reg:DI z)), where gen_lowpart transforms this into (subreg:SI (reg:DI z)) which defines bits 16-31 to be the same as bits 16-31 of z. The fix is that after calling record_value_for_reg, we need to mark the bits that should be undefined as undefined, in case gen_lowpart, which performs transforms appropriate for r-values, has changed the interpretation of the SUBREG when used as an l-value. 2023-10-04 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog PR rtl-optimization/110701 * combine.cc (record_dead_and_set_regs_1): Split comment into pieces placed before the relevant clauses. When the SET_DEST is a partial_subreg_p, mark the bits outside of the updated portion of the destination as undefined. gcc/testsuite/ChangeLog PR rtl-optimization/110701 * gcc.target/i386/pr110701.c: New test case.
Diffstat (limited to 'gcc/combine.cc')
-rw-r--r--gcc/combine.cc42
1 files changed, 29 insertions, 13 deletions
diff --git a/gcc/combine.cc b/gcc/combine.cc
index 468b7fd..360aa2f 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -13411,27 +13411,43 @@ record_dead_and_set_regs_1 (rtx dest, const_rtx setter, void *data)
if (REG_P (dest))
{
- /* If we are setting the whole register, we know its value. Otherwise
- show that we don't know the value. We can handle a SUBREG if it's
- the low part, but we must be careful with paradoxical SUBREGs on
- RISC architectures because we cannot strip e.g. an extension around
- a load and record the naked load since the RTL middle-end considers
- that the upper bits are defined according to LOAD_EXTEND_OP. */
+ /* If we are setting the whole register, we know its value. */
if (GET_CODE (setter) == SET && dest == SET_DEST (setter))
record_value_for_reg (dest, record_dead_insn, SET_SRC (setter));
+ /* We can handle a SUBREG if it's the low part, but we must be
+ careful with paradoxical SUBREGs on RISC architectures because
+ we cannot strip e.g. an extension around a load and record the
+ naked load since the RTL middle-end considers that the upper bits
+ are defined according to LOAD_EXTEND_OP. */
else if (GET_CODE (setter) == SET
&& GET_CODE (SET_DEST (setter)) == SUBREG
&& SUBREG_REG (SET_DEST (setter)) == dest
&& known_le (GET_MODE_PRECISION (GET_MODE (dest)),
BITS_PER_WORD)
&& subreg_lowpart_p (SET_DEST (setter)))
- record_value_for_reg (dest, record_dead_insn,
- WORD_REGISTER_OPERATIONS
- && word_register_operation_p (SET_SRC (setter))
- && paradoxical_subreg_p (SET_DEST (setter))
- ? SET_SRC (setter)
- : gen_lowpart (GET_MODE (dest),
- SET_SRC (setter)));
+ {
+ if (WORD_REGISTER_OPERATIONS
+ && word_register_operation_p (SET_SRC (setter))
+ && paradoxical_subreg_p (SET_DEST (setter)))
+ record_value_for_reg (dest, record_dead_insn, SET_SRC (setter));
+ else if (!partial_subreg_p (SET_DEST (setter)))
+ record_value_for_reg (dest, record_dead_insn,
+ gen_lowpart (GET_MODE (dest),
+ SET_SRC (setter)));
+ else
+ {
+ record_value_for_reg (dest, record_dead_insn,
+ gen_lowpart (GET_MODE (dest),
+ SET_SRC (setter)));
+
+ unsigned HOST_WIDE_INT mask;
+ reg_stat_type *rsp = &reg_stat[REGNO (dest)];
+ mask = GET_MODE_MASK (GET_MODE (SET_DEST (setter)));
+ rsp->last_set_nonzero_bits |= ~mask;
+ rsp->last_set_sign_bit_copies = 1;
+ }
+ }
+ /* Otherwise show that we don't know the value. */
else
record_value_for_reg (dest, record_dead_insn, NULL_RTX);
}