diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-12 14:12:34 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-12 14:12:34 +0100 |
commit | d2c18b4a16f9e1a6ed271ec1efaf94533d1c4a94 (patch) | |
tree | 1742aa675fa78ff1cfb56bb6f159d8912a1e365a /gcc | |
parent | 46ade8c9cc860170ab4253cffd24169efa46ca70 (diff) | |
download | gcc-d2c18b4a16f9e1a6ed271ec1efaf94533d1c4a94.zip gcc-d2c18b4a16f9e1a6ed271ec1efaf94533d1c4a94.tar.gz gcc-d2c18b4a16f9e1a6ed271ec1efaf94533d1c4a94.tar.bz2 |
PR target/110598: Fix rega = 0; rega ^= rega regression in i386.md
This patch fixes the regression PR target/110598 caused by my recent
addition of a peephole2. The intention of that optimization was to
simplify zeroing a register followed by an IOR, XOR or PLUS operation
on it into a single move, as described in the comment:
;; Peephole2 rega = 0; rega op= regb into rega = regb.
The issue is that I'd failed to consider the (rare and unusual) case
where regb is rega, in which the transformation leads to the incorrect
"rega = rega" when it should be "rega = 0". The minimal fix is to
add a !reg_mentioned_p check to the recent peephole2.
In addition to resolving the regression, I've added a second peephole2
to optimize the problematic case above, which contains a false
dependency and is therefore tricky to optimize elsewhere. This is an
improvement over GCC 13 which, for example, generates the redundant:
xorl %edx, %edx
xorq %rdx, %rdx
2023-07-12 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
PR target/110598
* config/i386/i386.md (peephole2): Check !reg_mentioned_p when
optimizing rega = 0; rega op= regb for op in [XOR,IOR,PLUS].
(peephole2): Simplify rega = 0; rega op= rega cases.
gcc/testsuite/ChangeLog
PR target/110598
* gcc.target/i386/pr110598.c: New test case.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr110598.c | 46 |
2 files changed, 60 insertions, 2 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ef96834..d4a948d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12325,9 +12325,21 @@ (any_or_plus:SWI (match_dup 0) (match_operand:SWI 1 "<general_operand>"))) (clobber (reg:CC FLAGS_REG))])] - "" + "!reg_mentioned_p (operands[0], operands[1])" [(set (match_dup 0) (match_dup 1))]) - + +;; Peephole2 dead instruction in rega = 0; rega op= rega. +(define_peephole2 + [(parallel [(set (match_operand:SWI 0 "general_reg_operand") + (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (any_or_plus:SWI (match_dup 0) (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])]) + ;; Split DST = (HI<<32)|LO early to minimize register usage. (define_insn_and_split "*concat<mode><dwi>3_1" [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") diff --git a/gcc/testsuite/gcc.target/i386/pr110598.c b/gcc/testsuite/gcc.target/i386/pr110598.c new file mode 100644 index 0000000..1c88031 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110598.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +typedef unsigned long long u64; + +#define MAX_SUBTARGET_WORDS 4 + +int notequal(const void *a, const void *b) +{ + return __builtin_memcmp(a,b,MAX_SUBTARGET_WORDS*sizeof(u64)) != 0; +} +typedef struct FeatureBitset { + u64 Bits[MAX_SUBTARGET_WORDS]; +}FeatureBitset; + +__attribute__((noipa)) +_Bool is_eq_buggy (const FeatureBitset * lf, const FeatureBitset * rf) { + u64 Bits_l[MAX_SUBTARGET_WORDS]; + Bits_l[0] = lf->Bits[0]&1; + Bits_l[1] = 0; + Bits_l[2] = 0; + Bits_l[3] = 0; + u64 Bits_r[MAX_SUBTARGET_WORDS]; + Bits_r[0] = rf->Bits[0]&1; + Bits_r[1] = 0; + Bits_r[2] = 0; + Bits_r[3] = 0; + return !notequal(Bits_l, Bits_r); +} + +__attribute__((noipa)) +void bug(void) { + FeatureBitset lf, rf; + lf.Bits[0] = rf.Bits[0] = 1; + lf.Bits[1] = rf.Bits[1] = 1; + lf.Bits[2] = 
rf.Bits[2] = 1; + lf.Bits[3] = rf.Bits[3] = 1; + + _Bool r = is_eq_buggy (&lf, &rf); + if (!r) __builtin_trap(); +} + +__attribute__((noipa)) +int main(void) { + bug(); +} |