diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2022-05-24 15:18:56 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2022-05-24 15:18:56 +0100 |
commit | e8a25550dac458a2afd8d456540e94e060fa2384 (patch) | |
tree | db87c6ca8900ef43e803cf1922166474602b6115 /gcc | |
parent | 793f847ba7dbe7638f1c27178868edbefd3a8108 (diff) | |
download | gcc-e8a25550dac458a2afd8d456540e94e060fa2384.zip gcc-e8a25550dac458a2afd8d456540e94e060fa2384.tar.gz gcc-e8a25550dac458a2afd8d456540e94e060fa2384.tar.bz2 |
Optimize double word negation of zero extended values on x86.
It's not uncommon for GCC to convert between a (zero or one) Boolean
value and a (zero or all ones) mask value, possibly of a wider type,
using negation.
Currently on x86_64, the following simple test case:
__int128 foo(unsigned long x) { return -(__int128)x; }
compiles with -O2 to:
movq %rdi, %rax
xorl %edx, %edx
negq %rax
adcq $0, %rdx
negq %rdx
ret
with this patch, which adds an additional peephole2 to i386.md,
we instead generate the improved:
movq %rdi, %rax
negq %rax
sbbq %rdx, %rdx
ret
[and likewise for the (DImode) long long version using -m32.]
A peephole2 is appropriate as the double word negation and the
operation providing the xor are typically only split after combine.
In fact, the new peephole2 sequence:
;; Convert:
;; xorl %edx, %edx
;; negl %eax
;; adcl $0, %edx
;; negl %edx
;; to:
;; negl %eax
;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
is nearly identical to (and placed immediately after) the existing:
;; Convert:
;; mov %esi, %edx
;; negl %eax
;; adcl $0, %edx
;; negl %edx
;; to:
;; xorl %edx, %edx
;; negl %eax
;; sbbl %esi, %edx
One potential objection/concern is that "sbb? %reg,%reg" may possibly be
incorrectly perceived as a false register dependency on older hardware,
much like "xor? %reg,%reg" may be perceived as a false dependency on
really old hardware. This doesn't currently appear to be a concern
for the i386 backend's *x86_mov<mode>cc_0_m1 as shown by the following
test code:
int bar(unsigned int x, unsigned int y) {
return x > y ? -1 : 0;
}
which currently generates a "naked" sbb:
cmp esi, edi
sbb eax, eax
ret
If anyone does potentially encounter a stall, it would be easy to add
a splitter or peephole2 controlled by a tuning flag to insert an additional
xor to break the false dependency chain (when not optimizing for size),
but I don't believe this is required on recent microarchitectures.
2022-05-24 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/i386/i386.md (peephole2): Convert xor;neg;adc;neg,
i.e. a double word negation of a zero extended operand, to
neg;sbb.
gcc/testsuite/ChangeLog
* gcc.target/i386/neg-zext-1.c: New test case for -m32.
* gcc.target/i386/neg-zext-2.c: New test case for -m64.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/neg-zext-1.c | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/neg-zext-2.c | 7 |
3 files changed, 54 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 050dee7..b9b8f78 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -11040,6 +11040,46 @@ (clobber (reg:CC FLAGS_REG))])] "ix86_expand_clear (operands[0]);") +;; Convert: +;; xorl %edx, %edx +;; negl %eax +;; adcl $0, %edx +;; negl %edx +;; to: +;; negl %eax +;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1 + +(define_peephole2 + [(parallel + [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CCC FLAGS_REG) + (ne:CCC (match_operand:SWI48 1 "general_reg_operand") (const_int 0))) + (set (match_dup 1) (neg:SWI48 (match_dup 1)))]) + (parallel + [(set (match_dup 0) + (plus:SWI48 (plus:SWI48 + (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 0)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) + (neg:SWI48 (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "REGNO (operands[0]) != REGNO (operands[1])" + [(parallel + [(set (reg:CCC FLAGS_REG) + (ne:CCC (match_dup 1) (const_int 0))) + (set (match_dup 1) (neg:SWI48 (match_dup 1)))]) + (parallel + [(set (match_dup 0) + (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)) + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])]) + (define_insn "*neg<mode>_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))) diff --git a/gcc/testsuite/gcc.target/i386/neg-zext-1.c b/gcc/testsuite/gcc.target/i386/neg-zext-1.c new file mode 100644 index 0000000..ec91fb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/neg-zext-1.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2" } */ + +long long foo(unsigned int x) { return -(long long)x; } + +/* { dg-final { scan-assembler "sbb" } } */ +/* { dg-final { scan-assembler-not "adc" } } */ diff --git a/gcc/testsuite/gcc.target/i386/neg-zext-2.c 
b/gcc/testsuite/gcc.target/i386/neg-zext-2.c new file mode 100644 index 0000000..a6ed077 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/neg-zext-2.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2" } */ + +__int128 fool(unsigned long x) { return -(__int128)x; } + +/* { dg-final { scan-assembler "sbb" } } */ +/* { dg-final { scan-assembler-not "adc" } } */ |