aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-07-04 07:31:17 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2024-07-04 07:31:17 +0100
commit727f8b142b7d5442af6c2e903293abc367a8de5f (patch)
treeac6fa39036ed9b63f86ab5dfaf0b51fcc35869e1 /gcc
parent759f4abe1220a8202b8389f9b756c35b6c9c439d (diff)
downloadgcc-727f8b142b7d5442af6c2e903293abc367a8de5f.zip
gcc-727f8b142b7d5442af6c2e903293abc367a8de5f.tar.gz
gcc-727f8b142b7d5442af6c2e903293abc367a8de5f.tar.bz2
i386: Add additional variant of bswaphisi2_lowpart peephole2.
This patch adds an additional variation of the peephole2 used to convert
bswaphisi2_lowpart into rotlhi3_1_slp, which converts xchgb %ah,%al into
rotw if the flags register isn't live. The motivating example is:

void ext(int x);
void foo(int x)
{
  ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8));
}

where GCC with -O2 currently produces:

foo:    movl    %edi, %eax
        rolw    $8, %ax
        movl    %eax, %edi
        jmp     ext

The issue is that the original xchgb (bswaphisi2_lowpart) can only be
performed in "Q" registers that allow the %?h register to be used, so
reload generates the above two movl. However, it's later in peephole2
where we see that CC_FLAGS can be clobbered, so we can use a rotate word,
which is more forgiving with register allocations. With the additional
peephole2 proposed here, we now generate:

foo:    rolw    $8, %di
        jmp     ext

2024-07-04  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386.md (bswaphisi2_lowpart peephole2): New
        peephole2 variant to eliminate register shuffling.

gcc/testsuite/ChangeLog
        * gcc.target/i386/xchg-4.c: New test case.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.md24
-rw-r--r--gcc/testsuite/gcc.target/i386/xchg-4.c11
2 files changed, 35 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4a44b69..b24c4fe 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -21489,6 +21489,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[0] = gen_lowpart (HImode, operands[0]);")
+;; Variant of above peephole2 to improve register allocation: also matches the copy into and the copy out of operand 0, so the rotate is applied to operand 2 directly.
+(define_peephole2
+ [(set (match_operand:SI 0 "general_reg_operand") ; insn 0: operand 0 = operand 1
+ (match_operand:SI 1 "register_operand"))
+ (set (match_dup 0) ; insn 1: keep bits 16-31 of operand 0, byte-swap bits 0-15
+ (ior:SI (and:SI (match_dup 0)
+ (const_int -65536)) ; mask 0xffff0000 selects the upper half
+ (lshiftrt:SI (bswap:SI (match_dup 0))
+ (const_int 16)))) ; bswap then >> 16 leaves the two low bytes exchanged
+ (set (match_operand:SI 2 "general_reg_operand") (match_dup 0))] ; insn 2: operand 2 = operand 0
+ "!(TARGET_USE_XCHGB ||
+ TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && peep2_reg_dead_p(3, operands[0])"
+ [(parallel
+ [(set (strict_low_part (match_dup 3)) ; writes only the HImode low part held in operand 3
+ (rotate:HI (match_dup 3) (const_int 8))) ; 16-bit rotate by 8 exchanges the two bytes
+ (clobber (reg:CC FLAGS_REG))])] ; the condition above tests FLAGS_REG with peep2_regno_dead_p
+{
+ if (!rtx_equal_p (operands[1], operands[2])) /* no copy is emitted when rtx_equal_p reports operands 1 and 2 equal */
+ emit_move_insn (operands[2], operands[1]);
+ operands[3] = gen_lowpart (HImode, operands[2]); /* operand 3: HImode low part of operand 2, used by the replacement pattern */
+})
+
(define_expand "paritydi2"
[(set (match_operand:DI 0 "register_operand")
(parity:DI (match_operand:DI 1 "register_operand")))]
diff --git a/gcc/testsuite/gcc.target/i386/xchg-4.c b/gcc/testsuite/gcc.target/i386/xchg-4.c
new file mode 100644
index 0000000..de099e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/xchg-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void ext(int x);
+void foo(int x) /* the dg-final checks below expect a rolw and no mov in the generated assembly */
+{
+ ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8)); /* keep the upper 16 bits of x, exchange its two low bytes */
+}
+
+/* { dg-final { scan-assembler "rolw" } } */
+/* { dg-final { scan-assembler-not "mov" } } */