aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2024-05-30 21:27:42 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2024-05-31 00:03:38 +0200
commit: e715204f203d318524ae86f3f2a1e8d5d7cb08dc (patch)
tree: 7e97cfd9aa554cba199490d74b9d20e2f09e2120 /gcc
parent: 46d931b3dd31cbba7c3355ada63f155aa24a4e2b (diff)
download: gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.zip
gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.tar.gz
gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.tar.bz2
i386: Rewrite bswaphi2 handling [PR115102]
Introduce *bswaphi2 instruction pattern and enable bswaphi2 expander
also for non-movbe targets. The testcase:

unsigned short bswap8 (unsigned short val)
{
  return ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

now expands through bswaphi2 named expander.

Rewrite bswaphi_lowpart insn pattern as bswaphisi2_lowpart in the RTX form
that combine pass can use to simplify:

Trying 6, 9, 8 -> 10:
    6: r99:SI=bswap(r103:SI)
    9: {r107:SI=r103:SI&0xffffffffffff0000;clobber flags:CC;}
      REG_DEAD r103:SI
      REG_UNUSED flags:CC
    8: {r106:SI=r99:SI 0>>0x10;clobber flags:CC;}
      REG_DEAD r99:SI
      REG_UNUSED flags:CC
   10: {r104:SI=r106:SI|r107:SI;clobber flags:CC;}
      REG_DEAD r107:SI
      REG_DEAD r106:SI
      REG_UNUSED flags:CC

Successfully matched this instruction:
(set (reg:SI 104 [ _8 ])
    (ior:SI (and:SI (reg/v:SI 103 [ val ])
            (const_int -65536 [0xffffffffffff0000]))
        (lshiftrt:SI (bswap:SI (reg/v:SI 103 [ val ]))
            (const_int 16 [0x10]))))

allowing combination of insns 6, 8, 9 and 10 when compiling the following
testcase:

unsigned int bswap8 (unsigned int val)
{
  return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}

to produce:

        movl    %edi, %eax
        xchgb   %ah, %al
        ret

The expansion now always goes through a clobberless form of the bswaphi
instruction. The instruction is conditionally converted to a rotate at
peephole2 pass. This significantly simplifies bswaphisi2_lowpart insn
pattern attributes.

        PR target/115102

gcc/ChangeLog:

        * config/i386/i386.md (bswaphi2): Also enable for !TARGET_MOVBE.
        (*bswaphi2): New insn pattern.
        (bswaphisi2_lowpart): Rename from bswaphi_lowpart. Rewrite
        insn RTX to match the expected form of the combine pass.
        Remove rol{w} alternative and corresponding attributes.
        (bswaphisi2_lowpart peephole2): New peephole2 pattern to
        conditionally convert bswaphisi2_lowpart to rotlhi3_1_slp.
        (bswapsi2): Update expander for rename.
        (rotlhi3_1_slp splitter): Conditionally split to bswaphi2.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr115102.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.md | 77
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr115102.c | 10
2 files changed, 60 insertions, 27 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c162cd4..375654cf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17210,9 +17210,7 @@
(clobber (reg:CC FLAGS_REG))]
"reload_completed
&& (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
- [(parallel [(set (strict_low_part (match_dup 0))
- (bswap:HI (match_dup 0)))
- (clobber (reg:CC FLAGS_REG))])])
+ [(set (match_dup 0) (bswap:HI (match_dup 0)))])
;; Rotations through carry flag
(define_insn "rcrsi2"
@@ -20730,12 +20728,11 @@
operands[1] = force_reg (SImode, operands[1]);
else
{
- rtx x = operands[0];
+ rtx x = gen_reg_rtx (SImode);
- emit_move_insn (x, operands[1]);
- emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
- emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
DONE;
}
})
@@ -20767,7 +20764,11 @@
(define_expand "bswaphi2"
[(set (match_operand:HI 0 "register_operand")
(bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
- "TARGET_MOVBE")
+ ""
+{
+ if (!TARGET_MOVBE)
+ operands[1] = force_reg (HImode, operands[1]);
+})
(define_insn "*bswaphi2_movbe"
[(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
@@ -20788,33 +20789,55 @@
(set_attr "bdver1_decode" "double,*,*")
(set_attr "mode" "QI,HI,HI")])
+(define_insn "*bswaphi2"
+ [(set (match_operand:HI 0 "register_operand" "=Q")
+ (bswap:HI (match_operand:HI 1 "register_operand" "0")))]
+ "!TARGET_MOVBE"
+ "xchg{b}\t{%h0, %b0|%b0, %h0}"
+ [(set_attr "type" "imov")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "double")
+ (set_attr "mode" "QI")])
+
(define_peephole2
[(set (match_operand:HI 0 "general_reg_operand")
(bswap:HI (match_dup 0)))]
- "TARGET_MOVBE
- && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
+ "!(TARGET_USE_XCHGB ||
+ TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])])
-(define_insn "bswaphi_lowpart"
- [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
- (bswap:HI (match_dup 0)))
- (clobber (reg:CC FLAGS_REG))]
+(define_insn "bswaphisi2_lowpart"
+ [(set (match_operand:SI 0 "register_operand" "=Q")
+ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int -65536))
+ (lshiftrt:SI (bswap:SI (match_dup 1))
+ (const_int 16))))]
""
- "@
- xchg{b}\t{%h0, %b0|%b0, %h0}
- rol{w}\t{$8, %0|%0, 8}"
- [(set (attr "preferred_for_size")
- (cond [(eq_attr "alternative" "0")
- (symbol_ref "true")]
- (symbol_ref "false")))
- (set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "0")
- (symbol_ref "TARGET_USE_XCHGB")]
- (symbol_ref "!TARGET_USE_XCHGB")))
- (set_attr "length" "2,4")
- (set_attr "mode" "QI,HI")])
+ "xchg{b}\t{%h0, %b0|%b0, %h0}"
+ [(set_attr "type" "imov")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "double")
+ (set_attr "mode" "QI")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "general_reg_operand")
+ (ior:SI (and:SI (match_dup 0)
+ (const_int -65536))
+ (lshiftrt:SI (bswap:SI (match_dup 0))
+ (const_int 16))))]
+ "!(TARGET_USE_XCHGB ||
+ TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (rotate:HI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (HImode, operands[0]);")
(define_expand "paritydi2"
[(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr115102.c b/gcc/testsuite/gcc.target/i386/pr115102.c
new file mode 100644
index 0000000..e603980
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115102.c
@@ -0,0 +1,10 @@
+/* PR target/115102 */
+/* { dg-do compile } */
+/* { dg-options "-Os -march=x86-64 -dp" } */
+
+unsigned int bswap8 (unsigned int val)
+{
+ return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
+}
+
+/* { dg-final { scan-assembler "bswaphisi2_lowpart" } } */