diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2024-05-30 21:27:42 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2024-05-31 00:03:38 +0200 |
commit | e715204f203d318524ae86f3f2a1e8d5d7cb08dc (patch) | |
tree | 7e97cfd9aa554cba199490d74b9d20e2f09e2120 /gcc | |
parent | 46d931b3dd31cbba7c3355ada63f155aa24a4e2b (diff) | |
download | gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.zip gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.tar.gz gcc-e715204f203d318524ae86f3f2a1e8d5d7cb08dc.tar.bz2 |
i386: Rewrite bswaphi2 handling [PR115102]
Introduce *bswaphi2 instruction pattern and enable bswaphi2 expander
also for non-movbe targets. The testcase:
unsigned short bswap8 (unsigned short val)
{
return ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}
now expands through bswaphi2 named expander.
Rewrite the bswaphi_lowpart insn pattern as bswaphisi2_lowpart, in the RTX
form that the combine pass can use to simplify:
Trying 6, 9, 8 -> 10:
6: r99:SI=bswap(r103:SI)
9: {r107:SI=r103:SI&0xffffffffffff0000;clobber flags:CC;}
REG_DEAD r103:SI
REG_UNUSED flags:CC
8: {r106:SI=r99:SI 0>>0x10;clobber flags:CC;}
REG_DEAD r99:SI
REG_UNUSED flags:CC
10: {r104:SI=r106:SI|r107:SI;clobber flags:CC;}
REG_DEAD r107:SI
REG_DEAD r106:SI
REG_UNUSED flags:CC
Successfully matched this instruction:
(set (reg:SI 104 [ _8 ])
(ior:SI (and:SI (reg/v:SI 103 [ val ])
(const_int -65536 [0xffffffffffff0000]))
(lshiftrt:SI (bswap:SI (reg/v:SI 103 [ val ]))
(const_int 16 [0x10]))))
allowing combination of insns 6, 8, 9 and 10
when compiling the following testcase:
unsigned int bswap8 (unsigned int val)
{
return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8);
}
to produce:
movl %edi, %eax
xchgb %ah, %al
ret
The expansion now always goes through a clobberless form of the bswaphi
instruction. The instruction is conditionally converted to a rotate in
the peephole2 pass. This significantly simplifies the attributes of the
bswaphisi2_lowpart insn pattern.
PR target/115102
gcc/ChangeLog:
* config/i386/i386.md (bswaphi2): Also enable for !TARGET_MOVBE.
(*bswaphi2): New insn pattern.
(bswaphisi2_lowpart): Rename from bswaphi_lowpart. Rewrite
insn RTX to match the expected form of the combine pass.
Remove rol{w} alternative and corresponding attributes.
(bswaphisi2_lowpart peephole2): New peephole2 pattern to
conditionally convert bswaphisi2_lowpart to rotlhi3_1_slp.
(bswapsi2): Update expander for rename.
(rotlhi3_1_slp splitter): Conditionally split to bswaphi2.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr115102.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 77 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr115102.c | 10 |
2 files changed, 60 insertions, 27 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c162cd4..375654cf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17210,9 +17210,7 @@ (clobber (reg:CC FLAGS_REG))] "reload_completed && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))" - [(parallel [(set (strict_low_part (match_dup 0)) - (bswap:HI (match_dup 0))) - (clobber (reg:CC FLAGS_REG))])]) + [(set (match_dup 0) (bswap:HI (match_dup 0)))]) ;; Rotations through carry flag (define_insn "rcrsi2" @@ -20730,12 +20728,11 @@ operands[1] = force_reg (SImode, operands[1]); else { - rtx x = operands[0]; + rtx x = gen_reg_rtx (SImode); - emit_move_insn (x, operands[1]); - emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_bswaphisi2_lowpart (x, operands[1])); emit_insn (gen_rotlsi3 (x, x, GEN_INT (16))); - emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_bswaphisi2_lowpart (operands[0], x)); DONE; } }) @@ -20767,7 +20764,11 @@ (define_expand "bswaphi2" [(set (match_operand:HI 0 "register_operand") (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))] - "TARGET_MOVBE") + "" +{ + if (!TARGET_MOVBE) + operands[1] = force_reg (HImode, operands[1]); +}) (define_insn "*bswaphi2_movbe" [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m") @@ -20788,33 +20789,55 @@ (set_attr "bdver1_decode" "double,*,*") (set_attr "mode" "QI,HI,HI")]) +(define_insn "*bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=Q") + (bswap:HI (match_operand:HI 1 "register_operand" "0")))] + "!TARGET_MOVBE" + "xchg{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "double") + (set_attr "mode" "QI")]) + (define_peephole2 [(set (match_operand:HI 0 "general_reg_operand") (bswap:HI (match_dup 0)))] - "TARGET_MOVBE - && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)) + "!(TARGET_USE_XCHGB || + 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8))) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "bswaphi_lowpart" - [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) - (bswap:HI (match_dup 0))) - (clobber (reg:CC FLAGS_REG))] +(define_insn "bswaphisi2_lowpart" + [(set (match_operand:SI 0 "register_operand" "=Q") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") + (const_int -65536)) + (lshiftrt:SI (bswap:SI (match_dup 1)) + (const_int 16))))] "" - "@ - xchg{b}\t{%h0, %b0|%b0, %h0} - rol{w}\t{$8, %0|%0, 8}" - [(set (attr "preferred_for_size") - (cond [(eq_attr "alternative" "0") - (symbol_ref "true")] - (symbol_ref "false"))) - (set (attr "preferred_for_speed") - (cond [(eq_attr "alternative" "0") - (symbol_ref "TARGET_USE_XCHGB")] - (symbol_ref "!TARGET_USE_XCHGB"))) - (set_attr "length" "2,4") - (set_attr "mode" "QI,HI")]) + "xchg{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "double") + (set_attr "mode" "QI")]) + +(define_peephole2 + [(set (match_operand:SI 0 "general_reg_operand") + (ior:SI (and:SI (match_dup 0) + (const_int -65536)) + (lshiftrt:SI (bswap:SI (match_dup 0)) + (const_int 16))))] + "!(TARGET_USE_XCHGB || + TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (strict_low_part (match_dup 0)) + (rotate:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (HImode, operands[0]);") (define_expand "paritydi2" [(set (match_operand:DI 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/i386/pr115102.c b/gcc/testsuite/gcc.target/i386/pr115102.c new file mode 100644 index 0000000..e603980 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/pr115102.c @@ -0,0 +1,10 @@ +/* PR target/115102 */ +/* { dg-do compile } */ +/* { dg-options "-Os -march=x86-64 -dp" } */ + +unsigned int bswap8 (unsigned int val) +{ + return (val & 0xffff0000) | ((val & 0xff00) >> 8) | ((val & 0xff) << 8); +} + +/* { dg-final { scan-assembler "bswaphisi2_lowpart" } } */ |