diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-11-13 09:16:59 +0000 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-11-13 09:16:59 +0000 |
commit | b51bfee1beed03872ea0289cb47dd2336d9f528c (patch) | |
tree | b244944edde12e78e4dc3a8782036a2279a11ec4 /gcc/config/arc/arc.md | |
parent | e9d59a2a5a8e8ce667847372cc480215fa862aa4 (diff) | |
download | gcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.zip gcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.tar.gz gcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.tar.bz2 |
ARC: Improved DImode rotates and right shifts by one bit.
This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit. The approach is similar to that
of the recently added patch for DImode left shifts by a single bit, but
it also builds upon the x86 backend's UNSPEC representation of the carry flag:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html
The benefits can be seen from the four new test cases:
long long ashr(long long x) { return x >> 1; }
Before:
ashr: asl r2,r1,31
lsr_s r0,r0
or_s r0,r0,r2
j_s.d [blink]
asr_s r1,r1,1
After:
ashr: asr.f r1,r1
j_s.d [blink]
rrc r0,r0
unsigned long long lshr(unsigned long long x) { return x >> 1; }
Before:
lshr: asl r2,r1,31
lsr_s r0,r0
or_s r0,r0,r2
j_s.d [blink]
lsr_s r1,r1
After:
lshr: lsr.f r1,r1
j_s.d [blink]
rrc r0,r0
unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }
Before:
rotl: lsr r12,r1,31
lsr r2,r0,31
asl_s r3,r0,1
asl_s r1,r1,1
or r0,r12,r3
j_s.d [blink]
or_s r1,r1,r2
After:
rotl: add.f r0,r0,r0
adc.f r1,r1,r1
j_s.d [blink]
add.cs r0,r0,1
unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }
Before:
rotr: asl r12,r1,31
asl r2,r0,31
lsr_s r3,r0
lsr_s r1,r1
or r0,r12,r3
j_s.d [blink]
or_s r1,r1,r2
After:
rotr: asr.f 0,r0
rrc.f r1,r1
j_s.d [blink]
rrc r0,r0
On CPUs without a barrel shifter, the improvements are even greater.
2023-11-13 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
represents the carry flag being set if the operand is non-zero.
(adc_f): New define_insn representing adc with updated flags.
(ashrdi3): New define_expand that only handles shifts by 1.
(ashrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrdi3): New define_expand that only handles shifts by 1.
(lshrdi3_cnt1): New pre-reload define_insn_and_split.
(rrcsi2): New define_insn for rrc (SImode rotate right through carry).
(rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
(rotldi3): New define_expand that only handles rotates by 1.
(rotldi3_cnt1): New pre-reload define_insn_and_split.
(rotrdi3): New define_expand that only handles rotates by 1.
(rotrdi3_cnt1): New pre-reload define_insn_and_split.
(lshrsi3_cnt1_carry): New define_insn for lsr.f.
(ashrsi3_cnt1_carry): New define_insn for asr.f.
(btst_0_carry): New define_insn for asr.f that only sets the carry flag, discarding the shifted result.
gcc/testsuite/ChangeLog
* gcc.target/arc/ashrdi3-1.c: New test case.
* gcc.target/arc/lshrdi3-1.c: Likewise.
* gcc.target/arc/rotldi3-1.c: Likewise.
* gcc.target/arc/rotrdi3-1.c: Likewise.
Diffstat (limited to 'gcc/config/arc/arc.md')
-rw-r--r-- | gcc/config/arc/arc.md | 219 |
1 file changed, 219 insertions, 0 deletions
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 8c121cd..4ae3a67 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -136,6 +136,7 @@ UNSPEC_ARC_VMAC2HU UNSPEC_ARC_VMPY2H UNSPEC_ARC_VMPY2HU + UNSPEC_ARC_CC_NEZ VUNSPEC_ARC_RTIE VUNSPEC_ARC_SYNC @@ -2789,6 +2790,31 @@ archs4x, archs4xd" (set_attr "type" "cc_arith") (set_attr "length" "4,4,4,4,8,8")]) +(define_insn "adc_f" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (zero_extend:DI + (plus:SI + (plus:SI + (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_operand:SI 1 "register_operand" "%r")) + (match_operand:SI 2 "register_operand" "r"))) + (plus:DI + (ltu:DI (reg:CC_C CC_REG) (const_int 0)) + (zero_extend:DI (match_dup 1))))) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (plus:SI + (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_dup 1)) + (match_dup 2)))] + "" + "adc.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "predicable" "no") + (set_attr "type" "cc_arith") + (set_attr "length" "4")]) + ; combiner-splitter cmp / scc -> cmp / adc (define_split [(set (match_operand:SI 0 "dest_reg_operand" "") @@ -3529,6 +3555,68 @@ archs4x, archs4xd" "" [(set_attr "length" "8")]) +(define_expand "ashrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (ashiftrt:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; Split into asr.f hi; rrc lo +(define_insn_and_split "*ashrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (ashiftrt:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + DONE; +} + [(set_attr "length" 
"8")]) + +(define_expand "lshrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; Split into lsr.f hi; rrc lo +(define_insn_and_split "*lshrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + DONE; +} + [(set_attr "length" "8")]) + ;; Rotate instructions. (define_insn "rotrsi3_insn" @@ -3570,6 +3658,103 @@ archs4x, archs4xd" } }) +;; Rotate through carry flag + +(define_insn "rrcsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") + (plus:SI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1)) + (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (const_int 31))))] + "" + "rrc\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "predicable" "no") + (set_attr "length" "4")]) + +(define_insn "rrcsi2_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (plus:SI + (lshiftrt:SI (match_dup 1) (const_int 1)) + (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (const_int 31))))] + "" + "rrc.f\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "predicable" "no") + (set_attr "length" "4")]) + +;; DImode Rotate instructions + +(define_expand "rotldi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (rotate:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC 
CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; split into add.f lo; adc.f hi; adc lo +(define_insn_and_split "*rotldi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (rotate:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx lo0 = gen_lowpart (SImode, operands[0]); + rtx lo1 = gen_lowpart (SImode, operands[1]); + rtx hi1 = gen_highpart (SImode, operands[1]); + emit_insn (gen_add_f (lo0, lo1, lo1)); + emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1)); + emit_insn (gen_adc (lo0, lo0, const0_rtx)); + DONE; +} + [(set_attr "length" "12")]) + +(define_expand "rotrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (rotatert:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; split into asr.f lo; rrc.f hi; rrc lo +(define_insn_and_split "*rotrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (rotatert:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx lo = gen_lowpart (SImode, operands[1]); + emit_insn (gen_btst_0_carry (lo)); + emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo)); + DONE; +} + [(set_attr "length" "12")]) + ;; Compare / branch instructions. 
(define_expand "cbranchsi4" @@ -6009,6 +6194,18 @@ archs4x, archs4xd" (set_attr "iscompact" "maybe,false") (set_attr "predicable" "no,no")]) +(define_insn "lshrsi3_cnt1_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (lshiftrt:SI (match_dup 1) (const_int 1)))] + "" + "lsr.f\\t%0,%1" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + (define_insn "ashrsi3_cnt1" [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c") @@ -6019,6 +6216,28 @@ archs4x, archs4xd" (set_attr "iscompact" "maybe,false") (set_attr "predicable" "no,no")]) +(define_insn "ashrsi3_cnt1_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (ashiftrt:SI (match_dup 1) (const_int 1)))] + "" + "asr.f\\t%0,%1" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + +(define_insn "btst_0_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ))] + "" + "asr.f\\t0,%0" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (zero_extract:SI (match_dup 0) |