aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2023-11-13 09:16:59 +0000
committerRoger Sayle <roger@nextmovesoftware.com>2023-11-13 09:16:59 +0000
commitb51bfee1beed03872ea0289cb47dd2336d9f528c (patch)
treeb244944edde12e78e4dc3a8782036a2279a11ec4
parente9d59a2a5a8e8ce667847372cc480215fa862aa4 (diff)
downloadgcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.zip
gcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.tar.gz
gcc-b51bfee1beed03872ea0289cb47dd2336d9f528c.tar.bz2
ARC: Improved DImode rotates and right shifts by one bit.
This patch improves the code generated for DImode right shifts (both arithmetic and logical) by a single bit, and also for DImode rotates (both left and right) by a single bit. In approach, this is similar to the recently added DImode left shift by a single bit patch, but also builds upon the x86's UNSPEC carry flag representation: https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html The benefits can be seen from the four new test cases: long long ashr(long long x) { return x >> 1; } Before: ashr: asl r2,r1,31 lsr_s r0,r0 or_s r0,r0,r2 j_s.d [blink] asr_s r1,r1,1 After: ashr: asr.f r1,r1 j_s.d [blink] rrc r0,r0 unsigned long long lshr(unsigned long long x) { return x >> 1; } Before: lshr: asl r2,r1,31 lsr_s r0,r0 or_s r0,r0,r2 j_s.d [blink] lsr_s r1,r1 After: lshr: lsr.f r1,r1 j_s.d [blink] rrc r0,r0 unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); } Before: rotl: lsr r12,r1,31 lsr r2,r0,31 asl_s r3,r0,1 asl_s r1,r1,1 or r0,r12,r3 j_s.d [blink] or_s r1,r1,r2 After: rotl: add.f r0,r0,r0 adc.f r1,r1,r1 j_s.d [blink] add.cs r0,r0,1 unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); } Before: rotr: asl r12,r1,31 asl r2,r0,31 lsr_s r3,r0 lsr_s r1,r1 or r0,r12,r3 j_s.d [blink] or_s r1,r1,r2 After: rotr: asr.f 0,r0 rrc.f r1,r1 j_s.d [blink] rrc r0,r0 On CPUs without a barrel shifter the improvements are even better. 2023-11-13 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that represents the carry flag being set if the operand is non-zero. (adc_f): New define_insn representing adc with updated flags. (ashrdi3): New define_expand that only handles shifts by 1. (ashrdi3_cnt1): New pre-reload define_insn_and_split. (lshrdi3): New define_expand that only handles shifts by 1. (lshrdi3_cnt1): New pre-reload define_insn_and_split. (rrcsi2): New define_insn for rrc (SImode rotate right through carry). (rrcsi2_carry): Likewise for rrc.f, as above but updating flags. (rotldi3): New define_expand that only handles rotates by 1. (rotldi3_cnt1): New pre-reload define_insn_and_split. (rotrdi3): New define_expand that only handles rotates by 1. (rotrdi3_cnt1): New pre-reload define_insn_and_split. (lshrsi3_cnt1_carry): New define_insn for lsr.f. (ashrsi3_cnt1_carry): New define_insn for asr.f. (btst_0_carry): New define_insn for asr.f without result. gcc/testsuite/ChangeLog * gcc.target/arc/ashrdi3-1.c: New test case. * gcc.target/arc/lshrdi3-1.c: Likewise. * gcc.target/arc/rotldi3-1.c: Likewise. * gcc.target/arc/rotrdi3-1.c: Likewise.
-rw-r--r--gcc/config/arc/arc.md219
-rw-r--r--gcc/testsuite/gcc.target/arc/ashrdi3-1.c10
-rw-r--r--gcc/testsuite/gcc.target/arc/lshrdi3-1.c10
-rw-r--r--gcc/testsuite/gcc.target/arc/rotldi3-1.c11
-rw-r--r--gcc/testsuite/gcc.target/arc/rotrdi3-1.c11
5 files changed, 261 insertions, 0 deletions
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 8c121cd..4ae3a67 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -136,6 +136,7 @@
UNSPEC_ARC_VMAC2HU
UNSPEC_ARC_VMPY2H
UNSPEC_ARC_VMPY2HU
+ UNSPEC_ARC_CC_NEZ
VUNSPEC_ARC_RTIE
VUNSPEC_ARC_SYNC
@@ -2789,6 +2790,31 @@ archs4x, archs4xd"
(set_attr "type" "cc_arith")
(set_attr "length" "4,4,4,4,8,8")])
+(define_insn "adc_f"
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C
+ (zero_extend:DI
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "%r"))
+ (match_operand:SI 2 "register_operand" "r")))
+ (plus:DI
+ (ltu:DI (reg:CC_C CC_REG) (const_int 0))
+ (zero_extend:DI (match_dup 1)))))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (match_dup 1))
+ (match_dup 2)))]
+ ""
+ "adc.f\\t%0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "predicable" "no")
+ (set_attr "type" "cc_arith")
+ (set_attr "length" "4")])
+
; combiner-splitter cmp / scc -> cmp / adc
(define_split
[(set (match_operand:SI 0 "dest_reg_operand" "")
@@ -3529,6 +3555,68 @@ archs4x, archs4xd"
""
[(set_attr "length" "8")])
+(define_expand "ashrdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+ if (operands[2] != const1_rtx)
+ FAIL;
+})
+
+;; Split into asr.f hi; rrc lo
+(define_insn_and_split "*ashrdi3_cnt1"
+ [(set (match_operand:DI 0 "register_operand")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand")
+ (const_int 1)))
+ (clobber (reg:CC CC_REG))]
+ "arc_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1])));
+ emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1])));
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+(define_expand "lshrdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+ if (operands[2] != const1_rtx)
+ FAIL;
+})
+
+;; Split into lsr.f hi; rrc lo
+(define_insn_and_split "*lshrdi3_cnt1"
+ [(set (match_operand:DI 0 "register_operand")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand")
+ (const_int 1)))
+ (clobber (reg:CC CC_REG))]
+ "arc_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1])));
+ emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1])));
+ DONE;
+}
+ [(set_attr "length" "8")])
+
;; Rotate instructions.
(define_insn "rotrsi3_insn"
@@ -3570,6 +3658,103 @@ archs4x, archs4xd"
}
})
+;; Rotate through carry flag
+
+(define_insn "rrcsi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (plus:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))
+ (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (const_int 31))))]
+ ""
+ "rrc\\t%0,%1"
+ [(set_attr "type" "shift")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4")])
+
+(define_insn "rrcsi2_carry"
+ [(set (reg:CC_C CC_REG)
+ (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))] UNSPEC_ARC_CC_NEZ))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (plus:SI
+ (lshiftrt:SI (match_dup 1) (const_int 1))
+ (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (const_int 31))))]
+ ""
+ "rrc.f\\t%0,%1"
+ [(set_attr "type" "shift")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4")])
+
+;; DImode Rotate instructions
+
+(define_expand "rotldi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand")
+ (rotate:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+ if (operands[2] != const1_rtx)
+ FAIL;
+})
+
+;; split into add.f lo; adc.f hi; adc lo
+(define_insn_and_split "*rotldi3_cnt1"
+ [(set (match_operand:DI 0 "register_operand")
+ (rotate:DI (match_operand:DI 1 "register_operand")
+ (const_int 1)))
+ (clobber (reg:CC CC_REG))]
+ "arc_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx lo0 = gen_lowpart (SImode, operands[0]);
+ rtx lo1 = gen_lowpart (SImode, operands[1]);
+ rtx hi1 = gen_highpart (SImode, operands[1]);
+ emit_insn (gen_add_f (lo0, lo1, lo1));
+ emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1));
+ emit_insn (gen_adc (lo0, lo0, const0_rtx));
+ DONE;
+}
+ [(set_attr "length" "12")])
+
+(define_expand "rotrdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand")
+ (rotatert:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+ if (operands[2] != const1_rtx)
+ FAIL;
+})
+
+;; split into asr.f lo; rrc.f hi; rrc lo
+(define_insn_and_split "*rotrdi3_cnt1"
+ [(set (match_operand:DI 0 "register_operand")
+ (rotatert:DI (match_operand:DI 1 "register_operand")
+ (const_int 1)))
+ (clobber (reg:CC CC_REG))]
+ "arc_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx lo = gen_lowpart (SImode, operands[1]);
+ emit_insn (gen_btst_0_carry (lo));
+ emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1])));
+ emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo));
+ DONE;
+}
+ [(set_attr "length" "12")])
+
;; Compare / branch instructions.
(define_expand "cbranchsi4"
@@ -6009,6 +6194,18 @@ archs4x, archs4xd"
(set_attr "iscompact" "maybe,false")
(set_attr "predicable" "no,no")])
+(define_insn "lshrsi3_cnt1_carry"
+ [(set (reg:CC_C CC_REG)
+ (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))] UNSPEC_ARC_CC_NEZ))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (lshiftrt:SI (match_dup 1) (const_int 1)))]
+ ""
+ "lsr.f\\t%0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")
+ (set_attr "predicable" "no")])
+
(define_insn "ashrsi3_cnt1"
[(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
(ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
@@ -6019,6 +6216,28 @@ archs4x, archs4xd"
(set_attr "iscompact" "maybe,false")
(set_attr "predicable" "no,no")])
+(define_insn "ashrsi3_cnt1_carry"
+ [(set (reg:CC_C CC_REG)
+ (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))] UNSPEC_ARC_CC_NEZ))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (ashiftrt:SI (match_dup 1) (const_int 1)))]
+ ""
+ "asr.f\\t%0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")
+ (set_attr "predicable" "no")])
+
+(define_insn "btst_0_carry"
+ [(set (reg:CC_C CC_REG)
+ (unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1))] UNSPEC_ARC_CC_NEZ))]
+ ""
+ "asr.f\\t0,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")
+ (set_attr "predicable" "no")])
+
(define_peephole2
[(set (match_operand:SI 0 "register_operand" "")
(zero_extract:SI (match_dup 0)
diff --git a/gcc/testsuite/gcc.target/arc/ashrdi3-1.c b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
new file mode 100644
index 0000000..d990bfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x)
+{
+ return x >> 1;
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/lshrdi3-1.c b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
new file mode 100644
index 0000000..6542ffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+ return x >> 1;
+}
+
+/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotldi3-1.c b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
new file mode 100644
index 0000000..325996e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+ return (x << 1) | (x >> 63);
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
+/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
+/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotrdi3-1.c b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
new file mode 100644
index 0000000..cd8e0de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+ return (x >> 1) | (x << 63);
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
+/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */