diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-11-03 14:32:26 +0000 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-11-03 14:32:26 +0000 |
commit | b16845b30c8c65597275595f41a7b9aca0674bef (patch) | |
tree | 4358fcb99b2e4c467f929d90fb4a23b863bc5edb /gcc | |
parent | eb83605be3db9e8246c73755eafcac5df32ddc69 (diff) | |
download | gcc-b16845b30c8c65597275595f41a7b9aca0674bef.zip gcc-b16845b30c8c65597275595f41a7b9aca0674bef.tar.gz gcc-b16845b30c8c65597275595f41a7b9aca0674bef.tar.bz2 |
ARC: Improve DImode left shift by a single bit.
This patch improves the code generated for x << 1 (and for x + x) when
x is 64-bit DImode, using the same two-instruction code sequence used
for DImode addition.
For the test case:
long long foo(long long x) { return x << 1; }
GCC -O2 currently generates the following code:
foo: lsr r2,r0,31
asl_s r1,r1,1
asl_s r0,r0,1
j_s.d [blink]
or_s r1,r1,r2
and on a CPU without a barrel shifter, i.e. -mcpu=em
foo: add.f 0,r0,r0
asl_s r1,r1
rlc r2,0
asl_s r0,r0
j_s.d [blink]
or_s r1,r1,r2
with this patch (both with and without a barrel shifter):
foo: add.f r0,r0,r0
j_s.d [blink]
adc r1,r1,r1
A similar optimization is also applicable to H8300H, which could also use
a two-instruction sequence (plus an rts), but GCC currently generates 16
instructions (plus an rts) for foo above.
2023-11-03 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/arc/arc.md (addsi3): Fix GNU-style code formatting.
(adddi3): Change define_expand to generate a *adddi3.
(*adddi3): New define_insn_and_split to lower DImode additions
during the split1 pass (after combine and before reload).
(ashldi3): New define_expand to (only) generate *ashldi3_cnt1
for DImode left shifts by a single bit.
(*ashldi3_cnt1): New define_insn_and_split to lower DImode
left shifts by one bit to an *adddi3.
gcc/testsuite/ChangeLog
* gcc.target/arc/adddi3-1.c: New test case.
* gcc.target/arc/ashldi3-1.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/arc/arc.md | 55 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arc/adddi3-1.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arc/ashldi3-1.c | 10 |
3 files changed, 66 insertions, 9 deletions
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 96ff62d..7702978 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2675,19 +2675,28 @@ archs4x, archs4xd" (plus:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")))] "" - "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false)) - { - operands[2]=force_reg(SImode, operands[2]); - } - ") +{ + if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false)) + operands[2] = force_reg (SImode, operands[2]); +}) (define_expand "adddi3" + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])]) + +(define_insn_and_split "*adddi3" [(set (match_operand:DI 0 "register_operand" "") (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "nonmemory_operand" ""))) (clobber (reg:CC CC_REG))] - "" - " + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ rtx l0 = gen_lowpart (SImode, operands[0]); rtx h0 = gen_highpart (SImode, operands[0]); rtx l1 = gen_lowpart (SImode, operands[1]); @@ -2719,11 +2728,12 @@ archs4x, archs4xd" gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), gen_rtx_SET (h0, plus_constant (SImode, h0, 1)))); DONE; - } + } emit_insn (gen_add_f (l0, l1, l2)); emit_insn (gen_adc (h0, h1, h2)); DONE; -") +} + [(set_attr "length" "8")]) (define_insn "add_f" [(set (reg:CC_C CC_REG) @@ -3493,6 +3503,33 @@ archs4x, archs4xd" [(set_attr "type" "shift") (set_attr "length" "16,20")]) +;; DImode shifts + +(define_expand "ashldi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (ashift:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +(define_insn_and_split "*ashldi3_cnt1" + [(set (match_operand:DI 0 
"register_operand") + (ashift:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 1))) + (clobber (reg:CC CC_REG))])] + "" + [(set_attr "length" "8")]) + ;; Rotate instructions. (define_insn "rotrsi3_insn" diff --git a/gcc/testsuite/gcc.target/arc/adddi3-1.c b/gcc/testsuite/gcc.target/arc/adddi3-1.c new file mode 100644 index 0000000..b3077c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/adddi3-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long long foo(long long x, long long y) +{ + return x + y; +} + +/* { dg-final { scan-assembler "add.f\\s+r0,r0,r2" } } */ +/* { dg-final { scan-assembler "adc\\s+r1,r1,r3" } } */ diff --git a/gcc/testsuite/gcc.target/arc/ashldi3-1.c b/gcc/testsuite/gcc.target/arc/ashldi3-1.c new file mode 100644 index 0000000..6fe4ff4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/ashldi3-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long long foo(long long x) +{ + return x << 1; +} + +/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */ +/* { dg-final { scan-assembler "adc\\s+r1,r1,r1" } } */ |