aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2024-07-05 23:49:43 +0200
committerGeorg-Johann Lay <avr@gjlay.de>2024-07-06 12:16:49 +0200
commit96559be74bfa355507472fc020c95c14587c227f (patch)
treea2503fccdc422aa12891a16e3b32b1ee3acd6feb /gcc
parent8bc5561c43b195e1638e5acace8b41b3f7512be3 (diff)
downloadgcc-96559be74bfa355507472fc020c95c14587c227f.zip
gcc-96559be74bfa355507472fc020c95c14587c227f.tar.gz
gcc-96559be74bfa355507472fc020c95c14587c227f.tar.bz2
AVR: Create more opportunities for -mfuse-add optimization.
avr_split_tiny_move() was only run for AVR_TINY because it has no PLUS addressing modes. The same applies to the X register on ordinary cores, and also to the Z register when used with [E]LPM. For example, without this patch long long addLL (long long *a, long long *b) { return *a + *b; } compiles with "-mmcu=atmega128 -Os -dp" to: ... movw r26,r24 ; 80 [c=4 l=1] *movhi/0 movw r30,r22 ; 81 [c=4 l=1] *movhi/0 ld r18,X ; 82 [c=4 l=1] movqi_insn/3 adiw r26,1 ; 83 [c=4 l=3] movqi_insn/3 ld r19,X sbiw r26,1 adiw r26,2 ; 84 [c=4 l=3] movqi_insn/3 ld r20,X sbiw r26,2 adiw r26,3 ; 85 [c=4 l=3] movqi_insn/3 ld r21,X sbiw r26,3 adiw r26,4 ; 86 [c=4 l=3] movqi_insn/3 ld r22,X sbiw r26,4 adiw r26,5 ; 87 [c=4 l=3] movqi_insn/3 ld r23,X sbiw r26,5 adiw r26,6 ; 88 [c=4 l=3] movqi_insn/3 ld r24,X sbiw r26,6 adiw r26,7 ; 89 [c=4 l=2] movqi_insn/3 ld r25,X ld r10,Z ; 90 [c=4 l=1] movqi_insn/3 ... whereas with this patch it becomes: ... movw r26,r24 ; 80 [c=4 l=1] *movhi/0 movw r30,r22 ; 81 [c=4 l=1] *movhi/0 ld r18,X+ ; 140 [c=4 l=1] movqi_insn/3 ld r19,X+ ; 142 [c=4 l=1] movqi_insn/3 ld r20,X+ ; 144 [c=4 l=1] movqi_insn/3 ld r21,X+ ; 146 [c=4 l=1] movqi_insn/3 ld r22,X+ ; 148 [c=4 l=1] movqi_insn/3 ld r23,X+ ; 150 [c=4 l=1] movqi_insn/3 ld r24,X+ ; 152 [c=4 l=1] movqi_insn/3 ld r25,X ; 109 [c=4 l=1] movqi_insn/3 ld r10,Z ; 111 [c=4 l=1] movqi_insn/3 ... gcc/ * config/avr/avr.md: Also split with avr_split_tiny_move() for non-AVR_TINY. * config/avr/avr.cc (avr_split_tiny_move): Don't change memory references with base regs that can do PLUS addressing. (avr_out_lpm_no_lpmx) [POST_INC]: Don't output final ADIW when the address register is unused after. gcc/testsuite/ * gcc.target/avr/torture/fuse-add.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/avr/avr.cc39
-rw-r--r--gcc/config/avr/avr.md3
-rw-r--r--gcc/testsuite/gcc.target/avr/torture/fuse-add.c59
3 files changed, 80 insertions, 21 deletions
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index f048bf5..d299fce 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -4471,28 +4471,21 @@ avr_out_lpm_no_lpmx (rtx_insn *insn, rtx *xop, int *plen)
gcc_assert (REG_Z == REGNO (XEXP (addr, 0))
&& n_bytes <= 4);
- if (regno_dest == LPM_REGNO)
- avr_asm_len ("%4lpm" CR_TAB
- "adiw %2,1", xop, plen, 2);
- else
- avr_asm_len ("%4lpm" CR_TAB
- "mov %A0,%3" CR_TAB
- "adiw %2,1", xop, plen, 3);
+ for (int i = 0; i < n_bytes; ++i)
+ {
+ rtx reg = simplify_gen_subreg (QImode, dest, GET_MODE (dest), i);
- if (n_bytes >= 2)
- avr_asm_len ("%4lpm" CR_TAB
- "mov %B0,%3" CR_TAB
- "adiw %2,1", xop, plen, 3);
+ if (i > 0)
+ avr_asm_len ("adiw %2,1", xop, plen, 1);
- if (n_bytes >= 3)
- avr_asm_len ("%4lpm" CR_TAB
- "mov %C0,%3" CR_TAB
- "adiw %2,1", xop, plen, 3);
+ avr_asm_len ("%4lpm", xop, plen, 1);
- if (n_bytes >= 4)
- avr_asm_len ("%4lpm" CR_TAB
- "mov %D0,%3" CR_TAB
- "adiw %2,1", xop, plen, 3);
+ if (REGNO (reg) != LPM_REGNO)
+ avr_asm_len ("mov %0,r0", &reg, plen, 1);
+ }
+
+ if (! _reg_unused_after (insn, xop[2], false))
+ avr_asm_len ("adiw %2,1", xop, plen, 1);
break; /* POST_INC */
@@ -6685,6 +6678,14 @@ avr_split_tiny_move (rtx_insn * /*insn*/, rtx *xop)
if (REGNO (base) > REG_Z)
return false;
+ if (! AVR_TINY
+ // Only keep base registers that can't do PLUS addressing.
+ && ((REGNO (base) != REG_X
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
+ || avr_load_libgcc_p (mem)
+ || avr_mem_memx_p (mem)))
+ return false;
+
bool volatile_p = MEM_VOLATILE_P (mem);
bool mem_volatile_p = false;
if (frame_pointer_needed
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index dabf4c0..2783b8c 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1035,8 +1035,7 @@
[(parallel [(set (match_operand:MOVMODE 0 "nonimmediate_operand")
(match_operand:MOVMODE 1 "general_operand"))
(clobber (reg:CC REG_CC))])]
- "AVR_TINY
- && reload_completed
+ "reload_completed
&& avr_fuse_add > 0
// Only split this for .split2 when we are before
// pass .avr-fuse-add (which runs after proep).
diff --git a/gcc/testsuite/gcc.target/avr/torture/fuse-add.c b/gcc/testsuite/gcc.target/avr/torture/fuse-add.c
new file mode 100644
index 0000000..b78b1aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/fuse-add.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu99" } */
+
+typedef __UINT64_TYPE__ uint64_t; /* taken from the compiler's predefined macro; no header is included */
+
+extern const uint64_t aa __asm ("real_aa"); /* declaration only; its assembler name is real_aa */
+extern const uint64_t bb __asm ("real_bb"); /* declaration only; its assembler name is real_bb */
+
+__attribute__((used)) const uint64_t real_aa = 0x1122334455667788; /* the object that aa refers to at assembler level */
+__attribute__((used)) const uint64_t real_bb = 0x0908070605040302; /* NOTE(review): aa/bb are presumably kept separate from real_aa/real_bb so the initializers stay hidden from the optimizer -- confirm */
+
+/* Return the sum of the two 64-bit values that AA and BB point to.
+   Neither pointer is __flash-qualified.  The function is marked so
+   that it is neither inlined nor cloned.  */
+__attribute__((noclone, noinline)) uint64_t
+add1 (const uint64_t *aa, const uint64_t *bb)
+{
+  const uint64_t lhs = *aa;
+  const uint64_t rhs = *bb;
+
+  return lhs + rhs;
+}
+
+#ifdef __FLASH
+extern const __flash uint64_t fa __asm ("real_fa"); /* declaration only; its assembler name is real_fa */
+extern const __flash uint64_t fb __asm ("real_fb"); /* declaration only; its assembler name is real_fb */
+
+__attribute__((used)) const __flash uint64_t real_fa = 0x1122334455667788; /* same value as real_aa, but __flash-qualified */
+__attribute__((used)) const __flash uint64_t real_fb = 0x0908070605040302; /* same value as real_bb, but __flash-qualified */
+
+/* Return the sum of the two 64-bit values that AA and BB point to.
+   AA is __flash-qualified, BB is not.  The function is marked so
+   that it is neither inlined nor cloned.  */
+__attribute__((noclone, noinline)) uint64_t
+add2 (const __flash uint64_t *aa, const uint64_t *bb)
+{
+  const uint64_t lhs = *aa;
+  const uint64_t rhs = *bb;
+
+  return lhs + rhs;
+}
+
+/* Return the sum of the two 64-bit values that AA and BB point to.
+   AA is not __flash-qualified, BB is.
+
+   Marked noinline/noclone like add1 and add2: without the attributes
+   the optimizer may inline this function into main, and it would then
+   no longer be reached as a separate call taking both pointers as
+   arguments.  */
+__attribute__((noinline,noclone))
+uint64_t add3 (const uint64_t *aa, const __flash uint64_t *bb)
+{
+  return *aa + *bb;
+}
+
+/* Return the sum of the two 64-bit values that AA and BB point to.
+   Both pointers are __flash-qualified.
+
+   Marked noinline/noclone like add1 and add2: without the attributes
+   the optimizer may inline this function into main, and it would then
+   no longer be reached as a separate call taking both pointers as
+   arguments.  */
+__attribute__((noinline,noclone))
+uint64_t add4 (const __flash uint64_t *aa, const __flash uint64_t *bb)
+{
+  return *aa + *bb;
+}
+#endif /* have __flash */
+
+int main (void) /* returns 0 when every check passes */
+{
+ if (add1 (&aa, &bb) != real_aa + real_bb) /* neither operand is __flash */
+ __builtin_exit (__LINE__); /* exit status is the source line of this call */
+
+#ifdef __FLASH /* the remaining checks use the __flash objects declared under the same macro */
+ if (add2 (&fa, &bb) != real_fa + real_bb) /* first operand is __flash */
+ __builtin_exit (__LINE__);
+
+ if (add3 (&aa, &fb) != real_aa + real_fb) /* second operand is __flash */
+ __builtin_exit (__LINE__);
+
+ if (add4 (&fa, &fb) != real_fa + real_fb) /* both operands are __flash */
+ __builtin_exit (__LINE__);
+#endif
+
+ return 0;
+}