aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-12-13 14:13:21 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-12-13 15:16:28 +0000
commitbb768f8b45aa7ccf12774aa0c00b295032ee7c47 (patch)
treefc97be718f95d57816cbec7102d4c0f84f4300cd
parent0caf592d6ae836a99907841fccd31c4c5f180e8d (diff)
downloadgcc-bb768f8b45aa7ccf12774aa0c00b295032ee7c47.zip
gcc-bb768f8b45aa7ccf12774aa0c00b295032ee7c47.tar.gz
gcc-bb768f8b45aa7ccf12774aa0c00b295032ee7c47.tar.bz2
aarch64: Add memmove expansion for +mops
This second patch in the series adds an inline movmem expansion for TARGET_MOPS
that emits the recommended sequence.

A new param aarch64-mops-memmove-size-threshold is added to control the memmove
size threshold for this expansion. Its default value is zero to be consistent
with the current behaviour where we always emit a libcall, as we don't currently
have a movmem inline expansion (we should add a compatible-everywhere inline
expansion, but that's for the future), so we should always prefer to emit the
MOPS sequence when available in lieu of a libcall.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

	* config/aarch64/aarch64.md (aarch64_movmemdi): Define.
	(movmemdi): Define.
	(unspec): Add UNSPEC_MOVMEM.
	* config/aarch64/aarch64.opt (aarch64-mops-memmove-size-threshold):
	New param.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/mops_2.c: New test.
-rw-r--r-- gcc/config/aarch64/aarch64.md 47
-rw-r--r-- gcc/config/aarch64/aarch64.opt 4
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/mops_2.c 57
3 files changed, 108 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d623c1b..b71c171 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -189,6 +189,7 @@
UNSPEC_LD3_LANE
UNSPEC_LD4_LANE
UNSPEC_MB
+ UNSPEC_MOVMEM
UNSPEC_NOP
UNSPEC_PACIA1716
UNSPEC_PACIB1716
@@ -1603,6 +1604,52 @@
}
)
+;; Inline memmove sequence, available only when TARGET_MOPS holds.
+;; As called from the "movmemdi" expander, operand 0 is the destination
+;; address, operand 1 the source address and operand 2 the byte count, all
+;; held in DI registers.  The RTL records that operand 2 is left as zero and
+;; that both address registers are clobbered, and it models the copy itself
+;; as an UNSPEC_MOVMEM store into the BLK memory addressed by operand 0.
+;; The output template emits the CPYP, CPYM, CPYE triple, each with
+;; writeback ("!") on all three registers; the "length" of 12 presumably
+;; reflects three 4-byte instructions.
+(define_insn "aarch64_movmemdi"
+ [(parallel [
+ (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+ (clobber (match_operand:DI 0 "register_operand" "+&r"))
+ (clobber (match_operand:DI 1 "register_operand" "+&r"))
+ (set (mem:BLK (match_dup 0))
+ (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_MOVMEM))])]
+ "TARGET_MOPS"
+ "cpyp\t[%x0]!, [%x1]!, %x2!\;cpym\t[%x0]!, [%x1]!, %x2!\;cpye\t[%x0]!, [%x1]!, %x2!"
+ [(set_attr "length" "12")]
+)
+
+;; Expand a memmove into the MOPS sequence emitted by "aarch64_movmemdi".
+;; Constant sizes below aarch64_mops_memmove_size_threshold FAIL, leaving
+;; the caller to fall back (currently to a libcall).  Operands:
+;; 0 is dst
+;; 1 is src
+;; 2 is size of copy in bytes
+;; 3 is alignment
+
+(define_expand "movmemdi"
+  [(match_operand:BLK 0 "memory_operand")
+   (match_operand:BLK 1 "memory_operand")
+   (match_operand:DI 2 "general_operand")
+   (match_operand:DI 3 "immediate_operand")]
+  "TARGET_MOPS"
+{
+  /* For constant-sized memmoves check the threshold.
+     FIXME: We should add a non-MOPS memmove expansion for smaller,
+     constant-sized memmove to avoid going to a libcall.  */
+  if (CONST_INT_P (operands[2])
+      && INTVAL (operands[2]) < aarch64_mops_memmove_size_threshold)
+    FAIL;
+
+  /* aarch64_movmemdi modifies all three of its register operands: the
+     size register is set to zero and both address registers are
+     clobbered.  Passing the caller's own registers through would
+     therefore corrupt any later use of the pointers or the size, so
+     always copy each value into a fresh pseudo, even when it is already
+     in a register.  */
+  rtx addr_dst = copy_to_mode_reg (DImode, XEXP (operands[0], 0));
+  rtx addr_src = copy_to_mode_reg (DImode, XEXP (operands[1], 0));
+  rtx sz_reg = copy_to_mode_reg (DImode, operands[2]);
+
+  emit_insn (gen_aarch64_movmemdi (addr_dst, addr_src, sz_reg));
+  DONE;
+}
+)
+
;; 0 is dst
;; 1 is val
;; 2 is size of copy in bytes
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 7445ed1..33788ff 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -284,3 +284,7 @@ Target Joined UInteger Var(aarch64_loop_vect_issue_rate_niters) Init(6) IntegerR
-param=aarch64-mops-memcpy-size-threshold=
Target Joined UInteger Var(aarch64_mops_memcpy_size_threshold) Init(256) Param
Constant memcpy size in bytes above which to start using MOPS sequence.
+
+; Threshold read by the "movmemdi" expander in aarch64.md: a memmove whose
+; size is a compile-time constant strictly less than this value is not
+; expanded to the MOPS sequence.  The initial value is 0.
+-param=aarch64-mops-memmove-size-threshold=
+Target Joined UInteger Var(aarch64_mops_memmove_size_threshold) Init(0) Param
+Constant memmove size in bytes above which to start using MOPS sequence.
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_2.c b/gcc/testsuite/gcc.target/aarch64/mops_2.c
new file mode 100644
index 0000000..6fda4dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_2.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops --param=aarch64-mops-memmove-size-threshold=0" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdlib.h>
+
+/* We want to inline variable-sized memmove.
+** do_it_mov:
+** cpyp \[x1\]\!, \[x0\]\!, x2\!
+** cpym \[x1\]\!, \[x0\]\!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+void do_it_mov (char * in, char * out, size_t size)
+{
+ /* Size is only known at run time.  Note the argument order of the
+    builtin: `out' is the destination and `in' is the source.  */
+ __builtin_memmove (out, in, size);
+}
+
+/*
+** do_it_mov_large:
+** mov x2, 1024
+** cpyp \[x1\]\!, \[x0\]!, x2\!
+** cpym \[x1\]\!, \[x0\]!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+void do_it_mov_large (char * in, char * out)
+{
+ /* Constant size of 1024 bytes.  The test is compiled with
+    --param=aarch64-mops-memmove-size-threshold=0, and the expected body
+    is the same three-instruction sequence preceded by a load of the
+    constant into x2.  */
+ __builtin_memmove (out, in, 1024);
+}
+
+/*
+** do_it_mov_127:
+** mov x2, 127
+** cpyp \[x1\]\!, \[x0\]!, x2\!
+** cpym \[x1\]\!, \[x0\]!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+void do_it_mov_127 (char * in, char * out)
+{
+ /* Constant size of 127 bytes; the expected body still uses the
+    cpyp/cpym/cpye sequence with the size loaded into x2.  */
+ __builtin_memmove (out, in, 127);
+}
+
+/*
+** do_it_mov_128:
+** mov x2, 128
+** cpyp \[x1\]\!, \[x0\]!, x2\!
+** cpym \[x1\]\!, \[x0\]!, x2\!
+** cpye \[x1\]\!, \[x0\]\!, x2\!
+** ret
+*/
+void do_it_mov_128 (char * in, char * out)
+{
+ /* Constant size of 128 bytes; the expected body still uses the
+    cpyp/cpym/cpye sequence with the size loaded into x2.  */
+ __builtin_memmove (out, in, 128);
+}
+