aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2022-04-05 17:31:36 +0100
committer Richard Sandiford <richard.sandiford@arm.com> 2022-04-05 17:31:36 +0100
commit 65b77d0eece6020b927f2b8de0ac5315224e38b7 (patch)
tree a5290e793ef018edb099d2ba831908e4c0db36fe /gcc
parent 14814e20161d7b6a4e9cac244c7013fa56f71f55 (diff)
download gcc-65b77d0eece6020b927f2b8de0ac5315224e38b7.zip
gcc-65b77d0eece6020b927f2b8de0ac5315224e38b7.tar.gz
gcc-65b77d0eece6020b927f2b8de0ac5315224e38b7.tar.bz2
aarch64: Stop +mops clobbering variable values
The mops cpy* patterns take three registers: a destination address,
a source address, and a size.  The patterns clobber all three
registers as part of the operation.

The set* patterns take a destination address, a size, and a store
value, and they clobber the first two registers as part of the
operation.

However, the associated expanders would try to use existing source,
destination and size registers where possible.  Any variables in
those registers could therefore change unexpectedly.

For example:

    void
    copy1 (int *x, int *y, long z, int **res)
    {
      __builtin_memcpy (x, y, z);
      *res = x;
    }

generated:

    cpyfp [x0]!, [x1]!, x2!
    cpyfm [x0]!, [x1]!, x2!
    cpyfe [x0]!, [x1]!, x2!
    str x0, [x3]
    ret

which stores the incremented x at *res.

gcc/
    * config/aarch64/aarch64.md (aarch64_cpymemdi): Turn into a
    define_expand and turn operands 0 and 1 from REGs to MEMs.
    (*aarch64_cpymemdi): New pattern.
    (aarch64_setmemdi): Turn into a define_expand and turn operand 0
    from a REG to a MEM.
    (*aarch64_setmemdi): New pattern.
    * config/aarch64/aarch64.cc (aarch64_expand_cpymem_mops): Use
    copy_to_mode_reg on all three registers.  Replace the original
    MEM addresses rather than creating wild reads and writes.
    (aarch64_expand_setmem_mops): Likewise for the size and for the
    destination memory and address.

gcc/testsuite/
    * gcc.target/aarch64/mops_4.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64.cc 38
-rw-r--r-- gcc/config/aarch64/aarch64.md 53
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/mops_4.c 115
3 files changed, 171 insertions, 35 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f8049..3e2a6fb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24531,17 +24531,15 @@ aarch64_expand_cpymem_mops (rtx *operands)
{
if (!TARGET_MOPS)
return false;
- rtx addr_dst = XEXP (operands[0], 0);
- rtx addr_src = XEXP (operands[1], 0);
- rtx sz_reg = operands[2];
-
- if (!REG_P (sz_reg))
- sz_reg = force_reg (DImode, sz_reg);
- if (!REG_P (addr_dst))
- addr_dst = force_reg (DImode, addr_dst);
- if (!REG_P (addr_src))
- addr_src = force_reg (DImode, addr_src);
- emit_insn (gen_aarch64_cpymemdi (addr_dst, addr_src, sz_reg));
+
+ /* All three registers are changed by the instruction, so each one
+ must be a fresh pseudo. */
+ rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ rtx src_addr = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ rtx dst_mem = replace_equiv_address (operands[0], dst_addr);
+ rtx src_mem = replace_equiv_address (operands[1], src_addr);
+ rtx sz_reg = copy_to_mode_reg (DImode, operands[2]);
+ emit_insn (gen_aarch64_cpymemdi (dst_mem, src_mem, sz_reg));
return true;
}
@@ -24718,17 +24716,15 @@ aarch64_expand_setmem_mops (rtx *operands)
if (!TARGET_MOPS)
return false;
- rtx addr_dst = XEXP (operands[0], 0);
- rtx sz_reg = operands[1];
+ /* The first two registers are changed by the instruction, so both
+ of them must be a fresh pseudo. */
+ rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ rtx dst_mem = replace_equiv_address (operands[0], dst_addr);
+ rtx sz_reg = copy_to_mode_reg (DImode, operands[1]);
rtx val = operands[2];
-
- if (!REG_P (sz_reg))
- sz_reg = force_reg (DImode, sz_reg);
- if (!REG_P (addr_dst))
- addr_dst = force_reg (DImode, addr_dst);
- if (!REG_P (val) && val != CONST0_RTX (QImode))
- val = force_reg (QImode, val);
- emit_insn (gen_aarch64_setmemdi (addr_dst, val, sz_reg));
+ if (val != CONST0_RTX (QImode))
+ val = force_reg (QImode, val);
+ emit_insn (gen_aarch64_setmemdi (dst_mem, val, sz_reg));
return true;
}
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c985250..f5c6359 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1581,16 +1581,29 @@
}
)
-(define_insn "aarch64_cpymemdi"
- [(parallel [
- (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+(define_expand "aarch64_cpymemdi"
+ [(parallel
+ [(set (match_operand 2) (const_int 0))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (set (match_operand 0)
+ (unspec:BLK [(match_operand 1) (match_dup 2)] UNSPEC_CPYMEM))])]
+ "TARGET_MOPS"
+ {
+ operands[3] = XEXP (operands[0], 0);
+ operands[4] = XEXP (operands[1], 0);
+ }
+)
+
+(define_insn "*aarch64_cpymemdi"
+ [(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (match_operand:DI 1 "register_operand" "+&r"))
(set (mem:BLK (match_dup 0))
- (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))])]
- "TARGET_MOPS"
- "cpyfp\t[%x0]!, [%x1]!, %x2!\;cpyfm\t[%x0]!, [%x1]!, %x2!\;cpyfe\t[%x0]!, [%x1]!, %x2!"
- [(set_attr "length" "12")]
+ (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))]
+ "TARGET_MOPS"
+ "cpyfp\t[%x0]!, [%x1]!, %x2!\;cpyfm\t[%x0]!, [%x1]!, %x2!\;cpyfe\t[%x0]!, [%x1]!, %x2!"
+ [(set_attr "length" "12")]
)
;; 0 is dst
@@ -1657,16 +1670,28 @@
}
)
-(define_insn "aarch64_setmemdi"
- [(parallel [
- (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+(define_expand "aarch64_setmemdi"
+ [(parallel
+ [(set (match_operand 2) (const_int 0))
+ (clobber (match_dup 3))
+ (set (match_operand 0)
+ (unspec:BLK [(match_operand 1)
+ (match_dup 2)] UNSPEC_SETMEM))])]
+ "TARGET_MOPS"
+ {
+ operands[3] = XEXP (operands[0], 0);
+ }
+)
+
+(define_insn "*aarch64_setmemdi"
+ [(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(match_operand:QI 1 "aarch64_reg_or_zero" "rZ")
- (match_dup 2)] UNSPEC_SETMEM))])]
- "TARGET_MOPS"
- "setp\t[%x0]!, %x2!, %x1\;setm\t[%x0]!, %x2!, %x1\;sete\t[%x0]!, %x2!, %x1"
- [(set_attr "length" "12")]
+ (match_dup 2)] UNSPEC_SETMEM))]
+ "TARGET_MOPS"
+ "setp\t[%x0]!, %x2!, %x1\;setm\t[%x0]!, %x2!, %x1\;sete\t[%x0]!, %x2!, %x1"
+ [(set_attr "length" "12")]
)
;; 0 is dst
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_4.c b/gcc/testsuite/gcc.target/aarch64/mops_4.c
new file mode 100644
index 0000000..1b87759
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_4.c
@@ -0,0 +1,115 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+/*
+** copy1:
+** mov (x[0-9]+), x0
+** cpyfp \[\1\]!, \[x1\]!, x2!
+** cpyfm \[\1\]!, \[x1\]!, x2!
+** cpyfe \[\1\]!, \[x1\]!, x2!
+** str x0, \[x3\]
+** ret
+*/
+void
+copy1 (int *x, int *y, long z, int **res)
+{
+ __builtin_memcpy (x, y, z);
+ *res = x;
+}
+
+/*
+** copy2:
+** mov (x[0-9]+), x1
+** cpyfp \[x0\]!, \[\1\]!, x2!
+** cpyfm \[x0\]!, \[\1\]!, x2!
+** cpyfe \[x0\]!, \[\1\]!, x2!
+** str x1, \[x3\]
+** ret
+*/
+void
+copy2 (int *x, int *y, long z, int **res)
+{
+ __builtin_memcpy (x, y, z);
+ *res = y;
+}
+
+/*
+** copy3:
+** mov (x[0-9]+), x2
+** cpyfp \[x0\]!, \[x1\]!, \1!
+** cpyfm \[x0\]!, \[x1\]!, \1!
+** cpyfe \[x0\]!, \[x1\]!, \1!
+** str x2, \[x3\]
+** ret
+*/
+void
+copy3 (int *x, int *y, long z, long *res)
+{
+ __builtin_memcpy (x, y, z);
+ *res = z;
+}
+
+/*
+** set1:
+** mov (x[0-9]+), x0
+** setp \[\1\]!, x2!, x1
+** setm \[\1\]!, x2!, x1
+** sete \[\1\]!, x2!, x1
+** str x0, \[x3\]
+** ret
+*/
+void
+set1 (char *x, char y, long z, char **res)
+{
+ __builtin_memset (x, y, z);
+ *res = x;
+}
+
+/*
+** set2:
+** ldrb w([0-9]+), \[x1\]
+** setp \[x0\]!, x2!, x\1
+** setm \[x0\]!, x2!, x\1
+** sete \[x0\]!, x2!, x\1
+** strb w\1, \[x3\]
+** ret
+*/
+void
+set2 (char *x, char *yptr, long z, char *res)
+{
+ char y = *yptr;
+ __builtin_memset (x, y, z);
+ *res = y;
+}
+
+/*
+** set3:
+** mov (x[0-9]+), x2
+** setp \[x0\]!, \1!, x1
+** setm \[x0\]!, \1!, x1
+** sete \[x0\]!, \1!, x1
+** str x2, \[x3\]
+** ret
+*/
+void
+set3 (char *x, char y, long z, long *res)
+{
+ __builtin_memset (x, y, z);
+ *res = z;
+}
+
+/*
+** set4:
+** setp \[x0\]!, x1!, xzr
+** setm \[x0\]!, x1!, xzr
+** sete \[x0\]!, x1!, xzr
+** strb wzr, \[x2\]
+** ret
+*/
+void
+set4 (char *x, long z, char *res)
+{
+ __builtin_memset (x, 0, z);
+ *res = 0;
+}