aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2022-11-30 17:38:16 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2022-11-30 17:38:16 +0000
commitcbdffae5745327b0e5eb887afc512daf34b049b1 (patch)
treeff8575e5bfb0fbd897da1a0ad1b614b0990bdb5b
parent031d3f095520f0e1ee03e29b7ad5067c2a3f96e0 (diff)
downloadgcc-cbdffae5745327b0e5eb887afc512daf34b049b1.zip
gcc-cbdffae5745327b0e5eb887afc512daf34b049b1.tar.gz
gcc-cbdffae5745327b0e5eb887afc512daf34b049b1.tar.bz2
aarch64: Specify that FEAT_MOPS sequences clobber CC
According to the architecture pseudocode the FEAT_MOPS sequences overwrite the NZCV flags as part of their operation, so GCC needs to model that in the relevant RTL patterns. For the testcase: void g(); void foo (int a, size_t N, char *__restrict__ in, char *__restrict__ out) { if (a != 3) __builtin_memcpy (out, in, N); if (a > 3) g (); } we will currently generate: foo: cmp w0, 3 bne .L6 .L1: ret .L6: cpyfp [x3]!, [x2]!, x1! cpyfm [x3]!, [x2]!, x1! cpyfe [x3]!, [x2]!, x1! ble .L1 // Flags reused after CPYF* sequence b g This is wrong as the result of cmp needs to be recalculated after the MOPS sequence. With this patch we'll insert a "cmp w0, 3" before the ble, similar to what clang does. Bootstrapped and tested on aarch64-none-linux-gnu. Pushing to trunk and to the GCC 12 branch after some baking time. gcc/ChangeLog: * config/aarch64/aarch64.md (aarch64_cpymemdi): Specify clobber of CC reg. (*aarch64_cpymemdi): Likewise. (aarch64_movmemdi): Likewise. (aarch64_setmemdi): Likewise. (*aarch64_setmemdi): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/mops_5.c: New test. * gcc.target/aarch64/mops_6.c: Likewise. * gcc.target/aarch64/mops_7.c: Likewise.
-rw-r--r--gcc/config/aarch64/aarch64.md5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/mops_5.c17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/mops_6.c17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/mops_7.c16
4 files changed, 55 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 76b6898..8a18405 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1592,6 +1592,7 @@
[(set (match_operand 2) (const_int 0))
(clobber (match_dup 3))
(clobber (match_dup 4))
+ (clobber (reg:CC CC_REGNUM))
(set (match_operand 0)
(unspec:BLK [(match_operand 1) (match_dup 2)] UNSPEC_CPYMEM))])]
"TARGET_MOPS"
@@ -1605,6 +1606,7 @@
[(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (match_operand:DI 1 "register_operand" "+&r"))
+ (clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))]
"TARGET_MOPS"
@@ -1635,6 +1637,7 @@
(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (match_operand:DI 1 "register_operand" "+&r"))
+ (clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_MOVMEM))])]
"TARGET_MOPS"
@@ -1680,6 +1683,7 @@
[(parallel
[(set (match_operand 2) (const_int 0))
(clobber (match_dup 3))
+ (clobber (reg:CC CC_REGNUM))
(set (match_operand 0)
(unspec:BLK [(match_operand 1)
(match_dup 2)] UNSPEC_SETMEM))])]
@@ -1692,6 +1696,7 @@
(define_insn "*aarch64_setmemdi"
[(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
+ (clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(match_operand:QI 1 "aarch64_reg_or_zero" "rZ")
(match_dup 2)] UNSPEC_SETMEM))]
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_5.c b/gcc/testsuite/gcc.target/aarch64/mops_5.c
new file mode 100644
index 0000000..8a26267
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops" } */
+
+#include <stddef.h>
+
+void g();
+void foo (int a, size_t N, char *__restrict__ in,
+ char *__restrict__ out)
+{
+ if (a != 3)
+ __builtin_memcpy (out, in, N);
+ if (a > 3)
+ g ();
+}
+
+/* { dg-final { scan-assembler-times {cmp\tw0, *} 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_6.c b/gcc/testsuite/gcc.target/aarch64/mops_6.c
new file mode 100644
index 0000000..c6c9a54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops" } */
+
+#include <stddef.h>
+
+void g();
+void foo (int a, size_t N, char *__restrict__ in,
+ char *__restrict__ out)
+{
+ if (a != 3)
+ __builtin_memmove (out, in, N);
+ if (a > 3)
+ g ();
+}
+
+/* { dg-final { scan-assembler-times {cmp\tw0, *} 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_7.c b/gcc/testsuite/gcc.target/aarch64/mops_7.c
new file mode 100644
index 0000000..79720ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops" } */
+
+#include <stddef.h>
+
+void g();
+void foo (int a, size_t N, char *__restrict__ out)
+{
+ if (a != 3)
+ __builtin_memset (out, 0, N);
+ if (a > 3)
+ g ();
+}
+
+/* { dg-final { scan-assembler-times {cmp\tw0, *} 2 } } */
+