aarch64: Add =r,m and =m,r alternatives to 64-bit vector move patterns

We can use the X registers to load and store 64-bit vector modes; we just need to add the alternatives to the mov patterns. This straightforward patch does that, for the pair variants too. For the testcase added by this patch we now generate the optimal assembly without any superfluous GP<->SIMD moves. Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Add =r,m and =m,r alternatives. (load_pair<DREG:mode><DREG2:mode>): Likewise. (vec_store_pair<DREG:mode><DREG2:mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/xreg-vec-modes_1.c: New test.
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-06-01 09:37:06 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-06-01 09:37:06 +0100
commit: 12e71b593ea0c64d919df525cd75ea10b7be8a4b (patch)
tree: d6ca160cad485316ad68de0d2401a9530a2efa03
parent: 2df7e45188f32e3c448e004af38d56eb9ab8d959 (diff)
download: gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.zip
gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.tar.gz
gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.tar.bz2
2 files changed, 65 insertions, 17 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4904a50..9307a57 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -144,26 +144,28 @@
 
 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
-		"=w, m,  m,  w, ?r, ?w, ?r,  w,  w")
+		"=w, r, m,  m, m,  w, ?r, ?w, ?r,  w,  w")
 	(match_operand:VDMOV 1 "general_operand"
-		"m,  Dz, w,  w,  w,  r,  r, Dn, Dz"))]
+		"m,  m, Dz, w, r,  w,  w,  r,  r, Dn, Dz"))]
   "TARGET_FLOAT
    && (register_operand (operands[0], <MODE>mode)
        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
   "@
    ldr\t%d0, %1
+   ldr\t%x0, %1
    str\txzr, %0
    str\t%d1, %0
+   str\t%x1, %0
    * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
    * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
    fmov\t%d0, %1
    mov\t%0, %1
    * return aarch64_output_simd_mov_immediate (operands[1], 64);
    fmov\t%d0, xzr"
-  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
-		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
+  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
+		     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
 		     mov_reg, neon_move<q>, f_mcr")
-   (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
+   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
 )
 
 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
@@ -205,31 +207,35 @@
 )
 
 (define_insn "load_pair<DREG:mode><DREG2:mode>"
-  [(set (match_operand:DREG 0 "register_operand" "=w")
-	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
-   (set (match_operand:DREG2 2 "register_operand" "=w")
-	(match_operand:DREG2 3 "memory_operand" "m"))]
+  [(set (match_operand:DREG 0 "register_operand" "=w,r")
+	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+   (set (match_operand:DREG2 2 "register_operand" "=w,r")
+	(match_operand:DREG2 3 "memory_operand" "m,m"))]
   "TARGET_FLOAT
    && rtx_equal_p (XEXP (operands[3], 0),
 		   plus_constant (Pmode,
 				  XEXP (operands[1], 0),
 				  GET_MODE_SIZE (<DREG:MODE>mode)))"
-  "ldp\\t%d0, %d2, %z1"
-  [(set_attr "type" "neon_ldp")]
+  "@
+   ldp\t%d0, %d2, %z1
+   ldp\t%x0, %x2, %z1"
+  [(set_attr "type" "neon_ldp,load_16")]
 )
 
 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
-  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
-	(match_operand:DREG 1 "register_operand" "w"))
-   (set (match_operand:DREG2 2 "memory_operand" "=m")
-	(match_operand:DREG2 3 "register_operand" "w"))]
+  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+	(match_operand:DREG 1 "register_operand" "w,r"))
+   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
+	(match_operand:DREG2 3 "register_operand" "w,r"))]
   "TARGET_FLOAT
    && rtx_equal_p (XEXP (operands[2], 0),
 		   plus_constant (Pmode,
 				  XEXP (operands[0], 0),
 				  GET_MODE_SIZE (<DREG:MODE>mode)))"
-  "stp\\t%d1, %d3, %z0"
-  [(set_attr "type" "neon_stp")]
+  "@
+   stp\t%d1, %d3, %z0
+   stp\t%x1, %x3, %z0"
+  [(set_attr "type" "neon_stp,store_16")]
 )
 
 (define_insn "aarch64_simd_stp<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
new file mode 100644
index 0000000..fc4dcb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef unsigned int v2si  __attribute__((vector_size (8)));
+
+#define force_gp(V1)   asm volatile (""				\
+           : "=r"(V1)                                           \
+           : "r"(V1)                                            \
+           : /* No clobbers */);
+
+/*
+** foo:
+**	ldr	(x[0-9]+), \[x1\]
+**	str	\1, \[x0\]
+**	ret
+*/
+
+void
+foo (v2si *a, v2si *b)
+{
+  v2si tmp = *b;
+  force_gp (tmp);
+  *a = tmp;
+}
+
+/*
+** foo2:
+**	ldp	(x[0-9]+), (x[0-9]+), \[x0\]
+**	stp	\1, \2, \[x1\]
+**	ret
+*/
+void
+foo2 (v2si *a, v2si *b)
+{
+  v2si t1 = *a;
+  v2si t2 = a[1];
+  force_gp (t1);
+  force_gp (t2);
+  *b = t1;
+  b[1] = t2;
+}
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-06-01 09:37:06 +0100
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-06-01 09:37:06 +0100
commit	12e71b593ea0c64d919df525cd75ea10b7be8a4b (patch)
tree	d6ca160cad485316ad68de0d2401a9530a2efa03
parent	2df7e45188f32e3c448e004af38d56eb9ab8d959 (diff)
download	gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.zip gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.tar.gz gcc-12e71b593ea0c64d919df525cd75ea10b7be8a4b.tar.bz2