aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2017-11-08 18:32:09 +0000
committerKyrylo Tkachov <ktkachov@gcc.gnu.org>2017-11-08 18:32:09 +0000
commit7692ce17a3383c956400a55af88d4ff500dd614a (patch)
tree6b60e45f9e51298e4f08ceb8aa7d81d37be4479f /gcc
parent040939a2387eff082f1594fac35f007f93d2dcd8 (diff)
downloadgcc-7692ce17a3383c956400a55af88d4ff500dd614a.zip
gcc-7692ce17a3383c956400a55af88d4ff500dd614a.tar.gz
gcc-7692ce17a3383c956400a55af88d4ff500dd614a.tar.bz2
[AArch64] Add STP pattern to store a vec_concat of two 64-bit registers
On top of the previous vec_merge simplifications [1] we can add this pattern to
perform a store of a vec_concat of two 64-bit values in distinct registers as an STP.
This avoids constructing such a vector explicitly in a register and storing it as
a Q register.

This way for the code in the testcase we can generate:

construct_lane_1:
        ldp     d1, d0, [x0]
        fmov    d3, 1.0e+0
        fmov    d2, 2.0e+0
        fadd    d4, d1, d3
        fadd    d5, d0, d2
        stp     d4, d5, [x1, 32]
        ret

construct_lane_2:
        ldp     x2, x0, [x0]
        add     x3, x2, 1
        add     x4, x0, 2
        stp     x3, x4, [x1, 32]
        ret

instead of the current:

construct_lane_1:
        ldp     d0, d1, [x0]
        fmov    d3, 1.0e+0
        fmov    d2, 2.0e+0
        fadd    d0, d0, d3
        fadd    d1, d1, d2
        dup     v0.2d, v0.d[0]
        ins     v0.d[1], v1.d[0]
        str     q0, [x1, 32]
        ret

construct_lane_2:
        ldp     x2, x3, [x0]
        add     x0, x2, 1
        add     x2, x3, 2
        dup     v0.2d, x0
        ins     v0.d[1], x2
        str     q0, [x1, 32]
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

[1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html

* config/aarch64/aarch64-simd.md (store_pair_lanes<mode>): New pattern.
* config/aarch64/constraints.md (Uml): New constraint.
* config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New predicate.
* gcc.target/aarch64/store_v2vec_lanes.c: New test.

From-SVN: r254551
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/aarch64/aarch64-simd.md12
-rw-r--r--gcc/config/aarch64/constraints.md9
-rw-r--r--gcc/config/aarch64/predicates.md7
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c31
6 files changed, 71 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 37ccb2c..805625f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+ * config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
+ New pattern.
+ * config/aarch64/constraints.md (Uml): New constraint.
+ * config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
+ predicate.
+
+2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
* simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge
of two vec_duplicates into a vec_concat.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1f5c911..9a6da35 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2949,6 +2949,18 @@
[(set_attr "type" "neon_load1_1reg_q")]
)
+(define_insn "store_pair_lanes<mode>"
+ [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "register_operand" "w, r")
+ (match_operand:VDC 2 "register_operand" "w, r")))]
+ "TARGET_SIMD"
+ "@
+ stp\\t%d1, %d2, %0
+ stp\\t%x1, %x2, %0"
+ [(set_attr "type" "neon_stp, store_16")]
+)
+
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 4ef7a50..af4143e 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -171,6 +171,15 @@
(match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
PARALLEL, 1)")))
+;; Used for storing two 64-bit values held in separate registers to memory
+;; as a 128-bit vec_concat using a single STP.
+(define_memory_constraint "Uml"
+ "@internal
+ A memory address suitable for a load/store pair operation."
+ (and (match_code "mem")
+ (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
+ PARALLEL, 1)")))
+
(define_memory_constraint "Utv"
"@internal
An address valid for loading/storing opaque structure
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 84d441a..2eaf0a7 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -189,6 +189,13 @@
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
0)")))
+;; Used for storing two 64-bit values held in separate registers to memory
+;; as a 128-bit vec_concat using a single STP.
+(define_predicate "aarch64_mem_pair_lanes_operand"
+ (and (match_code "mem")
+ (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
+ PARALLEL, 1)")))
+
(define_predicate "aarch64_prefetch_operand"
(match_test "aarch64_address_valid_for_prefetch_p (op, false)"))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3d9c337..d20cadc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+ * gcc.target/aarch64/store_v2vec_lanes.c: New test.
+
+2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
* gcc.target/aarch64/load_v2vec_lanes_1.c: New test.
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
new file mode 100644
index 0000000..6810db3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+void
+construct_lane_1 (double *y, v2df *z)
+{
+ double y0 = y[0] + 1;
+ double y1 = y[1] + 2;
+ v2df x = {y0, y1};
+ z[2] = x;
+}
+
+void
+construct_lane_2 (long long *y, v2di *z)
+{
+ long long y0 = y[0] + 1;
+ long long y1 = y[1] + 2;
+ v2di x = {y0, y1};
+ z[2] = x;
+}
+
+/* We can use the store_pair_lanes<mode> pattern to store a vec_concat of
+ two DI/DF values held in separate registers to consecutive memory by
+ using a single STP, without building the vector in a Q register. */
+
+/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */