author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2017-11-08 18:23:35 +0000
committer  Kyrylo Tkachov <ktkachov@gcc.gnu.org>     2017-11-08 18:23:35 +0000
commit     6432f025b4fccaaca8564e0c2518cdba869c4bf5 (patch)
tree       2b1006da7b7f54b89c6a5965d73c2492f0e04c1f /gcc
parent     8d9a1ba7bbd5bf5b7f5cfe1c88dd38d70ac54bbc (diff)
Simplify vec_merge of vec_duplicate with const_vector
I'm trying to improve some of the RTL-level handling of vector lane
operations on aarch64, and that involves dealing with a lot of vec_merge
operations. One simplification that I noticed missing from simplify-rtx
is the combination of vec_merge with vec_duplicate. In this particular case:

(vec_merge (vec_duplicate (X)) (const_vector [A, B]) (const_int N))

which can be replaced with

(vec_concat (X) (B)) if N == 1 (0b01), or
(vec_concat (A) (X)) if N == 2 (0b10).

For the aarch64 testcase in this patch this simplification allows us to
try to combine:

(set (reg:V2DI 77 [ x ])
     (vec_concat:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64])
         (const_int 0 [0])))

instead of the more complex:

(set (reg:V2DI 77 [ x ])
     (vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64]))
         (const_vector:V2DI [
                 (const_int 0 [0])
                 (const_int 0 [0])
             ])
         (const_int 1 [0x1])))

For the simplified form above we already have an aarch64 pattern,
*aarch64_combinez<mode>, which is missing a DI/DFmode version due to an
oversight, so this patch extends that pattern as well to use the VDC mode
iterator that includes DI and DFmode (as well as V2HF, which VD_BHSI was
missing). The aarch64 hunk is needed to see the benefit of the
simplify-rtx.c hunk, so I didn't split them into separate patches.

Before this patch, for the testcase we'd generate:

construct_lanedi:
        movi    v0.4s, 0
        ldr     x0, [x0]
        ins     v0.d[0], x0
        ret

construct_lanedf:
        movi    v0.2d, 0
        ldr     d1, [x0]
        ins     v0.d[0], v1.d[0]
        ret

but now we can generate:

construct_lanedi:
        ldr     d0, [x0]
        ret

construct_lanedf:
        ldr     d0, [x0]
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

	* simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
	Simplify vec_merge of vec_duplicate and const_vector.
	* config/aarch64/predicates.md (aarch64_simd_or_scalar_imm_zero):
	New predicate.
	* config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Use VDC
	mode iterator.  Update predicate on operand 1 to
	handle non-const_vec constants.  Delete constraints.
	(*aarch64_combinez_be<mode>): Likewise for operand 2.

	* gcc.target/aarch64/construct_lane_zero_1.c: New test.

From-SVN: r254548
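To make the lane selection above concrete, here is a minimal standalone C
model of the two-lane vec_merge semantics (illustration only, not GCC
internals; the type and function names are invented for this sketch).
Selector bit i set means lane i is taken from the vec_duplicate operand,
otherwise from the const_vector:

#include <assert.h>

struct v2 { long long lane[2]; };

/* Model of (vec_merge (vec_duplicate x) cst sel) for a two-lane vector.  */
static struct v2
merge_dup_const (long long x, struct v2 cst, unsigned sel)
{
  struct v2 r;
  for (int i = 0; i < 2; i++)
    r.lane[i] = (sel & (1u << i)) ? x : cst.lane[i];
  return r;
}

int
main (void)
{
  struct v2 ab = { { 10 /* A */, 20 /* B */ } };

  /* sel == 1 (0b01): result is { X, B }, i.e. vec_concat (X, B).  */
  struct v2 r1 = merge_dup_const (42, ab, 1);
  assert (r1.lane[0] == 42 && r1.lane[1] == 20);

  /* sel == 2 (0b10): result is { A, X }, i.e. vec_concat (A, X).  */
  struct v2 r2 = merge_dup_const (42, ab, 2);
  assert (r2.lane[0] == 10 && r2.lane[1] == 42);

  return 0;
}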
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                            | 11
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                       | 10
-rw-r--r--  gcc/config/aarch64/predicates.md                         |  3
-rw-r--r--  gcc/simplify-rtx.c                                       | 16
-rw-r--r--  gcc/testsuite/ChangeLog                                  |  4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c | 37
6 files changed, 76 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7b13408..d1225c3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
+ Simplify vec_merge of vec_duplicate and const_vector.
+ * config/aarch64/predicates.md (aarch64_simd_or_scalar_imm_zero):
+ New predicate.
+ * config/aarch64/aarch64-simd.md (*aarch64_combinez<mode>): Use VDC
+ mode iterator. Update predicate on operand 1 to
+ handle non-const_vec constants. Delete constraints.
+ (*aarch64_combinez_be<mode>): Likewise for operand 2.
+
2017-11-08 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9aeeffd..34233f6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2940,9 +2940,9 @@
(define_insn "*aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
- (vec_concat:<VDBL>
- (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
+ (vec_concat:<VDBL>
+ (match_operand:VDC 1 "general_operand" "w,?r,m")
+ (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
mov\\t%0.8b, %1.8b
@@ -2956,8 +2956,8 @@
(define_insn "*aarch64_combinez_be<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
- (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
+ (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
+ (match_operand:VDC 1 "general_operand" "w,?r,m")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
mov\\t%0.8b, %1.8b
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index bf23b88..84d441a 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -364,6 +364,9 @@
return aarch64_simd_imm_zero_p (op, mode);
})
+(define_special_predicate "aarch64_simd_or_scalar_imm_zero"
+ (match_test "aarch64_simd_imm_zero_p (op, mode)"))
+
(define_special_predicate "aarch64_simd_imm_minus_one"
(match_code "const_vector")
{
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 5cdea35..94302f6 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5749,6 +5749,22 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
return op1;
}
}
+ /* Replace (vec_merge (vec_duplicate (X)) (const_vector [A, B])
+ (const_int N))
+ with (vec_concat (X) (B)) if N == 1 or
+ (vec_concat (A) (X)) if N == 2. */
+ if (GET_CODE (op0) == VEC_DUPLICATE
+ && GET_CODE (op1) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (op1) == 2
+ && GET_MODE_NUNITS (GET_MODE (op0)) == 2
+ && IN_RANGE (sel, 1, 2))
+ {
+ rtx newop0 = XEXP (op0, 0);
+ rtx newop1 = CONST_VECTOR_ELT (op1, 2 - sel);
+ if (sel == 2)
+ std::swap (newop0, newop1);
+ return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
+ }
}
if (rtx_equal_p (op0, op1)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d4a0b0f..28894ee 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/aarch64/construct_lane_zero_1.c: New test.
+
2017-11-08 Ed Schonberg <schonberg@adacore.com>
* gnat.dg/delta_aggr.adb: New testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
new file mode 100644
index 0000000..d87f329
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+ v2di x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0, y[0] }
+#else
+ { y[0], 0 }
+#endif
+ ;
+ return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+ v2df x =
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ { 0.0, y[0] }
+#else
+ { y[0], 0.0 }
+#endif
+ ;
+ return x;
+}
+
+/* Check that creating V2DI and V2DF vectors from a lane with a zero
+ makes use of the D-reg LDR rather than doing explicit lane inserts. */
+
+/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
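For reference only (not part of the patch): the same lane-zero construction
is often written with Advanced SIMD intrinsics. Assuming the usual arm_neon.h
definitions, a variant like the sketch below is expected to expand to the same
vec_merge/vec_duplicate shape and therefore to reduce to a single D-register
load at -O2 once the simplification applies; the function name is illustrative.

#include <arm_neon.h>

/* Illustrative intrinsics variant of construct_lanedi: insert y[0] into
   lane 0 of a zero vector.  */
int64x2_t
construct_lanedi_intrin (long long *y)
{
  return vsetq_lane_s64 (y[0], vdupq_n_s64 (0), 0);
}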