4 files changed, 92 insertions, 76 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8215995..59f4508 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2018-05-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64-simd.md (vec_set<mode>): Use VALL_F16 mode
+	iterator.  Delete separate integer-mode vec_set<mode> expander.
+	(aarch64_simd_vec_setv2di): Delete.
+	(vec_setv2di): Delete.
+	(aarch64_simd_vec_set<mode>): Delete all other patterns with that name.
+	Use VALL_F16 mode iterator.  Add LD1 alternative and use vwcore for
+	the "w, r" alternative.
+
 2018-05-18  Martin Liska  <mliska@suse.cz>
 
 	* passes.def: Add pass_lower_switch and pass_lower_switch_O0.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9cfd4d3..2ebd256 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -694,11 +694,11 @@
 )
 
 (define_insn "aarch64_simd_vec_set<mode>"
-  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
-        (vec_merge:VDQ_BHSI
-	    (vec_duplicate:VDQ_BHSI
-		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
-	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
+  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
+	(vec_merge:VALL_F16
+	    (vec_duplicate:VALL_F16
+		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
+	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
   "TARGET_SIMD"
   {
@@ -707,16 +707,16 @@
    switch (which_alternative)
      {
      case 0:
-	return "ins\\t%0.<Vetype>[%p2], %w1";
-     case 1:
 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
+     case 1:
+	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
      case 2:
         return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
 	gcc_unreachable ();
      }
   }
-  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
+  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 )
 
 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
@@ -1030,19 +1030,6 @@
   }
 )
 
-(define_expand "vec_set<mode>"
-  [(match_operand:VDQ_BHSI 0 "register_operand")
-   (match_operand:<VEL> 1 "register_operand")
-   (match_operand:SI 2 "immediate_operand")]
-  "TARGET_SIMD"
-  {
-    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
-    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
-					    GEN_INT (elem), operands[0]));
-    DONE;
-  }
-)
-
 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
 (define_insn "vec_shr_<mode>"
   [(set (match_operand:VD 0 "register_operand" "=w")
@@ -1059,62 +1046,8 @@
   [(set_attr "type" "neon_shift_imm")]
 )
 
-(define_insn "aarch64_simd_vec_setv2di"
-  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
-        (vec_merge:V2DI
-	    (vec_duplicate:V2DI
-		(match_operand:DI 1 "register_operand" "r,w"))
-	    (match_operand:V2DI 3 "register_operand" "0,0")
-	    (match_operand:SI 2 "immediate_operand" "i,i")))]
-  "TARGET_SIMD"
-  {
-    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
-    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
-    switch (which_alternative)
-      {
-      case 0:
-	return "ins\\t%0.d[%p2], %1";
-      case 1:
-        return "ins\\t%0.d[%p2], %1.d[0]";
-      default:
-	gcc_unreachable ();
-      }
-  }
-  [(set_attr "type" "neon_from_gp, neon_ins_q")]
-)
-
-(define_expand "vec_setv2di"
-  [(match_operand:V2DI 0 "register_operand")
-   (match_operand:DI 1 "register_operand")
-   (match_operand:SI 2 "immediate_operand")]
-  "TARGET_SIMD"
-  {
-    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
-    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
-					  GEN_INT (elem), operands[0]));
-    DONE;
-  }
-)
-
-(define_insn "aarch64_simd_vec_set<mode>"
-  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
-	(vec_merge:VDQF_F16
-	    (vec_duplicate:VDQF_F16
-		(match_operand:<VEL> 1 "register_operand" "w"))
-	    (match_operand:VDQF_F16 3 "register_operand" "0")
-	    (match_operand:SI 2 "immediate_operand" "i")))]
-  "TARGET_SIMD"
-  {
-    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
-
-    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
-    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
-  }
-  [(set_attr "type" "neon_ins<q>")]
-)
-
 (define_expand "vec_set<mode>"
-  [(match_operand:VDQF_F16 0 "register_operand" "+w")
+  [(match_operand:VALL_F16 0 "register_operand" "+w")
    (match_operand:<VEL> 1 "register_operand" "w")
    (match_operand:SI 2 "immediate_operand" "")]
   "TARGET_SIMD"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 765e08c..840d9a0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2018-05-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/vect-init-ld1.c: New test.
+
 2018-05-18  Martin Liska  <mliska@suse.cz>
 
 	* gcc.dg/tree-ssa/vrp104.c: Adjust dump file that is scanned.
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-init-ld1.c b/gcc/testsuite/gcc.target/aarch64/vect-init-ld1.c
new file mode 100644
index 0000000..d7ff9af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-init-ld1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+typedef __fp16 v4hf __attribute__ ((vector_size (8)));
+typedef __fp16 v8hf __attribute__ ((vector_size (16)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+#define FUNC2(T, IT)    \
+T                       \
+foo_##T (IT *a, IT *b)  \
+{                       \
+  T res = { *a, *b };   \
+  return res;           \
+}
+
+FUNC2(v2di, long long)
+FUNC2(v2si, int)
+FUNC2(v2df, double)
+FUNC2(v2sf, float)
+
+#define FUNC4(T, IT)    \
+T                       \
+foo_##T (IT *a, IT *b, IT *c, IT *d)    \
+{                                       \
+  T res = { *a, *b, *c, *d };           \
+  return res;                           \
+}
+
+FUNC4(v4si, int)
+FUNC4(v4hi, short)
+FUNC4(v4sf, float)
+FUNC4(v4hf, __fp16)
+
+#define FUNC8(T, IT)    \
+T                       \
+foo_##T (IT *a, IT *b, IT *c, IT *d, IT *e, IT *f, IT *g, IT *h)        \
+{                                                                       \
+  T res = { *a, *b, *c, *d, *e, *f, *g, *h };                           \
+  return res;                                                           \
+}
+
+FUNC8(v8hi, short)
+FUNC8(v8qi, char)
+FUNC8(v8hf, __fp16)
+
+
+v16qi
+foo_v16qi (char *a, char *a1, char *a2, char *a3, char *a4, char *a5,
+     char *a6, char *a7, char *a8, char *a9, char *a10, char *a11, char *a12,
+     char *a13, char *a14, char *a15)
+{
+  v16qi res = { *a, *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8, *a9, *a10,
+               *a11, *a12, *a13, *a14, *a15 };
+  return res;
+}
+
+/* { dg-final { scan-assembler-not "ld1r\t" } } */
+/* { dg-final { scan-assembler-not "dup\t" } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */