tree-optimization/101636 - CTOR vectorization ICE

The following fixes an ICE when vectorizing the defs of a CTOR
results in a different vector type than expected.  That can happen
with AARCH64 SVE and a fixed vector length as noted in r10-5979
and on x86 with AVX512 mask CTORs and trying to re-vectorize
using SSE as shown in this bug.  The fix is simply to reject
the vectorization when it didn't produce the desired type.

2022-02-23  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/101636
	* tree-vect-slp.cc (vect_print_slp_tree): Dump the
	vector type of the node.
	(vect_slp_analyze_operations): Make sure the CTOR
	is vectorized with an expected type.
	(vectorize_slp_instance_root_stmt): Revert r10-5979 fix.

	* gcc.target/i386/pr101636.c: New testcase.
	* c-c++-common/torture/pr101636.c: Likewise.
author: Richard Biener <rguenther@suse.de> 2022-02-23 11:15:38 +0100
committer: Richard Biener <rguenther@suse.de> 2022-02-23 12:14:14 +0100
commit: 6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f (patch)
tree: 0b5090ba37907105ef5293e1ce630aa813f56e08
parent: c8cb5098c7854a1ed07e85c6165ef0c348d6df1d (diff)
download: gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.zip
gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.tar.gz
gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.tar.bz2
3 files changed, 135 insertions, 6 deletions
diff --git a/gcc/testsuite/c-c++-common/torture/pr101636.c b/gcc/testsuite/c-c++-common/torture/pr101636.c
new file mode 100644
index 0000000..aedaa1f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/pr101636.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-mavx512f" { target x86_64-*-* i?86-*-* } } */
+
+static inline int
+foo (int y, int a)
+{
+  return (y && a) ? a : 0;
+}
+
+void
+bar (int *__restrict a, int *__restrict d, int *__restrict e, int i)
+{
+  while (i < 1)
+    {
+      e[8] = e[7] = e[6] = e[5] = e[4] = e[3] = e[2] = e[1] = e[0]
+        = foo (d[8], a[8]);
+      e[9] = foo (d[9], a[9]);
+      e[10] = foo (d[0], a[0]);
+      e[11] = foo (d[1], a[1]);
+      e[12] = foo (d[12], a[12]);
+      e[13] = foo (d[13], a[13]);
+      e[14] = foo (d[4], a[4]);
+      e[15] = foo (d[15], a[15]);
+
+      a += 16;
+      e += 1;
+      i += 1;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101636.c b/gcc/testsuite/gcc.target/i386/pr101636.c
new file mode 100644
index 0000000..76399cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101636.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O -mavx512f -ftree-vectorize -fno-vect-cost-model" } */
+
+typedef _Bool sbool1 __attribute__((signed_bool_precision(1)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef v16si v16sim __attribute__((vector_mask));
+typedef long v16di __attribute__((vector_size(128)));
+
+void __GIMPLE (ssa,guessed_local(118111600),startwith("slp"))
+bar (int * restrict a, int * restrict d, int * restrict e)
+{
+  int * vectp_14;
+  v16si * vectp_e_13;
+  v16si vect_iftmp_12;
+  v16sim mask__75_11;
+  v16sim mask__74_10;
+  v16si vect__6_9;
+  v16si vect__1_8;
+  int * vectp_7;
+  v16si * vectp_a_6;
+  int _2;
+  int _5;
+  int _7;
+  int _9;
+  int _11;
+  int _13;
+  int _15;
+  int _17;
+  _Bool _41;
+  _Bool _49;
+  _Bool _53;
+  _Bool _57;
+  _Bool _61;
+  _Bool _65;
+  _Bool _69;
+  _Bool _73;
+  sbool1 _135;
+  sbool1 _136;
+  sbool1 _137;
+  sbool1 _138;
+  sbool1 _139;
+  sbool1 _140;
+  sbool1 _141;
+  sbool1 _142;
+  sbool1 _143;
+  sbool1 _144;
+  sbool1 _145;
+  sbool1 _146;
+  sbool1 _147;
+  sbool1 _148;
+  sbool1 _149;
+  sbool1 _150;
+  v16sim _151;
+
+  __BB(2,guessed_local(105119324)):
+  _2 = __MEM <int> (d_26(D) + _Literal (int * restrict) 32);
+  _73 = _2 != 0;
+  _5 = __MEM <int> (d_26(D) + _Literal (int * restrict) 36);
+  _69 = _5 != 0;
+  _7 = __MEM <int> (d_26(D));
+  _65 = _7 != 0;
+  _9 = __MEM <int> (d_26(D) + _Literal (int * restrict) 4);
+  _61 = _9 != 0;
+  _11 = __MEM <int> (d_26(D) + _Literal (int * restrict) 48);
+  _57 = _11 != 0;
+  _13 = __MEM <int> (d_26(D) + _Literal (int * restrict) 52);
+  _53 = _13 != 0;
+  _15 = __MEM <int> (d_26(D) + _Literal (int * restrict) 16);
+  _41 = _15 != 0;
+  _17 = __MEM <int> (d_26(D) + _Literal (int * restrict) 60);
+  _49 = _17 != 0;
+  _135 = _49 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _136 = _41 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _137 = _53 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _138 = _57 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _139 = _61 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _140 = _65 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _141 = _69 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _142 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _143 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _144 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _145 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _146 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _147 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _148 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _149 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _150 = _73 ? _Literal (sbool1) -1 : _Literal (sbool1) 0;
+  _151 = _Literal (v16sim) {_150, _149, _148, _147, _146, _145, _144, _143, _142, _141, _140, _139, _138, _137, _136, _135};
+  vect__1_8_154 = __MEM <v16si, 32> ((int * restrict)a_22(D));
+  vect_iftmp_12_158 = _151 ? vect__6_9_154 : _Literal (v16si) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  __MEM <v16si, 32> ((int * restrict)e_23(D)) = vect_iftmp_12_158;
+  return;
+}
+
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 341bd52..f9bbc87 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2468,7 +2468,7 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
 
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
-  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
+  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)",
 		   SLP_TREE_DEF_TYPE (node) == vect_external_def
 		   ? " (external)"
 		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
@@ -2476,6 +2476,9 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
 		      : ""), node,
 		   estimated_poly_value (node->max_nunits),
 					 SLP_TREE_REF_COUNT (node));
+  if (SLP_TREE_VECTYPE (node))
+    dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
+  dump_printf (metadata, "\n");
   if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
     {
       if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
@@ -4925,7 +4928,13 @@ vect_slp_analyze_operations (vec_info *vinfo)
 	  /* CTOR instances require vectorized defs for the SLP tree root.  */
 	  || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_ctor
 	      && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
-		  != vect_internal_def))
+		  != vect_internal_def
+		  /* Make sure we vectorized with the expected type.  */
+		  || !useless_type_conversion_p
+			(TREE_TYPE (TREE_TYPE (gimple_assign_rhs1
+					      (instance->root_stmts[0]->stmt))),
+			 TREE_TYPE (SLP_TREE_VECTYPE
+					    (SLP_INSTANCE_TREE (instance))))))
 	  /* Check we can vectorize the reduction.  */
 	  || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc
 	      && !vectorizable_bb_reduc_epilogue (instance, &cost_vec)))
@@ -7373,10 +7382,6 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
 	  gimple *child_stmt = SLP_TREE_VEC_STMTS (node)[0];
 	  tree vect_lhs = gimple_get_lhs (child_stmt);
 	  tree root_lhs = gimple_get_lhs (instance->root_stmts[0]->stmt);
-	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
-					  TREE_TYPE (vect_lhs)))
-	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
-			       vect_lhs);
 	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
 	}
       else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
author	Richard Biener <rguenther@suse.de>	2022-02-23 11:15:38 +0100
committer	Richard Biener <rguenther@suse.de>	2022-02-23 12:14:14 +0100
commit	6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f (patch)
tree	0b5090ba37907105ef5293e1ce630aa813f56e08
parent	c8cb5098c7854a1ed07e85c6165ef0c348d6df1d (diff)
download	gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.zip gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.tar.gz gcc-6e80c4f1ad9046b0a7c105660cc7b3dcae0fdb8f.tar.bz2