tree-optimization/114249 - ICE with BB reduction vectorization

When we scrap the last def of a BB reduction with an odd number of lanes, we can end up recording a pattern def, which will later wreck code generation. The following puts this logic where it better belongs, avoiding this issue.

	PR tree-optimization/114249
	* tree-vect-slp.cc (vect_build_slp_instance): Move making a BB reduction lane number even ...
	(vect_slp_check_for_roots): ... here to avoid leaking pattern defs.

	* gcc.dg/vect/bb-slp-pr114249.c: New testcase.
author: Richard Biener <rguenther@suse.de> 2024-03-06 09:25:15 +0100
committer: Richard Biener <rguenther@suse.de> 2024-03-06 10:27:34 +0100
commit: 3a910114fdb2aa76495c4c748acf6b9c7fbecc89 (patch)
tree: 330d70c0bb3ea1f436532e3d5eb1a10b84227fba
parent: 0249744a9fe0775c2c895727aeebec4c59fd5f95 (diff)
download: gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.zip
gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.tar.gz
gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.tar.bz2
2 files changed, 30 insertions, 10 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
new file mode 100644
index 0000000..64c93cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+enum { SEG_THIN_POOL } read_only;
+struct {
+  unsigned skip_block_zeroing;
+  unsigned ignore_discard;
+  unsigned no_discard_passdown;
+  unsigned error_if_no_space;
+} _thin_pool_emit_segment_line_seg;
+void dm_snprintf();
+void _emit_segment()
+{
+  int features =
+      (_thin_pool_emit_segment_line_seg.error_if_no_space ? 1 : 0) +
+      (read_only ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.ignore_discard ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.no_discard_passdown ? 1 : 0) +
+      (_thin_pool_emit_segment_line_seg.skip_block_zeroing ? 1 : 0);
+  dm_snprintf(features);
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 324400d..527b06c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3288,15 +3288,6 @@ vect_build_slp_instance (vec_info *vinfo,
 			 "  %G", scalar_stmts[i]->stmt);
     }
 
-  /* When a BB reduction doesn't have an even number of lanes
-     strip it down, treating the remaining lane as scalar.
-     ???  Selecting the optimal set of lanes to vectorize would be nice
-     but SLP build for all lanes will fail quickly because we think
-     we're going to need unrolling.  */
-  if (kind == slp_inst_kind_bb_reduc
-      && (scalar_stmts.length () & 1))
-    remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
-
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
@@ -7549,6 +7540,7 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 	      /* ???  For now do not allow mixing ops or externs/constants.  */
 	      bool invalid = false;
 	      unsigned remain_cnt = 0;
+	      unsigned last_idx = 0;
 	      for (unsigned i = 0; i < chain.length (); ++i)
 		{
 		  if (chain[i].code != code)
@@ -7563,7 +7555,13 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 						      (chain[i].op)->stmt)
 			  != chain[i].op))
 		    remain_cnt++;
+		  else
+		    last_idx = i;
 		}
+	      /* Make sure to have an even number of lanes as we later do
+		 all-or-nothing discovery, not trying to split further.  */
+	      if ((chain.length () - remain_cnt) & 1)
+		remain_cnt++;
 	      if (!invalid && chain.length () - remain_cnt > 1)
 		{
 		  vec<stmt_vec_info> stmts;
@@ -7576,7 +7574,9 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
 		      stmt_vec_info stmt_info;
 		      if (chain[i].dt == vect_internal_def
 			  && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
-			      gimple_get_lhs (stmt_info->stmt) == chain[i].op))
+			      gimple_get_lhs (stmt_info->stmt) == chain[i].op)
+			  && (i != last_idx
+			      || (stmts.length () & 1)))
 			stmts.quick_push (stmt_info);
 		      else
 			remain.quick_push (chain[i].op);
author	Richard Biener <rguenther@suse.de>	2024-03-06 09:25:15 +0100
committer	Richard Biener <rguenther@suse.de>	2024-03-06 10:27:34 +0100
commit	3a910114fdb2aa76495c4c748acf6b9c7fbecc89 (patch)
tree	330d70c0bb3ea1f436532e3d5eb1a10b84227fba
parent	0249744a9fe0775c2c895727aeebec4c59fd5f95 (diff)
download	gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.zip gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.tar.gz gcc-3a910114fdb2aa76495c4c748acf6b9c7fbecc89.tar.bz2