Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 836
1 file changed, 601 insertions(+), 235 deletions(-)
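Note: the largest hunk below reworks SLP induction generation in vectorizable_induction so that every lane of the SLP group keeps its own initial value and step, as described by the new comment for group size 3. The following standalone sketch is not part of the patch; it only prints the lane layout that the patch's indexing (`steps[(ivn*const_nunits + eltn) % group_size]`, `mul_elt = (ivn*const_nunits + eltn) / group_size`) produces, for an assumed group size of 3 and 4-lane vectors.

```c
/* Standalone illustration (not GCC code) of the per-lane SLP induction
   layout from the vectorizable_induction hunk: lane k of the concatenated
   vectors holds i(k % group_size) + (k / group_size) * S(k % group_size).  */

#include <stdio.h>

int
main (void)
{
  const unsigned group_size = 3;   /* inductions i0..i2 with steps S0..S2 */
  const unsigned nunits = 4;       /* lanes per vector */
  const unsigned nvects = 3;       /* vector copies needed */

  for (unsigned ivn = 0; ivn < nvects; ++ivn)
    {
      printf ("[");
      for (unsigned eltn = 0; eltn < nunits; ++eltn)
	{
	  unsigned k = ivn * nunits + eltn;  /* global lane index */
	  unsigned iv = k % group_size;      /* which induction this lane uses */
	  unsigned mul = k / group_size;     /* how many steps to add to it */
	  printf ("%si%u + %u*S%u", eltn ? ", " : "", iv, mul, iv);
	}
      printf ("]\n");
    }
  return 0;
}
```

Running it prints [i0 + 0*S0, i1 + 0*S1, i2 + 0*S2, i0 + 1*S0] and so on, matching the example vectors in the patch's comment.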
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index e42f327..d6f1ffc 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -666,27 +666,50 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) unsigned i; FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) - if (STMT_VINFO_IN_PATTERN_P (first)) - { - stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); - while (next) - { - if (! STMT_VINFO_IN_PATTERN_P (next) - || STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1) - break; - next = REDUC_GROUP_NEXT_ELEMENT (next); - } - /* If not all stmt in the chain are patterns or if we failed - to update STMT_VINFO_REDUC_IDX try to handle the chain - without patterns. */ - if (! next - && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1) - { - vect_fixup_reduc_chain (first); - LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] - = STMT_VINFO_RELATED_STMT (first); - } - } + { + stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); + while (next) + { + if ((STMT_VINFO_IN_PATTERN_P (next) + != STMT_VINFO_IN_PATTERN_P (first)) + || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1) + break; + next = REDUC_GROUP_NEXT_ELEMENT (next); + } + /* If all reduction chain members are well-formed patterns adjust + the group to group the pattern stmts instead. */ + if (! next + && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1) + { + if (STMT_VINFO_IN_PATTERN_P (first)) + { + vect_fixup_reduc_chain (first); + LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] + = STMT_VINFO_RELATED_STMT (first); + } + } + /* If not all stmt in the chain are patterns or if we failed + to update STMT_VINFO_REDUC_IDX dissolve the chain and handle + it as regular reduction instead. */ + else + { + stmt_vec_info vinfo = first; + stmt_vec_info last = NULL; + while (vinfo) + { + next = REDUC_GROUP_NEXT_ELEMENT (vinfo); + REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; + REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; + last = vinfo; + vinfo = next; + } + STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first)) + = vect_internal_def; + loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last)); + LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i); + --i; + } + } } /* Function vect_get_loop_niters. @@ -1804,6 +1827,19 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) } } + /* If using the "very cheap" model. reject cases in which we'd keep + a copy of the scalar code (even if we might be able to vectorize it). */ + if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP + && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "some scalar iterations would need to be peeled\n"); + return 0; + } + int min_profitable_iters, min_profitable_estimate; vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, &min_profitable_estimate); @@ -1862,6 +1898,20 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) min_profitable_estimate = min_profitable_iters; } + /* If the vector loop needs multiple iterations to be beneficial then + things are probably too close to call, and the conservative thing + would be to stick with the scalar code. 
*/ + if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP + && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "one iteration of the vector loop would be" + " more expensive than the equivalent number of" + " iterations of the scalar loop\n"); + return 0; + } + HOST_WIDE_INT estimated_niter; /* If we are vectorizing an epilogue then we know the maximum number of @@ -2275,6 +2325,9 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) /* Optimize the SLP graph with the vectorization factor fixed. */ vect_optimize_slp (loop_vinfo); + + /* Gather the loads reachable from the SLP graph entries. */ + vect_gather_slp_loads (loop_vinfo); } bool saved_can_use_partial_vectors_p @@ -2342,6 +2395,79 @@ start_over: "unsupported SLP instances\n"); goto again; } + + /* Check whether any load in ALL SLP instances is possibly permuted. */ + slp_tree load_node, slp_root; + unsigned i, x; + slp_instance instance; + bool can_use_lanes = true; + FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance) + { + slp_root = SLP_INSTANCE_TREE (instance); + int group_size = SLP_TREE_LANES (slp_root); + tree vectype = SLP_TREE_VECTYPE (slp_root); + bool loads_permuted = false; + FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) + { + if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ()) + continue; + unsigned j; + stmt_vec_info load_info; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) + if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j) + { + loads_permuted = true; + break; + } + } + + /* If the loads and stores can be handled with load/store-lane + instructions record it and move on to the next instance. */ + if (loads_permuted + && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store + && vect_store_lanes_supported (vectype, group_size, false)) + { + FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) + { + stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT + (SLP_TREE_SCALAR_STMTS (load_node)[0]); + /* Use SLP for strided accesses (or if we can't + load-lanes). */ + if (STMT_VINFO_STRIDED_P (stmt_vinfo) + || ! vect_load_lanes_supported + (STMT_VINFO_VECTYPE (stmt_vinfo), + DR_GROUP_SIZE (stmt_vinfo), false)) + break; + } + + can_use_lanes + = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length (); + + if (can_use_lanes && dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP instance %p can use load/store-lanes\n", + instance); + } + else + { + can_use_lanes = false; + break; + } + } + + /* If all SLP instances can use load/store-lanes abort SLP and try again + with SLP disabled. */ + if (can_use_lanes) + { + ok = opt_result::failure_at (vect_location, + "Built SLP cancelled: can use " + "load/store-lanes\n"); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Built SLP cancelled: all SLP instances support " + "load/store-lanes\n"); + goto again; + } } /* Dissolve SLP-only groups. 
*/ @@ -2572,9 +2698,13 @@ again: STMT_SLP_TYPE (stmt_info) = loop_vect; if (STMT_VINFO_IN_PATTERN_P (stmt_info)) { + stmt_vec_info pattern_stmt_info + = STMT_VINFO_RELATED_STMT (stmt_info); + if (STMT_VINFO_SLP_VECT_ONLY (pattern_stmt_info)) + STMT_VINFO_IN_PATTERN_P (stmt_info) = false; + gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); - STMT_SLP_TYPE (stmt_info) = loop_vect; + STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); !gsi_end_p (pi); gsi_next (&pi)) STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) @@ -2643,43 +2773,56 @@ vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, /* Check whether the (fractional) cost per scalar iteration is lower or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */ - poly_widest_int rel_new = (new_loop_vinfo->vec_inside_cost - * poly_widest_int (old_vf)); - poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost - * poly_widest_int (new_vf)); - if (maybe_lt (rel_old, rel_new)) - { - /* When old_loop_vinfo uses a variable vectorization factor, - we know that it has a lower cost for at least one runtime VF. - However, we don't know how likely that VF is. - - One option would be to compare the costs for the estimated VFs. - The problem is that that can put too much pressure on the cost - model. E.g. if the estimated VF is also the lowest possible VF, - and if old_loop_vinfo is 1 unit worse than new_loop_vinfo - for the estimated VF, we'd then choose new_loop_vinfo even - though (a) new_loop_vinfo might not actually be better than - old_loop_vinfo for that VF and (b) it would be significantly - worse at larger VFs. - - Here we go for a hacky compromise: pick new_loop_vinfo if it is - no more expensive than old_loop_vinfo even after doubling the - estimated old_loop_vinfo VF. For all but trivial loops, this - ensures that we only pick new_loop_vinfo if it is significantly - better than old_loop_vinfo at the estimated VF. */ - if (rel_new.is_constant ()) - return false; + poly_int64 rel_new = new_loop_vinfo->vec_inside_cost * old_vf; + poly_int64 rel_old = old_loop_vinfo->vec_inside_cost * new_vf; + + HOST_WIDE_INT est_rel_new_min + = estimated_poly_value (rel_new, POLY_VALUE_MIN); + HOST_WIDE_INT est_rel_new_max + = estimated_poly_value (rel_new, POLY_VALUE_MAX); + + HOST_WIDE_INT est_rel_old_min + = estimated_poly_value (rel_old, POLY_VALUE_MIN); + HOST_WIDE_INT est_rel_old_max + = estimated_poly_value (rel_old, POLY_VALUE_MAX); + + /* Check first if we can make out an unambigous total order from the minimum + and maximum estimates. */ + if (est_rel_new_min < est_rel_old_min + && est_rel_new_max < est_rel_old_max) + return true; + else if (est_rel_old_min < est_rel_new_min + && est_rel_old_max < est_rel_new_max) + return false; + /* When old_loop_vinfo uses a variable vectorization factor, + we know that it has a lower cost for at least one runtime VF. + However, we don't know how likely that VF is. + + One option would be to compare the costs for the estimated VFs. + The problem is that that can put too much pressure on the cost + model. E.g. if the estimated VF is also the lowest possible VF, + and if old_loop_vinfo is 1 unit worse than new_loop_vinfo + for the estimated VF, we'd then choose new_loop_vinfo even + though (a) new_loop_vinfo might not actually be better than + old_loop_vinfo for that VF and (b) it would be significantly + worse at larger VFs. 
+ + Here we go for a hacky compromise: pick new_loop_vinfo if it is + no more expensive than old_loop_vinfo even after doubling the + estimated old_loop_vinfo VF. For all but trivial loops, this + ensures that we only pick new_loop_vinfo if it is significantly + better than old_loop_vinfo at the estimated VF. */ - HOST_WIDE_INT new_estimated_vf = estimated_poly_value (new_vf); - HOST_WIDE_INT old_estimated_vf = estimated_poly_value (old_vf); - widest_int estimated_rel_new = (new_loop_vinfo->vec_inside_cost - * widest_int (old_estimated_vf)); - widest_int estimated_rel_old = (old_loop_vinfo->vec_inside_cost - * widest_int (new_estimated_vf)); - return estimated_rel_new * 2 <= estimated_rel_old; + if (est_rel_old_min != est_rel_new_min + || est_rel_old_max != est_rel_new_max) + { + HOST_WIDE_INT est_rel_new_likely + = estimated_poly_value (rel_new, POLY_VALUE_LIKELY); + HOST_WIDE_INT est_rel_old_likely + = estimated_poly_value (rel_old, POLY_VALUE_LIKELY); + + return est_rel_new_likely * 2 <= est_rel_old_likely; } - if (known_lt (rel_new, rel_old)) - return true; /* If there's nothing to choose between the loop bodies, see whether there's a difference in the prologue and epilogue costs. */ @@ -3230,14 +3373,17 @@ pop: fail = true; break; } - /* Check there's only a single stmt the op is used on inside - of the loop. */ + /* Check there's only a single stmt the op is used on. For the + not value-changing tail and the last stmt allow out-of-loop uses. + ??? We could relax this and handle arbitrary live stmts by + forcing a scalar epilogue for example. */ imm_use_iterator imm_iter; gimple *op_use_stmt; unsigned cnt = 0; FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) if (!is_gimple_debug (op_use_stmt) - && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) + && (*code != ERROR_MARK + || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) { /* We want to allow x + x but not x < 1 ? x : 2. */ if (is_gimple_assign (op_use_stmt) @@ -4420,34 +4566,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, } -/* Function vect_model_induction_cost. - - Models cost for induction operations. */ - -static void -vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies, - stmt_vector_for_cost *cost_vec) -{ - unsigned inside_cost, prologue_cost; - - if (PURE_SLP_STMT (stmt_info)) - return; - - /* loop cost for vec_loop. */ - inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); - - /* prologue cost for vec_init and vec_step. 
*/ - prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "vect_model_induction_cost: inside_cost = %d, " - "prologue_cost = %d .\n", inside_cost, prologue_cost); -} - - /* Function get_initial_def_for_reduction @@ -5160,8 +5278,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, int scalar_precision = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); tree scalar_type_unsigned = make_unsigned_type (scalar_precision); - tree vectype_unsigned = build_vector_type - (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype)); + tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned, + vectype); /* First we need to create a vector (ZERO_VEC) of zeros and another vector (MAX_INDEX_VEC) filled with the last matching index, which we @@ -6313,9 +6431,28 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) { if (is_a <gphi *> (stmt_info->stmt)) - /* Analysis for double-reduction is done on the outer - loop PHI, nested cycles have no further restrictions. */ - STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; + { + if (slp_node) + { + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } + } + /* Analysis for double-reduction is done on the outer + loop PHI, nested cycles have no further restrictions. */ + STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; + } else STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; return true; @@ -6805,8 +6942,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, int scalar_precision = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); cr_index_scalar_type = make_unsigned_type (scalar_precision); - cr_index_vector_type = build_vector_type (cr_index_scalar_type, - nunits_out); + cr_index_vector_type = get_same_sized_vectype (cr_index_scalar_type, + vectype_out); if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type, OPTIMIZE_FOR_SPEED)) @@ -7377,15 +7514,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, if (slp_node) { vec_initial_defs.reserve (vec_num); - gcc_assert (slp_node == slp_node_instance->reduc_phis); - stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); - tree neutral_op - = neutral_op_for_slp_reduction (slp_node, vectype_out, - STMT_VINFO_REDUC_CODE (reduc_info), - first != NULL); - get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis, - &vec_initial_defs, vec_num, - first != NULL, neutral_op); + if (nested_cycle) + { + unsigned phi_idx = loop_preheader_edge (loop)->dest_idx; + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx], + &vec_initial_defs); + } + else + { + gcc_assert (slp_node == slp_node_instance->reduc_phis); + stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); + tree neutral_op + = neutral_op_for_slp_reduction (slp_node, vectype_out, + STMT_VINFO_REDUC_CODE (reduc_info), + first != NULL); + get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis, + &vec_initial_defs, vec_num, + first != NULL, neutral_op); + } } else { @@ -7491,6 +7637,17 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, if 
(!vec_stmt) /* transformation not required. */ { + /* Deal with copies from externs or constants that disguise as + loop-closed PHI nodes (PR97886). */ + if (slp_node + && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], + SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; return true; } @@ -7520,6 +7677,81 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, return true; } +/* Vectorizes PHIs. */ + +bool +vectorizable_phi (vec_info *, + stmt_vec_info stmt_info, gimple **vec_stmt, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) +{ + if (!is_a <gphi *> (stmt_info->stmt) || !slp_node) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + return false; + + tree vectype = SLP_TREE_VECTYPE (slp_node); + + if (!vec_stmt) /* transformation not required. */ + { + slp_tree child; + unsigned i; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child) + if (!child) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "PHI node with unvectorized backedge def\n"); + return false; + } + else if (!vect_maybe_update_slp_op_vectype (child, vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; + } + record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vector_stmt, stmt_info, vectype, 0, vect_body); + STMT_VINFO_TYPE (stmt_info) = phi_info_type; + return true; + } + + tree scalar_dest = gimple_phi_result (stmt_info->stmt); + basic_block bb = gimple_bb (stmt_info->stmt); + tree vec_dest = vect_create_destination_var (scalar_dest, vectype); + auto_vec<gphi *> new_phis; + for (unsigned i = 0; i < gimple_phi_num_args (stmt_info->stmt); ++i) + { + slp_tree child = SLP_TREE_CHILDREN (slp_node)[i]; + + /* Skip not yet vectorized defs. */ + if (SLP_TREE_DEF_TYPE (child) == vect_internal_def + && SLP_TREE_VEC_STMTS (child).is_empty ()) + continue; + + auto_vec<tree> vec_oprnds; + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds); + if (!new_phis.exists ()) + { + new_phis.create (vec_oprnds.length ()); + for (unsigned j = 0; j < vec_oprnds.length (); j++) + { + /* Create the vectorized LC PHI node. */ + new_phis.quick_push (create_phi_node (vec_dest, bb)); + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phis[j]); + } + } + edge e = gimple_phi_arg_edge (as_a <gphi *> (stmt_info->stmt), i); + for (unsigned j = 0; j < vec_oprnds.length (); j++) + add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION); + } + /* We should have at least one already vectorized child. */ + gcc_assert (new_phis.exists ()); + + return true; +} + /* Function vect_min_worthwhile_factor. @@ -7590,7 +7822,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); unsigned i; tree expr; - gimple_seq stmts; gimple_stmt_iterator si; gphi *phi = dyn_cast <gphi *> (stmt_info->stmt); @@ -7630,10 +7861,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, return false; } - /* FORNOW: outer loop induction with SLP not supported. 
*/ - if (STMT_SLP_TYPE (stmt_info)) - return false; - exit_phi = NULL; latch_e = loop_latch_edge (loop->inner); loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); @@ -7672,7 +7899,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (slp_node && !nunits.is_constant ()) { - /* The current SLP code creates the initial value element-by-element. */ + /* The current SLP code creates the step value element-by-element. */ if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "SLP induction not supported for variable-length" @@ -7682,9 +7909,50 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (!vec_stmt) /* transformation not required. */ { + unsigned inside_cost = 0, prologue_cost = 0; + if (slp_node) + { + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } + /* loop cost for vec_loop. */ + inside_cost + = record_stmt_cost (cost_vec, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vector_stmt, stmt_info, 0, vect_body); + /* prologue cost for vec_init (if not nested) and step. */ + prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, + scalar_to_vec, + stmt_info, 0, vect_prologue); + } + else /* if (!slp_node) */ + { + /* loop cost for vec_loop. */ + inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, + stmt_info, 0, vect_body); + /* prologue cost for vec_init and vec_step. */ + prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, + stmt_info, 0, vect_prologue); + } + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_model_induction_cost: inside_cost = %d, " + "prologue_cost = %d .\n", inside_cost, + prologue_cost); + STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_induction"); - vect_model_induction_cost (stmt_info, ncopies, cost_vec); return true; } @@ -7703,164 +7971,195 @@ vectorizable_induction (loop_vec_info loop_vinfo, tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype); pe = loop_preheader_edge (iv_loop); - init_expr = PHI_ARG_DEF_FROM_EDGE (phi, - loop_preheader_edge (iv_loop)); - - stmts = NULL; - if (!nested_in_vect_loop) - { - /* Convert the initial value to the IV update type. */ - tree new_type = TREE_TYPE (step_expr); - init_expr = gimple_convert (&stmts, new_type, init_expr); - - /* If we are using the loop mask to "peel" for alignment then we need - to adjust the start value here. */ - tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - if (skip_niters != NULL_TREE) - { - if (FLOAT_TYPE_P (vectype)) - skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, - skip_niters); - else - skip_niters = gimple_convert (&stmts, new_type, skip_niters); - tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, - skip_niters, step_expr); - init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, - init_expr, skip_step); - } - } - - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - /* Find the first insertion point in the BB. 
*/ basic_block bb = gimple_bb (phi); si = gsi_after_labels (bb); /* For SLP induction we have to generate several IVs as for example - with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S] - [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform - [VF*S, VF*S, VF*S, VF*S] for all. */ + with group size 3 we need + [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] + [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ if (slp_node) { /* Enforced above. */ unsigned int const_nunits = nunits.to_constant (); - /* Generate [VF*S, VF*S, ... ]. */ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) + /* The initial values are vectorized, but any lanes > group_size + need adjustment. */ + slp_tree init_node + = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; + + /* Gather steps. Since we do not vectorize inductions as + cycles we have to reconstruct the step from SCEV data. */ + unsigned group_size = SLP_TREE_LANES (slp_node); + tree *steps = XALLOCAVEC (tree, group_size); + tree *inits = XALLOCAVEC (tree, group_size); + stmt_vec_info phi_info; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) { - expr = build_int_cst (integer_type_node, vf); - expr = fold_convert (TREE_TYPE (step_expr), expr); + steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); + if (!init_node) + inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), + pe->dest_idx); } - else - expr = build_int_cst (TREE_TYPE (step_expr), vf); - new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (! CONSTANT_CLASS_P (new_name)) - new_name = vect_init_vector (loop_vinfo, stmt_info, new_name, - TREE_TYPE (step_expr), NULL); - new_vec = build_vector_from_val (step_vectype, new_name); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, NULL); /* Now generate the IVs. */ - unsigned group_size = SLP_TREE_LANES (slp_node); unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - unsigned elts = const_nunits * nvects; - /* Compute the number of distinct IVs we need. First reduce - group_size if it is a multiple of const_nunits so we get - one IV for a group_size of 4 but const_nunits 2. */ - unsigned group_sizep = group_size; - if (group_sizep % const_nunits == 0) - group_sizep = group_sizep / const_nunits; - unsigned nivs = least_common_multiple (group_sizep, - const_nunits) / const_nunits; - gcc_assert (elts % group_size == 0); - tree elt = init_expr; + gcc_assert ((const_nunits * nvects) % group_size == 0); + unsigned nivs; + if (nested_in_vect_loop) + nivs = nvects; + else + { + /* Compute the number of distinct IVs we need. First reduce + group_size if it is a multiple of const_nunits so we get + one IV for a group_size of 4 but const_nunits 2. */ + unsigned group_sizep = group_size; + if (group_sizep % const_nunits == 0) + group_sizep = group_sizep / const_nunits; + nivs = least_common_multiple (group_sizep, + const_nunits) / const_nunits; + } + tree stept = TREE_TYPE (step_vectype); + tree lupdate_mul = NULL_TREE; + if (!nested_in_vect_loop) + { + /* The number of iterations covered in one vector iteration. */ + unsigned lup_mul = (nvects * const_nunits) / group_size; + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? 
build_real_from_wide (stept, lup_mul, + UNSIGNED) + : build_int_cstu (stept, lup_mul)); + } + tree peel_mul = NULL_TREE; + gimple_seq init_stmts = NULL; + if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) + { + if (SCALAR_FLOAT_TYPE_P (stept)) + peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + else + peel_mul = gimple_convert (&init_stmts, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + peel_mul = gimple_build_vector_from_val (&init_stmts, + step_vectype, peel_mul); + } unsigned ivn; + auto_vec<tree> vec_steps; for (ivn = 0; ivn < nivs; ++ivn) { - tree_vector_builder elts (step_vectype, const_nunits, 1); - stmts = NULL; + tree_vector_builder step_elts (step_vectype, const_nunits, 1); + tree_vector_builder init_elts (vectype, const_nunits, 1); + tree_vector_builder mul_elts (step_vectype, const_nunits, 1); for (unsigned eltn = 0; eltn < const_nunits; ++eltn) { - if (ivn*const_nunits + eltn >= group_size - && (ivn * const_nunits + eltn) % group_size == 0) - elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt), - elt, step_expr); - elts.quick_push (elt); - } - vec_init = gimple_build_vector (&stmts, &elts); - vec_init = gimple_convert (&stmts, vectype, vec_init); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + /* The scalar steps of the IVs. */ + tree elt = steps[(ivn*const_nunits + eltn) % group_size]; + elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt); + step_elts.quick_push (elt); + if (!init_node) + { + /* The scalar inits of the IVs if not vectorized. */ + elt = inits[(ivn*const_nunits + eltn) % group_size]; + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (elt))) + elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + TREE_TYPE (vectype), elt); + init_elts.quick_push (elt); + } + /* The number of steps to add to the initial values. */ + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, + mul_elt, UNSIGNED) + : build_int_cstu (stept, mul_elt)); } + vec_step = gimple_build_vector (&init_stmts, &step_elts); + vec_steps.safe_push (vec_step); + tree step_mul = gimple_build_vector (&init_stmts, &mul_elts); + if (peel_mul) + step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + step_mul, peel_mul); + if (!init_node) + vec_init = gimple_build_vector (&init_stmts, &init_elts); /* Create the induction-phi that defines the induction-operand. 
*/ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); + vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, + "vec_iv_"); induction_phi = create_phi_node (vec_dest, iv_loop->header); induc_def = PHI_RESULT (induction_phi); /* Create the iv update inside the loop */ + tree up = vec_step; + if (lupdate_mul) + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, lupdate_mul); gimple_seq stmts = NULL; vec_def = gimple_convert (&stmts, step_vectype, induc_def); vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, vec_step); + PLUS_EXPR, step_vectype, vec_def, up); vec_def = gimple_convert (&stmts, vectype, vec_def); gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), + UNKNOWN_LOCATION); + + if (init_node) + vec_init = vect_get_slp_vect_def (init_node, ivn); + if (!nested_in_vect_loop + && !integer_zerop (step_mul)) + { + vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, step_mul); + vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + vec_def, up); + vec_init = gimple_convert (&init_stmts, vectype, vec_def); + } /* Set the arguments of the phi node: */ add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi); } - /* Fill up to the number of vectors we need for the whole group. */ - nivs = least_common_multiple (group_size, - const_nunits) / const_nunits; - for (; ivn < nivs; ++ivn) - SLP_TREE_VEC_STMTS (slp_node) - .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]); + if (!nested_in_vect_loop) + { + /* Fill up to the number of vectors we need for the whole group. */ + nivs = least_common_multiple (group_size, + const_nunits) / const_nunits; + for (; ivn < nivs; ++ivn) + { + SLP_TREE_VEC_STMTS (slp_node) + .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]); + vec_steps.safe_push (vec_steps[0]); + } + } - /* Re-use IVs when we can. */ + /* Re-use IVs when we can. We are generating further vector + stmts by adding VF' * stride to the IVs generated above. */ if (ivn < nvects) { unsigned vfp = least_common_multiple (group_size, const_nunits) / group_size; - /* Generate [VF'*S, VF'*S, ... ]. */ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) - { - expr = build_int_cst (integer_type_node, vfp); - expr = fold_convert (TREE_TYPE (step_expr), expr); - } - else - expr = build_int_cst (TREE_TYPE (step_expr), vfp); - new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (! CONSTANT_CLASS_P (new_name)) - new_name = vect_init_vector (loop_vinfo, stmt_info, new_name, - TREE_TYPE (step_expr), NULL); - new_vec = build_vector_from_val (step_vectype, new_name); - vec_step = vect_init_vector (loop_vinfo, stmt_info, new_vec, - step_vectype, NULL); + tree lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? 
build_real_from_wide (stept, + vfp, UNSIGNED) + : build_int_cstu (stept, vfp)); for (; ivn < nvects; ++ivn) { gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; - tree def; - if (gimple_code (iv) == GIMPLE_PHI) - def = gimple_phi_result (iv); - else - def = gimple_assign_lhs (iv); + tree def = gimple_get_lhs (iv); + if (ivn < 2*nivs) + vec_steps[ivn - nivs] + = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_steps[ivn - nivs], lupdate_mul); gimple_seq stmts = NULL; def = gimple_convert (&stmts, step_vectype, def); - def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, def, vec_step); + def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + def, vec_steps[ivn % nivs]); def = gimple_convert (&stmts, vectype, def); if (gimple_code (iv) == GIMPLE_PHI) gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); @@ -7874,9 +8173,45 @@ vectorizable_induction (loop_vec_info loop_vinfo, } } + new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); + gcc_assert (!new_bb); + return true; } + init_expr = PHI_ARG_DEF_FROM_EDGE (phi, + loop_preheader_edge (iv_loop)); + + gimple_seq stmts = NULL; + if (!nested_in_vect_loop) + { + /* Convert the initial value to the IV update type. */ + tree new_type = TREE_TYPE (step_expr); + init_expr = gimple_convert (&stmts, new_type, init_expr); + + /* If we are using the loop mask to "peel" for alignment then we need + to adjust the start value here. */ + tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); + if (skip_niters != NULL_TREE) + { + if (FLOAT_TYPE_P (vectype)) + skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, + skip_niters); + else + skip_niters = gimple_convert (&stmts, new_type, skip_niters); + tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, + skip_niters, step_expr); + init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, + init_expr, skip_step); + } + } + + if (stmts) + { + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + /* Create the vector that holds the initial_value of the induction. */ if (nested_in_vect_loop) { @@ -8376,8 +8711,19 @@ vectorizable_live_operation (vec_info *vinfo, gimple_seq stmts = NULL; new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts, true, NULL_TREE); - - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + if (TREE_CODE (new_tree) == SSA_NAME + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) = 1; + if (is_a <gphi *> (vec_stmt)) + { + gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt)); + gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + } + else + { + gimple_stmt_iterator si = gsi_for_stmt (vec_stmt); + gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT); + } /* Replace use of lhs with newly computed result. If the use stmt is a single arg PHI, just replace all uses of PHI result. It's necessary @@ -8414,6 +8760,24 @@ vectorizable_live_operation (vec_info *vinfo, "def\n"); continue; } + /* ??? It can also happen that we end up pulling a def into + a loop where replacing out-of-loop uses would require + a new LC SSA PHI node. Retain the original scalar in + those cases as well. PR98064. 
*/ + if (TREE_CODE (new_tree) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (new_tree) + && (gimple_bb (use_stmt)->loop_father + != gimple_bb (vec_stmt)->loop_father) + && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, + gimple_bb (use_stmt)->loop_father)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Using original scalar computation for " + "live lane because there is an out-of-loop " + "definition for it\n"); + continue; + } FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) SET_USE (use_p, new_tree); update_stmt (use_stmt); @@ -8734,7 +9098,7 @@ maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo, When vectorizing STMT_INFO as a store, set *SEEN_STORE to its stmt_vec_info. */ -static void +static bool vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, stmt_vec_info *seen_store) { @@ -8750,7 +9114,7 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) - return; + return false; if (STMT_VINFO_VECTYPE (stmt_info)) { @@ -8767,13 +9131,15 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, /* Pure SLP statements have already been vectorized. We still need to apply loop vectorization to hybrid SLP statements. */ if (PURE_SLP_STMT (stmt_info)) - return; + return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n"); if (vect_transform_stmt (loop_vinfo, stmt_info, gsi, NULL, NULL)) *seen_store = stmt_info; + + return true; } /* Helper function to pass to simplify_replace_tree to enable replacing tree's @@ -9199,17 +9565,17 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) } stmt_vec_info pat_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); - vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si, - &seen_store); - maybe_set_vectorized_backedge_value (loop_vinfo, - pat_stmt_info); + if (vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, + &si, &seen_store)) + maybe_set_vectorized_backedge_value (loop_vinfo, + pat_stmt_info); } else { - vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, - &seen_store); - maybe_set_vectorized_backedge_value (loop_vinfo, - stmt_info); + if (vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, + &seen_store)) + maybe_set_vectorized_backedge_value (loop_vinfo, + stmt_info); } } gsi_next (&si); |
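The vect_better_loop_vinfo_p hunk above replaces the poly_widest_int cross-multiplication with a comparison of POLY_VALUE_MIN/MAX/LIKELY estimates of inside_cost times the other candidate's VF. The sketch below is a minimal standalone model of that decision logic, not GCC code: the hypothetical cost_estimate struct stands in for the three estimated_poly_value results that the real code computes from poly_int64 products.

```c
/* Sketch of the reworked cost comparison: prefer the new loop_vinfo only if
   the cross-multiplied per-scalar-iteration costs
   new_inside_cost * old_vf  vs.  old_inside_cost * new_vf
   order unambiguously, or if the new candidate is at least twice as cheap
   at the likely runtime VF.  */

#include <stdbool.h>
#include <stdio.h>

struct cost_estimate
{
  long min;     /* estimate at the smallest possible runtime VF */
  long max;     /* estimate at the largest possible runtime VF */
  long likely;  /* estimate at the most likely runtime VF */
};

static bool
better_loop_vinfo_p (struct cost_estimate rel_new, struct cost_estimate rel_old)
{
  /* First try to establish an unambiguous total order from the minimum
     and maximum estimates.  */
  if (rel_new.min < rel_old.min && rel_new.max < rel_old.max)
    return true;
  if (rel_old.min < rel_new.min && rel_old.max < rel_new.max)
    return false;

  /* The ranges overlap: only pick the new candidate when it is at least
     twice as cheap at the likely VF, mirroring the "hacky compromise"
     comment retained by the patch.  */
  if (rel_old.min != rel_new.min || rel_old.max != rel_new.max)
    return rel_new.likely * 2 <= rel_old.likely;

  /* Identical estimates: the real code falls through to comparing
     prologue and epilogue costs instead.  */
  return false;
}

int
main (void)
{
  struct cost_estimate rel_new = { 40, 160, 80 };
  struct cost_estimate rel_old = { 50, 150, 170 };
  printf ("prefer new: %d\n", better_loop_vinfo_p (rel_new, rel_old));
  return 0;
}
```

In the example, neither the min nor the max estimates give a consistent winner, so the likely-VF test decides: 80 * 2 <= 170, so the new candidate is preferred.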