author     Jerry DeLisle <jvdelisle@gcc.gnu.org>  2025-09-02 15:58:26 -0700
committer  Jerry DeLisle <jvdelisle@gcc.gnu.org>  2025-09-02 15:58:26 -0700
commit     071b4126c613881f4cb25b4e5c39032964827f88
tree       7ed805786566918630d1d617b1ed8f7310f5fd8e /gcc/tree-vect-stmts.cc
parent     845d23f3ea08ba873197c275a8857eee7edad996
parent     caa1c2f42691d68af4d894a5c3e700ecd2dba080
Merge branch 'master' into gfortran-test (devel/gfortran-test)
Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--  gcc/tree-vect-stmts.cc  1180
1 file changed, 590 insertions, 590 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index aa2657a..5b1f291 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -386,6 +386,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "vec_stmt_relevant_p: stmt has vdefs.\n"); *relevant = vect_used_in_scope; + if (! STMT_VINFO_DATA_REF (stmt_info) + && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF)) + LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info); } /* uses outside the loop. */ @@ -414,7 +417,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, /* Check if it's a not live PHI and multiple exits. In this case there will be a usage later on after peeling which is needed for the - alternate exit. */ + alternate exit. + ??? Unless the PHI was marked live because of early + break, which also needs the latch def live and vectorized. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) && is_a <gphi *> (stmt) && gimple_bb (stmt) == LOOP_VINFO_LOOP (loop_vinfo)->header @@ -652,14 +657,15 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, } /* We are also not interested in uses on loop PHI backedges that are inductions. Otherwise we'll needlessly vectorize the IV increment - and cause hybrid SLP for SLP inductions. Unless the PHI is live - of course. */ + and cause hybrid SLP for SLP inductions. */ else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def - && ! STMT_VINFO_LIVE_P (stmt_vinfo) && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, loop_latch_edge (bb->loop_father)) - == use)) + == use) + && (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + || (gimple_bb (stmt_vinfo->stmt) + != LOOP_VINFO_LOOP (loop_vinfo)->header))) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -667,7 +673,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, return opt_result::success (); } - vect_mark_relevant (worklist, dstmt_vinfo, relevant, false); return opt_result::success (); } @@ -719,16 +724,28 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) phi_info->stmt); if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, phi_info, relevant, live_p); + { + if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type) + return opt_result::failure_at + (*si, "not vectorized: unhandled relevant PHI: %G", *si); + vect_mark_relevant (&worklist, phi_info, relevant, live_p); + } } - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si)) { - if (is_gimple_debug (gsi_stmt (si))) + gimple *stmt = gsi_stmt (si); + if (is_gimple_debug (stmt)) continue; - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); + stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, - "init: stmt relevant? %G", stmt_info->stmt); + "init: stmt relevant? 
%G", stmt); + + if (gimple_get_lhs (stmt) == NULL_TREE + && !is_a <gcond *> (stmt) + && !is_a <gcall *> (stmt)) + return opt_result::failure_at + (stmt, "not vectorized: irregular stmt: %G", stmt); if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p)) vect_mark_relevant (&worklist, stmt_info, relevant, live_p); @@ -874,7 +891,9 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo)) { gather_scatter_info gs_info; - if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info)) + if (!vect_check_gather_scatter (stmt_vinfo, + STMT_VINFO_VECTYPE (stmt_vinfo), + loop_vinfo, &gs_info)) gcc_unreachable (); opt_result res = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant, @@ -926,8 +945,7 @@ vect_model_simple_cost (vec_info *, int n, slp_tree node, is true the stmt is doing widening arithmetic. */ static void -vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, - enum vect_def_type *dt, +vect_model_promotion_demotion_cost (slp_tree slp_node, unsigned int ncopies, int pwr, stmt_vector_for_cost *cost_vec, bool widen_arith) @@ -940,16 +958,10 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, inside_cost += record_stmt_cost (cost_vec, ncopies, widen_arith ? vector_stmt : vec_promote_demote, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); ncopies *= 2; } - /* FORNOW: Assuming maximum 2 args per stmts. */ - for (i = 0; i < 2; i++) - if (dt[i] == vect_constant_def || dt[i] == vect_external_def) - prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, - stmt_info, 0, vect_prologue); - if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_promotion_demotion_cost: inside_cost = %d, " @@ -1420,16 +1432,33 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, slp_tree slp_node, vec_load_store_type vls_type, int group_size, - vect_memory_access_type - memory_access_type, - gather_scatter_info *gs_info, - tree scalar_mask, + vect_load_store_data *ls, + slp_tree mask_node, vec<int> *elsvals = nullptr) { + vect_memory_access_type memory_access_type = ls->memory_access_type; + /* Invariant loads need no special support. */ if (memory_access_type == VMAT_INVARIANT) return; + /* Figure whether the mask is uniform. scalar_mask is used to + populate the scalar_cond_masked_set. */ + tree scalar_mask = NULL_TREE; + if (mask_node) + for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i) + { + tree def = vect_get_slp_scalar_def (mask_node, i); + if (!def + || (scalar_mask && def != scalar_mask)) + { + scalar_mask = NULL; + break; + } + else + scalar_mask = def; + } + unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); @@ -1459,7 +1488,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, return; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { internal_fn ifn = (is_load ? IFN_MASK_GATHER_LOAD @@ -1467,17 +1496,22 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, internal_fn len_ifn = (is_load ? IFN_MASK_LEN_GATHER_LOAD : IFN_MASK_LEN_SCATTER_STORE); + stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node); + tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr) + ? 
SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]) + : ls->strided_offset_vectype); + tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr)); + int scale = SLP_TREE_GS_SCALE (slp_node); if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, - gs_info->memory_type, - gs_info->offset_vectype, - gs_info->scale, + memory_type, + off_vectype, scale, elsvals)) vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); else if (internal_gather_scatter_fn_supported_p (ifn, vectype, - gs_info->memory_type, - gs_info->offset_vectype, - gs_info->scale, - elsvals)) + memory_type, + off_vectype, scale, + elsvals) + || memory_access_type == VMAT_GATHER_SCATTER_LEGACY) vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); else @@ -1515,7 +1549,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, } /* We might load more scalars than we need for permuting SLP loads. - We checked in get_group_load_store_type that the extra elements + We checked in get_load_store_type that the extra elements don't leak into a new vector. */ auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits) { @@ -1676,7 +1710,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype, get_object_alignment (DR_REF (dr))); gs_info->element_type = TREE_TYPE (vectype); gs_info->offset = fold_convert (offset_type, step); - gs_info->offset_dt = vect_constant_def; gs_info->scale = scale; gs_info->memory_type = memory_type; return true; @@ -1703,19 +1736,33 @@ static bool vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype, loop_vec_info loop_vinfo, bool masked_p, gather_scatter_info *gs_info, - vec<int> *elsvals) + vec<int> *elsvals, + unsigned int group_size, + bool single_element_p) { - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals) + if (!vect_check_gather_scatter (stmt_info, vectype, + loop_vinfo, gs_info, elsvals) || gs_info->ifn == IFN_LAST) - return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals); + { + if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, + masked_p, gs_info, elsvals)) + return false; + } + else + { + tree old_offset_type = TREE_TYPE (gs_info->offset); + tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); - tree old_offset_type = TREE_TYPE (gs_info->offset); - tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); + gcc_assert (TYPE_PRECISION (new_offset_type) + >= TYPE_PRECISION (old_offset_type)); + gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + } - gcc_assert (TYPE_PRECISION (new_offset_type) - >= TYPE_PRECISION (old_offset_type)); - gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + if (!single_element_p + && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype), + gs_info->scale, + group_size)) + return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1903,38 +1950,46 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) return NULL_TREE; } -/* A subroutine of get_load_store_type, with a subset of the same - arguments. Handle the case where STMT_INFO is part of a grouped load - or store. +/* Analyze load or store SLP_NODE of type VLS_TYPE. Return true + if there is a memory access type that the vectorized form can use, + storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers + or scatters, fill in GS_INFO accordingly. 
In addition + *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if + the target does not support the alignment scheme. *MISALIGNMENT + is set according to the alignment of the access (including + DR_MISALIGNMENT_UNKNOWN when it is unknown). - For stores, the statements in the group are all consecutive - and there is no gap at the end. For loads, the statements in the - group might not be consecutive; there can be gaps between statements - as well as at the end. + MASKED_P is true if the statement is conditional on a vectorized mask. + VECTYPE is the vector type that the vectorized statements will use. - If we can use gather/scatter and ELSVALS is nonzero the supported - else values will be stored in the vector ELSVALS points to. -*/ + If ELSVALS is nonzero the supported else values will be stored in the + vector ELSVALS points to. */ static bool -get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, slp_tree slp_node, - bool masked_p, vec_load_store_type vls_type, - vect_memory_access_type *memory_access_type, - poly_int64 *poffset, - dr_alignment_support *alignment_support_scheme, - int *misalignment, - gather_scatter_info *gs_info, - internal_fn *lanes_ifn, - vec<int> *elsvals) +get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, + tree vectype, slp_tree slp_node, + bool masked_p, vec_load_store_type vls_type, + vect_load_store_data *ls) { + vect_memory_access_type *memory_access_type = &ls->memory_access_type; + poly_int64 *poffset = &ls->poffset; + dr_alignment_support *alignment_support_scheme + = &ls->alignment_support_scheme; + int *misalignment = &ls->misalignment; + internal_fn *lanes_ifn = &ls->lanes_ifn; + vec<int> *elsvals = &ls->elsvals; loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; stmt_vec_info first_stmt_info; unsigned int group_size; unsigned HOST_WIDE_INT gap; bool single_element_p; poly_int64 neg_ldst_offset = 0; + + *misalignment = DR_MISALIGNMENT_UNKNOWN; + *poffset = 0; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); @@ -1951,7 +2006,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, single_element_p = true; } dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); /* True if the vectorized statements would access beyond the last statement in the group. */ @@ -1977,7 +2031,53 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, separated by the stride, until we have a complete vector. Fall back to scalar accesses if that isn't possible. */ *memory_access_type = VMAT_STRIDED_SLP; - else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; + tree offset_vectype = SLP_TREE_VECTYPE (offset_node); + int scale = SLP_TREE_GS_SCALE (slp_node); + tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); + tree tem; + if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD, + masked_p, vectype, + memory_type, + offset_vectype, scale, + &ls->gs.ifn, &tem, + elsvals)) + *memory_access_type = VMAT_GATHER_SCATTER_IFN; + else if (vls_type == VLS_LOAD + ? 
(targetm.vectorize.builtin_gather + && (ls->gs.decl + = targetm.vectorize.builtin_gather (vectype, + TREE_TYPE + (offset_vectype), + scale))) + : (targetm.vectorize.builtin_scatter + && (ls->gs.decl + = targetm.vectorize.builtin_scatter (vectype, + TREE_TYPE + (offset_vectype), + scale)))) + *memory_access_type = VMAT_GATHER_SCATTER_LEGACY; + else + { + /* GATHER_SCATTER_EMULATED_P. */ + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () + || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant () + || VECTOR_BOOLEAN_TYPE_P (offset_vectype) + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype), + TYPE_VECTOR_SUBPARTS (vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported vector types for emulated " + "gather.\n"); + return false; + } + *memory_access_type = VMAT_GATHER_SCATTER_EMULATED; + } + } + else { int cmp = compare_step_with_zero (vinfo, stmt_info); if (cmp < 0) @@ -2221,62 +2321,19 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) - && single_element_p && SLP_TREE_LANES (slp_node) == 1 - && loop_vinfo - && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals)) - *memory_access_type = VMAT_GATHER_SCATTER; - else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + && loop_vinfo) { - tree offset; - slp_tree offset_node; - *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. */ - else if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (gs_info->offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node, - &gs_info->offset_dt, - &gs_info->offset_vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s index use not simple.\n", - vls_type == VLS_LOAD ? 
"gather" : "scatter"); - return false; - } - else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + gather_scatter_info gs_info; + if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, + masked_p, &gs_info, elsvals, + group_size, single_element_p)) { - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS - (gs_info->offset_vectype), - TYPE_VECTOR_SUBPARTS (vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "unsupported vector types for emulated " - "gather.\n"); - return false; - } + SLP_TREE_GS_SCALE (slp_node) = gs_info.scale; + SLP_TREE_GS_BASE (slp_node) = error_mark_node; + ls->gs.ifn = gs_info.ifn; + ls->strided_offset_vectype = gs_info.offset_vectype; + *memory_access_type = VMAT_GATHER_SCATTER_IFN; } } @@ -2285,8 +2342,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *poffset = neg_ldst_offset; if (*memory_access_type == VMAT_ELEMENTWISE - || (*memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_LEGACY_P (*gs_info)) + || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY || *memory_access_type == VMAT_STRIDED_SLP || *memory_access_type == VMAT_INVARIANT) { @@ -2295,7 +2351,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, } else { - if (*memory_access_type == VMAT_GATHER_SCATTER + if (mat_gather_scatter_p (*memory_access_type) && !first_dr_info) *misalignment = DR_MISALIGNMENT_UNKNOWN; else @@ -2303,27 +2359,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *alignment_support_scheme = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, *misalignment, - *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr); - } - - if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) - { - /* STMT is the leader of the group. Check the operands of all the - stmts of the group. */ - stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); - while (next_stmt_info) - { - tree op = vect_get_store_rhs (next_stmt_info); - enum vect_def_type dt; - if (!vect_is_simple_use (op, vinfo, &dt)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "use not simple.\n"); - return false; - } - next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); - } + mat_gather_scatter_p (*memory_access_type)); } if (overrun_p) @@ -2336,51 +2372,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; } - return true; -} - -/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true - if there is a memory access type that the vectorized form can use, - storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers - or scatters, fill in GS_INFO accordingly. In addition - *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if - the target does not support the alignment scheme. *MISALIGNMENT - is set according to the alignment of the access (including - DR_MISALIGNMENT_UNKNOWN when it is unknown). - - SLP says whether we're performing SLP rather than loop vectorization. - MASKED_P is true if the statement is conditional on a vectorized mask. - VECTYPE is the vector type that the vectorized statements will use. - NCOPIES is the number of vector statements that will be needed. 
- - If ELSVALS is nonzero the supported else values will be stored in the - vector ELSVALS points to. */ - -static bool -get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, slp_tree slp_node, - bool masked_p, vec_load_store_type vls_type, - unsigned int, - vect_memory_access_type *memory_access_type, - poly_int64 *poffset, - dr_alignment_support *alignment_support_scheme, - int *misalignment, - gather_scatter_info *gs_info, - internal_fn *lanes_ifn, - vec<int> *elsvals = nullptr) -{ - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - *misalignment = DR_MISALIGNMENT_UNKNOWN; - *poffset = 0; - if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, - masked_p, - vls_type, memory_access_type, poffset, - alignment_support_scheme, - misalignment, gs_info, lanes_ifn, - elsvals)) - return false; - if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !nunits.is_constant ()) @@ -2392,7 +2383,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, return false; } - /* Checks if all scalar iterations are known to be inbounds. */ bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info)); @@ -2403,7 +2393,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, if (loop_vinfo && dr_safe_speculative_read_required (stmt_info) && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && (*memory_access_type == VMAT_GATHER_SCATTER + && (mat_gather_scatter_p (*memory_access_type) || *memory_access_type == VMAT_STRIDED_SLP)) { if (dump_enabled_p ()) @@ -2423,75 +2413,31 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, vector iteration or force masking. */ if (dr_safe_speculative_read_required (stmt_info) && (*alignment_support_scheme == dr_aligned - && *memory_access_type != VMAT_GATHER_SCATTER)) + && !mat_gather_scatter_p (*memory_access_type))) { /* We can only peel for loops, of course. */ gcc_checking_assert (loop_vinfo); + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + poly_uint64 read_amount + = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + read_amount *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); + auto target_alignment = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info)); - unsigned HOST_WIDE_INT target_align; - - bool group_aligned = false; - if (target_alignment.is_constant (&target_align) - && nunits.is_constant ()) - { - poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - auto vectype_size - = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); - poly_uint64 required_alignment = vf * vectype_size; - /* If we have a grouped access we require that the alignment be N * elem. */ - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) - required_alignment *= - DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); - if (!multiple_p (target_alignment, required_alignment)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "desired alignment %wu not met. 
Instead got %wu " - "for DR alignment at %G", - required_alignment.to_constant (), - target_align, STMT_VINFO_STMT (stmt_info)); - return false; - } - - if (!pow2p_hwi (target_align)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "non-power-of-two vector alignment %wd " - "for DR alignment at %G", - target_align, STMT_VINFO_STMT (stmt_info)); - return false; - } - - /* For VLA we have to insert a runtime check that the vector loads - per iterations don't exceed a page size. For now we can use - POLY_VALUE_MAX as a proxy as we can't peel for VLA. */ - if (known_gt (required_alignment, (unsigned)param_min_pagesize)) + if (!multiple_p (target_alignment, read_amount)) + { + if (dump_enabled_p ()) { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "alignment required for correctness ("); - dump_dec (MSG_MISSED_OPTIMIZATION, required_alignment); - dump_printf (MSG_NOTE, ") may exceed page size\n"); - } - return false; + dump_printf_loc (MSG_NOTE, vect_location, + "desired alignment not met, target was "); + dump_dec (MSG_NOTE, target_alignment); + dump_printf (MSG_NOTE, " previously, but read amount is "); + dump_dec (MSG_NOTE, read_amount); + dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info)); } - - group_aligned = true; - } - - /* There are multiple loads that have a misalignment that we couldn't - align. We would need LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P to - vectorize. */ - if (!group_aligned) - { - if (inbounds) - LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; - else - return false; + return false; } /* When using a group access the first element may be aligned but the @@ -2513,6 +2459,33 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, STMT_VINFO_STMT (stmt_info)); return false; } + + /* Reject vectorization if we know the read mount per vector iteration + exceeds the min page size. */ + if (known_gt (read_amount, (unsigned) param_min_pagesize)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "alignment required for correctness ("); + dump_dec (MSG_MISSED_OPTIMIZATION, read_amount); + dump_printf (MSG_NOTE, ") may exceed page size.\n"); + } + return false; + } + + if (!vf.is_constant ()) + { + /* For VLA modes, we need a runtime check to ensure any speculative + read amount does not exceed the page size. Here we record the max + possible read amount for the check. */ + if (maybe_gt (read_amount, + LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo))) + LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount; + + /* For VLA modes, we must use partial vectors. */ + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; + } } if (*alignment_support_scheme == dr_unaligned_unsupported) @@ -2526,9 +2499,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, /* FIXME: At the moment the cost model seems to underestimate the cost of using elementwise accesses. This check preserves the traditional behavior until that can be fixed. */ - stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); - if (!first_stmt_info) - first_stmt_info = stmt_info; if (*memory_access_type == VMAT_ELEMENTWISE && !STMT_VINFO_STRIDED_P (first_stmt_info) && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) @@ -2545,21 +2515,21 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing conditional operation STMT_INFO. 
When returning true, store the mask - in *MASK, the type of its definition in *MASK_DT_OUT, the type of the - vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding - to the mask in *MASK_NODE if MASK_NODE is not NULL. */ + in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of + the vectorized mask in *MASK_VECTYPE_OUT. */ static bool vect_check_scalar_mask (vec_info *vinfo, slp_tree slp_node, unsigned mask_index, - tree *mask, slp_tree *mask_node, + slp_tree *mask_node, vect_def_type *mask_dt_out, tree *mask_vectype_out) { enum vect_def_type mask_dt; tree mask_vectype; slp_tree mask_node_1; + tree mask_; if (!vect_is_simple_use (vinfo, slp_node, mask_index, - mask, &mask_node_1, &mask_dt, &mask_vectype)) + &mask_, &mask_node_1, &mask_dt, &mask_vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2568,7 +2538,7 @@ vect_check_scalar_mask (vec_info *vinfo, } if ((mask_dt == vect_constant_def || mask_dt == vect_external_def) - && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask))) + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2576,17 +2546,6 @@ vect_check_scalar_mask (vec_info *vinfo, return false; } - /* If the caller is not prepared for adjusting an external/constant - SLP mask vector type fail. */ - if (!mask_node - && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "SLP mask argument is not vectorized.\n"); - return false; - } - tree vectype = SLP_TREE_VECTYPE (slp_node); if (!mask_vectype) mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype), @@ -2614,11 +2573,11 @@ vect_check_scalar_mask (vec_info *vinfo, *mask_dt_out = mask_dt; *mask_vectype_out = mask_vectype; - if (mask_node) - *mask_node = mask_node_1; + *mask_node = mask_node_1; return true; } + /* Return true if stored value is suitable for vectorizing store statement STMT_INFO. When returning true, store the scalar stored in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT, @@ -2627,7 +2586,7 @@ vect_check_scalar_mask (vec_info *vinfo, static bool vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, - slp_tree slp_node, tree *rhs, slp_tree *rhs_node, + slp_tree slp_node, slp_tree *rhs_node, vect_def_type *rhs_dt_out, tree *rhs_vectype_out, vec_load_store_type *vls_type_out) { @@ -2643,8 +2602,9 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, enum vect_def_type rhs_dt; tree rhs_vectype; + tree rhs; if (!vect_is_simple_use (vinfo, slp_node, op_no, - rhs, rhs_node, &rhs_dt, &rhs_vectype)) + &rhs, rhs_node, &rhs_dt, &rhs_vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2655,7 +2615,7 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, /* In the case this is a store from a constant make sure native_encode_expr can handle it. 
*/ if (rhs_dt == vect_constant_def - && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0) + && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2769,13 +2729,12 @@ vect_get_mask_load_else (int elsval, tree type) static gimple * vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, - gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + slp_tree slp_node, tree vectype, + gimple_stmt_iterator *gsi, tree decl, tree ptr, tree offset, tree mask) { - tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); - tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); + tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree rettype = TREE_TYPE (TREE_TYPE (decl)); tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); /* ptrtype */ arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); @@ -2841,8 +2800,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype); } - tree scale = build_int_cst (scaletype, gs_info->scale); - gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, + tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node)); + gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op, mask_op, scale); if (!useless_type_conversion_p (vectype, rettype)) @@ -2868,12 +2827,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, static gimple * vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, + slp_tree slp_node, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + tree decl, tree ptr, tree offset, tree oprnd, tree mask) { - tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); - tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); + tree rettype = TREE_TYPE (TREE_TYPE (decl)); + tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl)); /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist); tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); @@ -2937,9 +2897,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, op = var; } - tree scale = build_int_cst (scaletype, gs_info->scale); + tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node)); gcall *new_stmt - = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale); + = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale); return new_stmt; } @@ -2950,12 +2910,12 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, containing loop. */ static void -vect_get_gather_scatter_ops (class loop *loop, - slp_tree slp_node, gather_scatter_info *gs_info, +vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node, tree *dataref_ptr, vec<tree> *vec_offset) { gimple_seq stmts = NULL; - *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); + *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node), + &stmts, true, NULL_TREE); if (stmts != NULL) { basic_block new_bb; @@ -2976,10 +2936,10 @@ vect_get_gather_scatter_ops (class loop *loop, I * DR_STEP / SCALE. 
*/ static void -vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, +vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node, + tree vectype, tree offset_vectype, loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, tree *dataref_bump, tree *vec_offset, vec_loop_lens *loop_lens) { @@ -3020,15 +2980,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, /* The offset given in GS_INFO can have pointer type, so use the element type of the vector instead. */ - tree offset_type = TREE_TYPE (gs_info->offset_vectype); + tree offset_type = TREE_TYPE (offset_vectype); /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)), - ssize_int (gs_info->scale)); + ssize_int (SLP_TREE_GS_SCALE (node))); step = fold_convert (offset_type, step); /* Create {0, X, X*2, X*3, ...}. */ - tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype, + tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype, build_zero_cst (offset_type), step); *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); } @@ -3051,7 +3011,7 @@ vect_get_loop_variant_data_ptr_increment ( tree step = vect_dr_behavior (vinfo, dr_info)->step; /* gather/scatter never reach here. */ - gcc_assert (memory_access_type != VMAT_GATHER_SCATTER); + gcc_assert (!mat_gather_scatter_p (memory_access_type)); /* When we support SELECT_VL pattern, we dynamic adjust the memory address by .SELECT_VL result. @@ -3158,13 +3118,13 @@ vectorizable_bswap (vec_info *vinfo, return false; } - STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_bswap"); record_stmt_cost (cost_vec, - 1, vector_stmt, stmt_info, 0, vect_prologue); + 1, vector_stmt, slp_node, 0, vect_prologue); record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vec_perm, stmt_info, 0, vect_body); + vec_perm, slp_node, 0, vect_body); return true; } @@ -3312,14 +3272,20 @@ vectorizable_call (vec_info *vinfo, int mask_opno = -1; if (internal_fn_p (cfn)) - mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + { + /* We can only handle direct internal masked calls here, + vectorizable_simd_clone_call is for the rest. 
*/ + if (cfn == CFN_MASK_CALL) + return false; + mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + } for (i = 0; i < nargs; i++) { if ((int) i == mask_opno) { if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno, - &op, &slp_op[i], &dt[i], &vectypes[i])) + &slp_op[i], &dt[i], &vectypes[i])) return false; continue; } @@ -3469,7 +3435,7 @@ vectorizable_call (vec_info *vinfo, } } - int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + int reduc_idx = SLP_TREE_REDUC_IDX (slp_node); internal_fn cond_fn = get_conditional_internal_fn (ifn); internal_fn cond_len_fn = get_len_internal_fn (ifn); int len_opno = internal_fn_len_index (cond_len_fn); @@ -3487,7 +3453,7 @@ vectorizable_call (vec_info *vinfo, "incompatible vector types for invariants\n"); return false; } - STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_call"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); @@ -3884,9 +3850,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (nargs == 0) return false; - vec<tree>& simd_clone_info = SLP_TREE_SIMD_CLONE_INFO (slp_node); - if (cost_vec) - simd_clone_info.truncate (0); + vect_simd_clone_data _data; + vect_simd_clone_data &data = slp_node->get_data (_data); + vec<tree>& simd_clone_info = data.simd_clone_info; arginfo.reserve (nargs, true); auto_vec<slp_tree> slp_op; slp_op.safe_grow_cleared (nargs); @@ -4282,7 +4248,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } - STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; + SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type; + slp_node->data = new vect_simd_clone_data (std::move (_data)); DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); /* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */ return true; @@ -4799,7 +4766,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } } - SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt)); + if (gimple_get_lhs (new_stmt)) + SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt)); } for (i = 0; i < nargs; ++i) @@ -5427,28 +5395,28 @@ vectorizable_conversion (vec_info *vinfo, DUMP_VECT_SCOPE ("vectorizable_conversion"); if (modifier == NONE) { - STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type; vect_model_simple_cost (vinfo, (1 + multi_step_cvt), slp_node, cost_vec); } else if (modifier == NARROW_SRC || modifier == NARROW_DST) { - STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, nvectors, multi_step_cvt, cost_vec, widen_arith); } else { - STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; + SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type; /* The initial unpacking step produces two vector results per copy. MULTI_STEP_CVT is 0 for a single conversion, so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). 
*/ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt; - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, nvectors, multi_step_cvt, cost_vec, widen_arith); } @@ -5777,7 +5745,7 @@ vectorizable_assignment (vec_info *vinfo, "incompatible vector types for invariants\n"); return false; } - STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; + SLP_TREE_TYPE (slp_node) = assignment_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_assignment"); if (!vect_nop_conversion_p (stmt_info)) vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); @@ -6122,7 +6090,7 @@ vectorizable_shift (vec_info *vinfo, gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i]) == INTEGER_CST)); } - STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; + SLP_TREE_TYPE (slp_node) = shift_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_shift"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); return true; @@ -6487,7 +6455,7 @@ vectorizable_operation (vec_info *vinfo, using_emulated_vectors_p = true; } - int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + int reduc_idx = SLP_TREE_REDUC_IDX (slp_node); vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); internal_fn cond_fn = get_conditional_internal_fn (code); @@ -6541,7 +6509,7 @@ vectorizable_operation (vec_info *vinfo, return false; } - STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; + SLP_TREE_TYPE (slp_node) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); if (using_emulated_vectors_p) @@ -6599,6 +6567,20 @@ vectorizable_operation (vec_info *vinfo, vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); } + /* For reduction operations with undefined overflow behavior make sure to + pun them to unsigned since we change the order of evaluation. + ??? Avoid for in-order reductions? */ + else if (arith_code_with_undefined_signed_overflow (orig_code) + && ANY_INTEGRAL_TYPE_P (vectype) + && TYPE_OVERFLOW_UNDEFINED (vectype) + && SLP_TREE_REDUC_IDX (slp_node) != -1) + { + gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR + || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR); + vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); + vectype = unsigned_type_for (vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + } /* Handle def. */ else vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -6612,6 +6594,46 @@ vectorizable_operation (vec_info *vinfo, vop1 = ((op_type == binary_op || op_type == ternary_op) ? vec_oprnds1[i] : NULL_TREE); vop2 = ((op_type == ternary_op) ? 
vec_oprnds2[i] : NULL_TREE); + + if (vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop0))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop0 = new_temp; + } + if (vop1 + && vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop1))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop1 = new_temp; + } + if (vop2 + && vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop2))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop2 = new_temp; + } + if (using_emulated_vectors_p) { /* Lower the operation. This follows vector lowering. */ @@ -7082,7 +7104,8 @@ scan_store_can_perm_p (tree vectype, tree init, static bool check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, - enum vect_def_type rhs_dt, slp_tree slp_node, tree mask, + enum vect_def_type rhs_dt, slp_tree slp_node, + slp_tree mask_node, vect_memory_access_type memory_access_type) { loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); @@ -7090,13 +7113,14 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, tree ref_type; gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); - if ((slp_node && SLP_TREE_LANES (slp_node) > 1) - || mask + if (SLP_TREE_LANES (slp_node) > 1 + || mask_node || memory_access_type != VMAT_CONTIGUOUS || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) || loop_vinfo == NULL || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + || LOOP_VINFO_EPILOGUE_P (loop_vinfo) || STMT_VINFO_GROUPED_ACCESS (stmt_info) || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info)) || !integer_zerop (DR_INIT (dr_info->dr)) @@ -7773,7 +7797,6 @@ vectorizable_store (vec_info *vinfo, unsigned int vec_num; bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); tree aggr_type; - gather_scatter_info gs_info; poly_uint64 vf; vec_load_store_type vls_type; tree ref_type; @@ -7787,7 +7810,7 @@ vectorizable_store (vec_info *vinfo, /* Is vectorizable store? */ - tree mask = NULL_TREE, mask_vectype = NULL_TREE; + tree mask_vectype = NULL_TREE; slp_tree mask_node = NULL; if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) { @@ -7820,16 +7843,12 @@ vectorizable_store (vec_info *vinfo, (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 && !vect_check_scalar_mask (vinfo, slp_node, mask_index, - &mask, &mask_node, &mask_dt, + &mask_node, &mask_dt, &mask_vectype)) return false; } - /* Cannot have hybrid store SLP -- that would mean storing to the - same location twice. 
*/ - gcc_assert (PURE_SLP_STMT (stmt_info)); - - tree vectype = SLP_TREE_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; + tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE; poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); if (loop_vinfo) @@ -7851,10 +7870,9 @@ vectorizable_store (vec_info *vinfo, return false; } - tree op; slp_tree op_node; if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, - &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type)) + &op_node, &rhs_dt, &rhs_vectype, &vls_type)) return false; elem_type = TREE_TYPE (vectype); @@ -7863,16 +7881,19 @@ vectorizable_store (vec_info *vinfo, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - vect_memory_access_type memory_access_type; - enum dr_alignment_support alignment_support_scheme; - int misalignment; - poly_int64 poffset; - internal_fn lanes_ifn; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type, - 1, &memory_access_type, &poffset, - &alignment_support_scheme, &misalignment, &gs_info, - &lanes_ifn)) + vect_load_store_data _ls_data; + vect_load_store_data &ls = slp_node->get_data (_ls_data); + if (cost_vec + && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + vls_type, &_ls_data)) return false; + /* Temporary aliases to analysis data, should not be modified through + these. */ + const vect_memory_access_type memory_access_type = ls.memory_access_type; + const dr_alignment_support alignment_support_scheme + = ls.alignment_support_scheme; + const int misalignment = ls.misalignment; + const poly_int64 poffset = ls.poffset; if (slp_node->ldst_lanes && memory_access_type != VMAT_LOAD_STORE_LANES) @@ -7883,7 +7904,7 @@ vectorizable_store (vec_info *vinfo, return false; } - if (mask) + if (mask_node) { if (memory_access_type == VMAT_CONTIGUOUS) { @@ -7893,8 +7914,8 @@ vectorizable_store (vec_info *vinfo, return false; } else if (memory_access_type != VMAT_LOAD_STORE_LANES - && (memory_access_type != VMAT_GATHER_SCATTER - || (GATHER_SCATTER_LEGACY_P (gs_info) + && (!mat_gather_scatter_p (memory_access_type) + || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) { if (dump_enabled_p ()) @@ -7902,8 +7923,7 @@ vectorizable_store (vec_info *vinfo, "unsupported access type for masked store.\n"); return false; } - else if (memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_EMULATED_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7921,7 +7941,7 @@ vectorizable_store (vec_info *vinfo, dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) - && memory_access_type != VMAT_GATHER_SCATTER); + && !mat_gather_scatter_p (memory_access_type)); if (grouped_store) { first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); @@ -7937,26 +7957,22 @@ vectorizable_store (vec_info *vinfo, if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec) { - if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask, - memory_access_type)) + if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, + mask_node, memory_access_type)) return false; } bool costing_p = cost_vec; if (costing_p) /* transformation not required. 
*/ { - STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; - if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - vls_type, group_size, - memory_access_type, &gs_info, - mask); + vls_type, group_size, &ls, + mask_node); if (!vect_maybe_update_slp_op_vectype (op_node, vectype) - || (mask + || (mask_node && !vect_maybe_update_slp_op_vectype (mask_node, mask_vectype))) { @@ -7974,9 +7990,9 @@ vectorizable_store (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "Vectorizing an unaligned access.\n"); - STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; + SLP_TREE_TYPE (slp_node) = store_vec_info_type; + slp_node->data = new vect_load_store_data (std::move (ls)); } - gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info)); /* Transform. */ @@ -7991,7 +8007,7 @@ vectorizable_store (vec_info *vinfo, unsigned int inside_cost = 0, prologue_cost = 0; if (vls_type == VLS_STORE_INVARIANT) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); vect_get_store_cost (vinfo, stmt_info, slp_node, 1, alignment_support_scheme, misalignment, &inside_cost, cost_vec); @@ -8020,7 +8036,6 @@ vectorizable_store (vec_info *vinfo, gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info) || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info)); first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); - op = vect_get_store_rhs (first_stmt_info); ref_type = get_group_alias_ptr_type (first_stmt_info); @@ -8072,6 +8087,14 @@ vectorizable_store (vec_info *vinfo, ... */ + /* ??? Modify local copies of alignment_support_scheme and + misalignment, but this part of analysis should be done + earlier and remembered, likewise the chosen load mode. */ + const dr_alignment_support tem = alignment_support_scheme; + dr_alignment_support alignment_support_scheme = tem; + const int tem2 = misalignment; + int misalignment = tem2; + unsigned nstores = const_nunits; unsigned lnel = 1; tree ltype = elem_type; @@ -8206,7 +8229,7 @@ vectorizable_store (vec_info *vinfo, unsigned int n_adjacent_stores = 0; running_off = offvar; if (!costing_p) - vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds); + vect_get_slp_defs (op_node, &vec_oprnds); unsigned int group_el = 0; unsigned HOST_WIDE_INT elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); @@ -8281,7 +8304,7 @@ vectorizable_store (vec_info *vinfo, else inside_cost += record_stmt_cost (cost_vec, n_adjacent_stores, - scalar_store, stmt_info, 0, vect_body); + scalar_store, slp_node, 0, vect_body); /* Only need vector extracting when there are more than one stores. */ if (nstores > 1) @@ -8321,7 +8344,7 @@ vectorizable_store (vec_info *vinfo, realignment. vect_supportable_dr_alignment always returns either dr_aligned or dr_unaligned_supported for masked operations. 
*/ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES - && !mask + && !mask_node && !loop_masks) || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -8337,12 +8360,13 @@ vectorizable_store (vec_info *vinfo, aggr_type = NULL_TREE; bump = NULL_TREE; } - else if (memory_access_type == VMAT_GATHER_SCATTER) + else if (mat_gather_scatter_p (memory_access_type)) { aggr_type = elem_type; if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, + vect_get_strided_load_store_ops (stmt_info, slp_node, vectype, + ls.strided_offset_vectype, + loop_vinfo, gsi, &bump, &vec_offset, loop_lens); } else @@ -8356,7 +8380,7 @@ vectorizable_store (vec_info *vinfo, memory_access_type, loop_lens); } - if (mask && !costing_p) + if (mask_node && !costing_p) LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; /* In case the vectorization factor (VF) is bigger than the number @@ -8373,6 +8397,8 @@ vectorizable_store (vec_info *vinfo, if (memory_access_type == VMAT_LOAD_STORE_LANES) { + const internal_fn lanes_ifn = ls.lanes_ifn; + if (costing_p) /* Update all incoming store operand nodes, the general handling above only handles the mask and the first store operand node. */ @@ -8396,7 +8422,7 @@ vectorizable_store (vec_info *vinfo, { if (!costing_p) { - if (mask) + if (mask_node) { vect_get_slp_defs (mask_node, &vec_masks); vec_mask = vec_masks[0]; @@ -8410,7 +8436,7 @@ vectorizable_store (vec_info *vinfo, else if (!costing_p) { gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); - if (mask) + if (mask_node) vec_mask = vec_masks[j]; dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); @@ -8525,7 +8551,7 @@ vectorizable_store (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { gcc_assert (!grouped_store); auto_vec<tree> vec_offsets; @@ -8545,11 +8571,11 @@ vectorizable_store (vec_info *vinfo, DR_CHAIN is of size 1. */ gcc_assert (group_size == 1); vect_get_slp_defs (op_node, gvec_oprnds[0]); - if (mask) + if (mask_node) vect_get_slp_defs (mask_node, &vec_masks); if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - vect_get_gather_scatter_ops (loop, slp_node, &gs_info, + vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr, &vec_offsets); else dataref_ptr @@ -8571,13 +8597,12 @@ vectorizable_store (vec_info *vinfo, if (!costing_p) { vec_oprnd = (*gvec_oprnds[0])[j]; - if (mask) + if (mask_node) vec_mask = vec_masks[j]; /* We should have catched mismatched types earlier. 
*/ gcc_assert (useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd))); } - unsigned HOST_WIDE_INT align; tree final_mask = NULL_TREE; tree final_len = NULL_TREE; tree bias = NULL_TREE; @@ -8592,7 +8617,9 @@ vectorizable_store (vec_info *vinfo, final_mask, vec_mask, gsi); } - if (GATHER_SCATTER_IFN_P (gs_info)) + unsigned align = get_object_alignment (DR_REF (first_dr_info->dr)); + tree alias_align_ptr = build_int_cst (ref_type, align); + if (memory_access_type == VMAT_GATHER_SCATTER_IFN) { if (costing_p) { @@ -8606,9 +8633,9 @@ vectorizable_store (vec_info *vinfo, if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) vec_offset = vec_offsets[j]; - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); - if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE) + if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE) { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, @@ -8633,7 +8660,7 @@ vectorizable_store (vec_info *vinfo, if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) call = gimple_build_call_internal ( IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, vec_oprnd, final_mask, final_len, bias); else @@ -8650,19 +8677,19 @@ vectorizable_store (vec_info *vinfo, else if (final_mask) call = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 6, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, vec_oprnd, final_mask); else call = gimple_build_call_internal (IFN_SCATTER_STORE, 5, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, vec_oprnd); gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); new_stmt = call; } - else if (GATHER_SCATTER_LEGACY_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY) { /* The builtin decls path for scatter is legacy, x86 only. */ gcc_assert (nunits.is_constant () @@ -8678,13 +8705,14 @@ vectorizable_store (vec_info *vinfo, continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); poly_uint64 offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); + = TYPE_VECTOR_SUBPARTS (offset_vectype); if (known_eq (nunits, offset_nunits)) { new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, - dataref_ptr, vec_offsets[j], + (vinfo, stmt_info, slp_node, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[j], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -8695,7 +8723,7 @@ vectorizable_store (vec_info *vinfo, lanes but the builtins will store full vectype data from the lower lanes. 
*/ new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, + (vinfo, stmt_info, slp_node, gsi, ls.gs.decl, dataref_ptr, vec_offsets[2 * j], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, @@ -8721,14 +8749,14 @@ vectorizable_store (vec_info *vinfo, VEC_UNPACK_HI_EXPR, final_mask); final_mask = make_ssa_name - (truth_type_for (gs_info.offset_vectype)); + (truth_type_for (offset_vectype)); gimple_set_lhs (new_stmt, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, + (vinfo, stmt_info, slp_node, gsi, ls.gs.decl, dataref_ptr, vec_offsets[2 * j + 1], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, @@ -8761,8 +8789,8 @@ vectorizable_store (vec_info *vinfo, } new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, - dataref_ptr, vec_offset, + (vinfo, stmt_info, slp_node, gsi, + ls.gs.decl, dataref_ptr, vec_offset, vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -8793,9 +8821,10 @@ vectorizable_store (vec_info *vinfo, continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); unsigned HOST_WIDE_INT const_offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant (); + = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant (); vec<constructor_elt, va_gc> *ctor_elts; vec_alloc (ctor_elts, const_nunits); gimple_seq stmts = NULL; @@ -8810,8 +8839,7 @@ vectorizable_store (vec_info *vinfo, unsigned elt_offset = (j % factor) * const_nunits; tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); - tree scale = size_int (gs_info.scale); - align = get_object_alignment (DR_REF (first_dr_info->dr)); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); tree ltype = build_aligned_type (TREE_TYPE (vectype), align); for (unsigned k = 0; k < const_nunits; ++k) { @@ -8871,10 +8899,13 @@ vectorizable_store (vec_info *vinfo, if (!costing_p) { /* Get vectorized arguments for SLP_NODE. */ - vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds, mask, &vec_masks); + vect_get_slp_defs (op_node, &vec_oprnds); vec_oprnd = vec_oprnds[0]; - if (mask) - vec_mask = vec_masks[0]; + if (mask_node) + { + vect_get_slp_defs (mask_node, &vec_masks); + vec_mask = vec_masks[0]; + } } /* We should have catched mismatched types earlier. */ @@ -8916,10 +8947,7 @@ vectorizable_store (vec_info *vinfo, else { tree perm_mask = perm_mask_for_reverse (vectype); - tree perm_dest - = vect_create_destination_var (vect_get_store_rhs (stmt_info), - vectype); - tree new_temp = make_ssa_name (perm_dest); + tree new_temp = make_ssa_name (vectype); /* Generate the permute statement. 
*/ gimple *perm_stmt @@ -9290,7 +9318,6 @@ vectorizable_load (vec_info *vinfo, bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); poly_uint64 vf; tree aggr_type; - gather_scatter_info gs_info; tree ref_type; enum vect_def_type mask_dt = vect_unknown_def_type; enum vect_def_type els_dt = vect_unknown_def_type; @@ -9305,12 +9332,12 @@ vectorizable_load (vec_info *vinfo, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - tree mask = NULL_TREE, mask_vectype = NULL_TREE; + tree mask_vectype = NULL_TREE; tree els = NULL_TREE; tree els_vectype = NULL_TREE; int mask_index = -1; int els_index = -1; - slp_tree slp_op = NULL; + slp_tree mask_node = NULL; slp_tree els_op = NULL; if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) { @@ -9349,7 +9376,7 @@ vectorizable_load (vec_info *vinfo, (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 && !vect_check_scalar_mask (vinfo, slp_node, mask_index, - &mask, &slp_op, &mask_dt, &mask_vectype)) + &mask_node, &mask_dt, &mask_vectype)) return false; els_index = internal_fn_else_index (ifn); @@ -9424,20 +9451,23 @@ vectorizable_load (vec_info *vinfo, else group_size = 1; - vect_memory_access_type memory_access_type; - enum dr_alignment_support alignment_support_scheme; - int misalignment; - poly_int64 poffset; - internal_fn lanes_ifn; - auto_vec<int> elsvals; - int maskload_elsval = 0; - bool need_zeroing = false; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, - 1, &memory_access_type, &poffset, - &alignment_support_scheme, &misalignment, &gs_info, - &lanes_ifn, &elsvals)) + vect_load_store_data _ls_data; + vect_load_store_data &ls = slp_node->get_data (_ls_data); + if (cost_vec + && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + VLS_LOAD, &ls)) return false; + /* Temporary aliases to analysis data, should not be modified through + these. */ + const vect_memory_access_type memory_access_type = ls.memory_access_type; + const dr_alignment_support alignment_support_scheme + = ls.alignment_support_scheme; + const int misalignment = ls.misalignment; + const poly_int64 poffset = ls.poffset; + const vec<int> &elsvals = ls.elsvals; + int maskload_elsval = 0; + bool need_zeroing = false; /* We might need to explicitly zero inactive elements if there are padding bits in the type that might leak otherwise. @@ -9447,10 +9477,10 @@ vectorizable_load (vec_info *vinfo, = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode)); /* ??? The following checks should really be part of - get_group_load_store_type. */ + get_load_store_type. 
*/ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE - || memory_access_type == VMAT_GATHER_SCATTER) + || mat_gather_scatter_p (memory_access_type)) && SLP_TREE_LANES (slp_node) == 1)) { slp_perm = true; @@ -9500,7 +9530,7 @@ vectorizable_load (vec_info *vinfo, return false; } - if (mask) + if (mask_node) { if (memory_access_type == VMAT_CONTIGUOUS) { @@ -9508,19 +9538,18 @@ vectorizable_load (vec_info *vinfo, if (!VECTOR_MODE_P (vec_mode) || !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype), - true, NULL, &elsvals)) + true, NULL, &ls.elsvals)) return false; } else if (memory_access_type != VMAT_LOAD_STORE_LANES - && memory_access_type != VMAT_GATHER_SCATTER) + && !mat_gather_scatter_p (memory_access_type)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "unsupported access type for masked load.\n"); return false; } - else if (memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_EMULATED_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9541,8 +9570,8 @@ vectorizable_load (vec_info *vinfo, if (costing_p) /* transformation not required. */ { - if (mask - && !vect_maybe_update_slp_op_vectype (slp_op, + if (mask_node + && !vect_maybe_update_slp_op_vectype (mask_node, mask_vectype)) { if (dump_enabled_p ()) @@ -9551,18 +9580,15 @@ vectorizable_load (vec_info *vinfo, return false; } - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; - if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - VLS_LOAD, group_size, - memory_access_type, &gs_info, - mask, &elsvals); + VLS_LOAD, group_size, &ls, + mask_node, &ls.elsvals); if (dump_enabled_p () && memory_access_type != VMAT_ELEMENTWISE - && memory_access_type != VMAT_GATHER_SCATTER + && !mat_gather_scatter_p (memory_access_type) && memory_access_type != VMAT_STRIDED_SLP && memory_access_type != VMAT_INVARIANT && alignment_support_scheme != dr_aligned) @@ -9572,17 +9598,8 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_LOAD_STORE_LANES) vinfo->any_known_not_updated_vssa = true; - STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - } - else - { - /* Here just get the else values. */ - if (loop_vinfo - && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - VLS_LOAD, group_size, - memory_access_type, &gs_info, - mask, &elsvals); + SLP_TREE_TYPE (slp_node) = load_vec_info_type; + slp_node->data = new vect_load_store_data (std::move (ls)); } /* If the type needs padding we must zero inactive elements. 
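The mask handling above only verifies that the target supports the access (can_vec_mask_load_store_p also reports the supported else values into ls.elsvals); the contract it relies on is that inactive lanes of a masked load produce the chosen else value, zero whenever the scalar type has padding bits that could otherwise leak. A minimal scalar sketch of that contract, using made-up names rather than the internal-function interface:

#include <cstddef>
#include <cstdint>
#include <vector>

/* Scalar model of a masked load with an else value: active lanes read
   from memory, inactive lanes get else_val (zero when the scalar type
   has padding bits that must not leak).  Illustrative names only.  */
static std::vector<int32_t>
mask_load (const int32_t *base, const std::vector<bool> &mask,
           int32_t else_val)
{
  std::vector<int32_t> res (mask.size (), else_val);
  for (size_t i = 0; i < mask.size (); ++i)
    if (mask[i])
      res[i] = base[i];
  return res;
}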
@@ -9605,8 +9622,6 @@ vectorizable_load (vec_info *vinfo, if (elsvals.length ()) maskload_elsval = *elsvals.begin (); - gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); - if (dump_enabled_p () && !costing_p) dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n"); @@ -9617,7 +9632,7 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_INVARIANT) { - gcc_assert (!grouped_load && !mask && !bb_vinfo); + gcc_assert (!grouped_load && !mask_node && !bb_vinfo); /* If we have versioned for aliasing or the loop doesn't have any data dependencies that would preclude this, then we are sure this is a loop invariant load and @@ -9775,6 +9790,13 @@ vectorizable_load (vec_info *vinfo, tree ltype = TREE_TYPE (vectype); tree lvectype = vectype; auto_vec<tree> dr_chain; + /* ??? Modify local copies of alignment_support_scheme and + misalignment, but this part of analysis should be done + earlier and remembered, likewise the chosen load mode. */ + const dr_alignment_support tem = alignment_support_scheme; + dr_alignment_support alignment_support_scheme = tem; + const int tem2 = misalignment; + int misalignment = tem2; if (memory_access_type == VMAT_STRIDED_SLP) { HOST_WIDE_INT n = gcd (group_size, const_nunits); @@ -9998,7 +10020,7 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) grouped_load = false; if (grouped_load @@ -10092,9 +10114,9 @@ vectorizable_load (vec_info *vinfo, dr_aligned or dr_unaligned_supported for (non-length) masked operations. */ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES - && !mask + && !mask_node && !loop_masks) - || memory_access_type == VMAT_GATHER_SCATTER + || mat_gather_scatter_p (memory_access_type) || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -10211,33 +10233,10 @@ vectorizable_load (vec_info *vinfo, tree bump; tree vec_offset = NULL_TREE; - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - aggr_type = NULL_TREE; - bump = NULL_TREE; - } - else if (memory_access_type == VMAT_GATHER_SCATTER) - { - aggr_type = elem_type; - if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, - &bump, &vec_offset, loop_lens); - } - else - { - if (memory_access_type == VMAT_LOAD_STORE_LANES) - aggr_type = build_array_type_nelts (elem_type, group_size * nunits); - else - aggr_type = vectype; - if (!costing_p) - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); - } auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; - if (mask && !costing_p) + if (mask_node && !costing_p) vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], &vec_masks); @@ -10245,9 +10244,16 @@ vectorizable_load (vec_info *vinfo, tree vec_els = NULL_TREE; if (memory_access_type == VMAT_LOAD_STORE_LANES) { + const internal_fn lanes_ifn = ls.lanes_ifn; + gcc_assert (alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); + aggr_type = build_array_type_nelts (elem_type, group_size * nunits); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. 
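For VMAT_LOAD_STORE_LANES the access reads group_size * nunits consecutive elements as one array and hands back group_size de-interleaved vectors; the target's ld2/ld3/ld4-style instructions do the real work. A rough standalone model of that de-interleaving, assuming unit-stride int32_t data and invented names:

#include <cstddef>
#include <cstdint>
#include <vector>

/* Rough model of what a load-lanes operation hands back: group_size
   de-interleaved vectors out of group_size * nunits consecutive
   elements, result[g][l] = in[l * group_size + g].  Illustrative.  */
static std::vector<std::vector<int32_t> >
load_lanes (const int32_t *in, size_t group_size, size_t nunits)
{
  std::vector<std::vector<int32_t> > res (group_size,
                                          std::vector<int32_t> (nunits));
  for (size_t l = 0; l < nunits; ++l)
    for (size_t g = 0; g < group_size; ++g)
      res[g][l] = in[l * group_size + g];
  return res;
}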
*/ @@ -10300,7 +10306,7 @@ vectorizable_load (vec_info *vinfo, dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); } - if (mask) + if (mask_node) vec_mask = vec_masks[j]; tree vec_array = create_vector_array (vectype, group_size); @@ -10405,25 +10411,37 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { gcc_assert (!grouped_load && !slp_perm); - unsigned int inside_cost = 0, prologue_cost = 0; - /* 1. Create the vector or array pointer update chain. */ - if (!costing_p) + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr, + aggr_type = NULL_TREE; + bump = NULL_TREE; + if (!costing_p) + vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr, &vec_offsets); - else - dataref_ptr - = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, - at_loop, offset, &dummy, gsi, - &ptr_incr, false, bump); + } + else + { + aggr_type = elem_type; + if (!costing_p) + { + vect_get_strided_load_store_ops (stmt_info, slp_node, vectype, + ls.strided_offset_vectype, + loop_vinfo, gsi, + &bump, &vec_offset, loop_lens); + dataref_ptr + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, + at_loop, offset, &dummy, gsi, + &ptr_incr, false, bump); + } } + unsigned int inside_cost = 0, prologue_cost = 0; + gimple *new_stmt = NULL; for (i = 0; i < vec_num; i++) { @@ -10432,7 +10450,7 @@ vectorizable_load (vec_info *vinfo, tree bias = NULL_TREE; if (!costing_p) { - if (mask) + if (mask_node) vec_mask = vec_masks[i]; if (loop_masks) final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, @@ -10447,8 +10465,9 @@ vectorizable_load (vec_info *vinfo, } /* 2. Create the vector-load in the loop. */ - unsigned HOST_WIDE_INT align; - if (GATHER_SCATTER_IFN_P (gs_info)) + unsigned align = get_object_alignment (DR_REF (first_dr_info->dr)); + tree alias_align_ptr = build_int_cst (ref_type, align); + if (memory_access_type == VMAT_GATHER_SCATTER_IFN) { if (costing_p) { @@ -10461,9 +10480,9 @@ vectorizable_load (vec_info *vinfo, if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) vec_offset = vec_offsets[i]; tree zero = build_zero_cst (vectype); - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); - if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD) + if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD) { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, @@ -10495,7 +10514,7 @@ vectorizable_load (vec_info *vinfo, if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 9, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, zero, final_mask, vec_els, final_len, bias); @@ -10511,19 +10530,19 @@ vectorizable_load (vec_info *vinfo, else if (final_mask) call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 7, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, zero, final_mask, vec_els); else call = gimple_build_call_internal (IFN_GATHER_LOAD, 5, dataref_ptr, - gs_info.alias_ptr, + alias_align_ptr, vec_offset, scale, zero); gimple_call_set_nothrow (call, true); new_stmt = call; data_ref = NULL_TREE; } - else if (GATHER_SCATTER_LEGACY_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY) { /* The builtin decls path for gather is legacy, x86 only. 
*/ gcc_assert (!final_len && nunits.is_constant ()); @@ -10535,13 +10554,14 @@ vectorizable_load (vec_info *vinfo, slp_node, 0, vect_body); continue; } - poly_uint64 offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); + tree offset_vectype = TREE_TYPE (vec_offsets[0]); + poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype); if (known_eq (nunits, offset_nunits)) { new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, vec_offsets[i], final_mask); + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[i], + final_mask); data_ref = NULL_TREE; } else if (known_eq (nunits, offset_nunits * 2)) @@ -10550,8 +10570,9 @@ vectorizable_load (vec_info *vinfo, lanes but the builtins will produce full vectype data with just the lower lanes filled. */ new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, vec_offsets[2 * i], final_mask); + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[2 * i], + final_mask); tree low = make_ssa_name (vectype); gimple_set_lhs (new_stmt, low); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -10582,15 +10603,15 @@ vectorizable_load (vec_info *vinfo, VEC_UNPACK_HI_EXPR, final_mask); final_mask = make_ssa_name - (truth_type_for (gs_info.offset_vectype)); + (truth_type_for (offset_vectype)); gimple_set_lhs (new_stmt, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[2 * i + 1], final_mask); tree high = make_ssa_name (vectype); gimple_set_lhs (new_stmt, high); @@ -10633,7 +10654,8 @@ vectorizable_load (vec_info *vinfo, new_stmt, gsi); } new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offset, final_mask); data_ref = NULL_TREE; } @@ -10662,8 +10684,9 @@ vectorizable_load (vec_info *vinfo, slp_node, 0, vect_body); continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); unsigned HOST_WIDE_INT const_offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype) .to_constant (); + = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant (); vec<constructor_elt, va_gc> *ctor_elts; vec_alloc (ctor_elts, const_nunits); gimple_seq stmts = NULL; @@ -10674,8 +10697,7 @@ vectorizable_load (vec_info *vinfo, vec_offset = vec_offsets[i / factor]; unsigned elt_offset = (i % factor) * const_nunits; tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); - tree scale = size_int (gs_info.scale); - align = get_object_alignment (DR_REF (first_dr_info->dr)); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); tree ltype = build_aligned_type (TREE_TYPE (vectype), align); for (unsigned k = 0; k < const_nunits; ++k) { @@ -10744,6 +10766,11 @@ vectorizable_load (vec_info *vinfo, return true; } + aggr_type = vectype; + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + poly_uint64 group_elt = 0; unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with @@ -10823,7 +10850,7 @@ vectorizable_load (vec_info *vinfo, if (!costing_p) { - if (mask) + if (mask_node) vec_mask = vec_masks[i]; if (loop_masks) final_mask = vect_get_loop_mask (loop_vinfo, gsi, 
loop_masks, @@ -11536,20 +11563,24 @@ vectorizable_condition (vec_info *vinfo, if (code != COND_EXPR) return false; - stmt_vec_info reduc_info = NULL; - int reduc_index = -1; + int reduc_index = SLP_TREE_REDUC_IDX (slp_node); vect_reduction_type reduction_type = TREE_CODE_REDUCTION; - bool for_reduction - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; + bool nested_cycle_p = false; + bool for_reduction = vect_is_reduction (stmt_info); if (for_reduction) { if (SLP_TREE_LANES (slp_node) > 1) return false; - reduc_info = info_for_reduction (vinfo, stmt_info); - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); - reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); - gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION - || reduc_index != -1); + /* ??? With a reduction path we do not get at the reduction info from + every stmt, use the conservative default setting then. */ + if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) + { + vect_reduc_info reduc_info + = info_for_reduction (loop_vinfo, slp_node); + reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); + nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + stmt_info); + } } else { @@ -11739,7 +11770,7 @@ vectorizable_condition (vec_info *vinfo, vec_num, vectype, NULL); } /* Extra inactive lanes should be safe for vect_nested_cycle. */ - else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) + else if (!nested_cycle_p) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -11749,7 +11780,7 @@ vectorizable_condition (vec_info *vinfo, } } - STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; + SLP_TREE_TYPE (slp_node) = condition_vec_info_type; vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind); return true; } @@ -12256,7 +12287,7 @@ vectorizable_comparison (vec_info *vinfo, return false; if (cost_vec) - STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; + SLP_TREE_TYPE (slp_node) = comparison_vec_info_type; return true; } @@ -12265,13 +12296,11 @@ vectorizable_comparison (vec_info *vinfo, vectorization. 
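The reduction handling in vectorizable_condition above is about COND_EXPRs that feed a reduction; one scalar shape it covers is an extract-last style loop where the condition selects the most recent matching value. An illustrative example, not taken from the testsuite:

/* A conditional reduction in scalar form: the reduction result is only
   updated on iterations where the condition holds, e.g. "remember the
   last index whose element is negative".  */
static int
last_negative (const int *a, int n)
{
  int last = -1;
  for (int i = 0; i < n; ++i)
    last = a[i] < 0 ? i : last;  /* the COND_EXPR feeding the reduction */
  return last;
}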
*/ bool -vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, +vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); - if (!loop_vinfo - || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) + if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) return false; if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) @@ -12336,7 +12365,7 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, return false; } - if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, + if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi, slp_node, cost_vec)) return false; @@ -12573,19 +12602,19 @@ vect_analyze_stmt (vec_info *vinfo, } tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); - STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node); + STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE; if (STMT_VINFO_RELEVANT_P (stmt_info)) { gcall *call = dyn_cast <gcall *> (stmt_info->stmt); - gcc_assert (STMT_VINFO_VECTYPE (stmt_info) + gcc_assert (SLP_TREE_VECTYPE (node) || gimple_code (stmt_info->stmt) == GIMPLE_COND || (call && gimple_call_lhs (call) == NULL_TREE)); } ok = true; - if (!bb_vinfo - && (STMT_VINFO_RELEVANT_P (stmt_info) + if (bb_vinfo + || (STMT_VINFO_RELEVANT_P (stmt_info) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) /* Prefer vectorizable_call over vectorizable_simd_clone_call so -mveclibabi= takes preference over library functions with @@ -12593,58 +12622,31 @@ vect_analyze_stmt (vec_info *vinfo, ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_conversion (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_operation (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_assignment (vinfo, stmt_info, - NULL, node, cost_vec) + || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo), - stmt_info, node, cost_vec) - || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info, - node, node_instance, cost_vec) - || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info, - node, cost_vec) || vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_condition (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_comparison (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), - stmt_info, node) - || vectorizable_recurr (as_a <loop_vec_info> (vinfo), - stmt_info, node, cost_vec) - || vectorizable_early_exit (vinfo, stmt_info, NULL, node, - cost_vec)); - else - { - if (bb_vinfo) - ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_simd_clone_call (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_conversion (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_shift (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_operation (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_assignment (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_load (vinfo, stmt_info, 
- NULL, node, cost_vec) - || vectorizable_store (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_condition (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_comparison (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_phi (vinfo, stmt_info, node, cost_vec) - || vectorizable_early_exit (vinfo, stmt_info, NULL, node, - cost_vec)); - - } + || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec) + || (bb_vinfo + && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec)) + || (is_a <loop_vec_info> (vinfo) + && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_reduction (as_a <loop_vec_info> (vinfo), + stmt_info, + node, node_instance, cost_vec) + || vectorizable_induction (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), + stmt_info, node) + || vectorizable_recurr (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_early_exit (as_a <loop_vec_info> (vinfo), + stmt_info, NULL, node, + cost_vec)))); STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; @@ -12657,10 +12659,10 @@ vect_analyze_stmt (vec_info *vinfo, /* Stmts that are (also) "live" (i.e. - that are used out of the loop) need extra handling, except for vectorizable reductions. */ if (!bb_vinfo - && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type - && (STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) - && (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR) + && SLP_TREE_TYPE (node) != reduc_vec_info_type + && (SLP_TREE_TYPE (node) != lc_phi_info_type + || SLP_TREE_DEF_TYPE (node) == vect_internal_def) + && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node)) && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo), node, node_instance, false, cost_vec)) @@ -12691,10 +12693,9 @@ vect_transform_stmt (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "------>vectorizing statement: %G", stmt_info->stmt); - tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); - STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); + STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE; - switch (STMT_VINFO_TYPE (stmt_info)) + switch (SLP_TREE_TYPE (slp_node)) { case type_demotion_vec_info_type: case type_promotion_vec_info_type: @@ -12791,12 +12792,14 @@ vect_transform_stmt (vec_info *vinfo, break; case phi_info_type: - done = vectorizable_phi (vinfo, stmt_info, slp_node, NULL); + done = vectorizable_phi (as_a <bb_vec_info> (vinfo), + stmt_info, slp_node, NULL); gcc_assert (done); break; case loop_exit_ctrl_vec_info_type: - done = vectorizable_early_exit (vinfo, stmt_info, gsi, slp_node, NULL); + done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo), + stmt_info, gsi, slp_node, NULL); gcc_assert (done); break; @@ -12811,9 +12814,8 @@ vect_transform_stmt (vec_info *vinfo, done = true; } - if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type - && (!slp_node->ldst_lanes - || SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR)) + if (SLP_TREE_TYPE (slp_node) != store_vec_info_type + && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node))) { /* Handle stmts whose DEF is used outside the loop-nest that is being vectorized. 
*/ @@ -12822,8 +12824,6 @@ vect_transform_stmt (vec_info *vinfo, gcc_assert (done); } - STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; - return is_store; } @@ -13285,7 +13285,7 @@ vect_is_simple_use (vec_info *vinfo, slp_tree slp_node, } else { - gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR); + gcc_assert (SLP_TREE_PERMUTE_P (child)); *op = error_mark_node; *dt = vect_internal_def; if (def_stmt_info_out)
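A recurring theme across these hunks is that the analysis result now lives on the SLP node (SLP_TREE_TYPE, slp_node->data) rather than on the stmt_vec_info, and the transform phase dispatches on that tag. A toy sketch of the pattern with invented names, just to show the shape of the change:

#include <cassert>

/* Toy model of the analysis/transform split: analysis tags the SLP
   node with the kind of code generation it needs, the transform phase
   dispatches on that tag.  All names here are invented.  */
enum node_type { load_type, store_type, condition_type };

struct slp_node_sketch
{
  node_type type;  /* recorded during analysis */
};

static bool
transform_node (const slp_node_sketch &node)
{
  switch (node.type)
    {
    case load_type:       /* emit vector loads */       return true;
    case store_type:      /* emit vector stores */      return true;
    case condition_type:  /* emit VEC_COND_EXPRs */     return true;
    default:              assert (false);               return false;
    }
}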