path: root/gcc/tree-vect-stmts.cc
author     Jerry DeLisle <jvdelisle@gcc.gnu.org>  2025-09-02 15:58:26 -0700
committer  Jerry DeLisle <jvdelisle@gcc.gnu.org>  2025-09-02 15:58:26 -0700
commit     071b4126c613881f4cb25b4e5c39032964827f88 (patch)
tree       7ed805786566918630d1d617b1ed8f7310f5fd8e /gcc/tree-vect-stmts.cc
parent     845d23f3ea08ba873197c275a8857eee7edad996 (diff)
parent     caa1c2f42691d68af4d894a5c3e700ecd2dba080 (diff)
Merge branch 'master' into gfortran-test (devel/gfortran-test)
Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--  gcc/tree-vect-stmts.cc  1180
1 file changed, 590 insertions(+), 590 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index aa2657a..5b1f291 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -386,6 +386,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"vec_stmt_relevant_p: stmt has vdefs.\n");
*relevant = vect_used_in_scope;
+ if (! STMT_VINFO_DATA_REF (stmt_info)
+ && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
+ LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
}
/* uses outside the loop. */
@@ -414,7 +417,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
/* Check if it's a not live PHI and multiple exits. In this case
there will be a usage later on after peeling which is needed for the
- alternate exit. */
+ alternate exit.
+ ??? Unless the PHI was marked live because of early
+ break, which also needs the latch def live and vectorized. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
&& is_a <gphi *> (stmt)
&& gimple_bb (stmt) == LOOP_VINFO_LOOP (loop_vinfo)->header
@@ -652,14 +657,15 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
}
/* We are also not interested in uses on loop PHI backedges that are
inductions. Otherwise we'll needlessly vectorize the IV increment
- and cause hybrid SLP for SLP inductions. Unless the PHI is live
- of course. */
+ and cause hybrid SLP for SLP inductions. */
else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
&& STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
- && ! STMT_VINFO_LIVE_P (stmt_vinfo)
&& (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
loop_latch_edge (bb->loop_father))
- == use))
+ == use)
+ && (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ || (gimple_bb (stmt_vinfo->stmt)
+ != LOOP_VINFO_LOOP (loop_vinfo)->header)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -667,7 +673,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
return opt_result::success ();
}
-
vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
return opt_result::success ();
}
@@ -719,16 +724,28 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
phi_info->stmt);
if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
- vect_mark_relevant (&worklist, phi_info, relevant, live_p);
+ {
+ if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type)
+ return opt_result::failure_at
+ (*si, "not vectorized: unhandled relevant PHI: %G", *si);
+ vect_mark_relevant (&worklist, phi_info, relevant, live_p);
+ }
}
- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si))
{
- if (is_gimple_debug (gsi_stmt (si)))
+ gimple *stmt = gsi_stmt (si);
+ if (is_gimple_debug (stmt))
continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
+ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
- "init: stmt relevant? %G", stmt_info->stmt);
+ "init: stmt relevant? %G", stmt);
+
+ if (gimple_get_lhs (stmt) == NULL_TREE
+ && !is_a <gcond *> (stmt)
+ && !is_a <gcall *> (stmt))
+ return opt_result::failure_at
+ (stmt, "not vectorized: irregular stmt: %G", stmt);
if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
@@ -874,7 +891,9 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
{
gather_scatter_info gs_info;
- if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
+ if (!vect_check_gather_scatter (stmt_vinfo,
+ STMT_VINFO_VECTYPE (stmt_vinfo),
+ loop_vinfo, &gs_info))
gcc_unreachable ();
opt_result res
= process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
@@ -926,8 +945,7 @@ vect_model_simple_cost (vec_info *, int n, slp_tree node,
is true the stmt is doing widening arithmetic. */
static void
-vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
- enum vect_def_type *dt,
+vect_model_promotion_demotion_cost (slp_tree slp_node,
unsigned int ncopies, int pwr,
stmt_vector_for_cost *cost_vec,
bool widen_arith)
@@ -940,16 +958,10 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
inside_cost += record_stmt_cost (cost_vec, ncopies,
widen_arith
? vector_stmt : vec_promote_demote,
- stmt_info, 0, vect_body);
+ slp_node, 0, vect_body);
ncopies *= 2;
}
- /* FORNOW: Assuming maximum 2 args per stmts. */
- for (i = 0; i < 2; i++)
- if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
- prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
- stmt_info, 0, vect_prologue);
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_promotion_demotion_cost: inside_cost = %d, "
@@ -1420,16 +1432,33 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
slp_tree slp_node,
vec_load_store_type vls_type,
int group_size,
- vect_memory_access_type
- memory_access_type,
- gather_scatter_info *gs_info,
- tree scalar_mask,
+ vect_load_store_data *ls,
+ slp_tree mask_node,
vec<int> *elsvals = nullptr)
{
+ vect_memory_access_type memory_access_type = ls->memory_access_type;
+
/* Invariant loads need no special support. */
if (memory_access_type == VMAT_INVARIANT)
return;
+ /* Figure whether the mask is uniform. scalar_mask is used to
+ populate the scalar_cond_masked_set. */
+ tree scalar_mask = NULL_TREE;
+ if (mask_node)
+ for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i)
+ {
+ tree def = vect_get_slp_scalar_def (mask_node, i);
+ if (!def
+ || (scalar_mask && def != scalar_mask))
+ {
+ scalar_mask = NULL;
+ break;
+ }
+ else
+ scalar_mask = def;
+ }
+
unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
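The uniform-mask scan added above reduces to a simple fold over the lanes. A minimal standalone analogue of that logic (plain C++, no GCC internals; the function name is illustrative):

#include <vector>

/* Return the definition shared by every lane, or nullptr when a lane
   is missing or two lanes disagree -- i.e. the mask is not uniform.  */
template <typename T>
const T *
find_uniform_def (const std::vector<const T *> &lanes)
{
  const T *common = nullptr;
  for (const T *def : lanes)
    {
      if (!def || (common && def != common))
	return nullptr;
      common = def;
    }
  return common;
}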
@@ -1459,7 +1488,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
return;
}
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (mat_gather_scatter_p (memory_access_type))
{
internal_fn ifn = (is_load
? IFN_MASK_GATHER_LOAD
@@ -1467,17 +1496,22 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
internal_fn len_ifn = (is_load
? IFN_MASK_LEN_GATHER_LOAD
: IFN_MASK_LEN_SCATTER_STORE);
+ stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
+ tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
+ ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
+ : ls->strided_offset_vectype);
+ tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
+ int scale = SLP_TREE_GS_SCALE (slp_node);
if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
- gs_info->memory_type,
- gs_info->offset_vectype,
- gs_info->scale,
+ memory_type,
+ off_vectype, scale,
elsvals))
vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
- gs_info->memory_type,
- gs_info->offset_vectype,
- gs_info->scale,
- elsvals))
+ memory_type,
+ off_vectype, scale,
+ elsvals)
+ || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
scalar_mask);
else
@@ -1515,7 +1549,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
}
/* We might load more scalars than we need for permuting SLP loads.
- We checked in get_group_load_store_type that the extra elements
+ We checked in get_load_store_type that the extra elements
don't leak into a new vector. */
auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
{
@@ -1676,7 +1710,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
get_object_alignment (DR_REF (dr)));
gs_info->element_type = TREE_TYPE (vectype);
gs_info->offset = fold_convert (offset_type, step);
- gs_info->offset_dt = vect_constant_def;
gs_info->scale = scale;
gs_info->memory_type = memory_type;
return true;
@@ -1703,19 +1736,33 @@ static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
loop_vec_info loop_vinfo, bool masked_p,
gather_scatter_info *gs_info,
- vec<int> *elsvals)
+ vec<int> *elsvals,
+ unsigned int group_size,
+ bool single_element_p)
{
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
+ if (!vect_check_gather_scatter (stmt_info, vectype,
+ loop_vinfo, gs_info, elsvals)
|| gs_info->ifn == IFN_LAST)
- return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals);
+ {
+ if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
+ masked_p, gs_info, elsvals))
+ return false;
+ }
+ else
+ {
+ tree old_offset_type = TREE_TYPE (gs_info->offset);
+ tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
- tree old_offset_type = TREE_TYPE (gs_info->offset);
- tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+ gcc_assert (TYPE_PRECISION (new_offset_type)
+ >= TYPE_PRECISION (old_offset_type));
+ gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ }
- gcc_assert (TYPE_PRECISION (new_offset_type)
- >= TYPE_PRECISION (old_offset_type));
- gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+ if (!single_element_p
+ && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+ gs_info->scale,
+ group_size))
+ return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1903,38 +1950,46 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
return NULL_TREE;
}
-/* A subroutine of get_load_store_type, with a subset of the same
- arguments. Handle the case where STMT_INFO is part of a grouped load
- or store.
+/* Analyze load or store SLP_NODE of type VLS_TYPE. Return true
+ if there is a memory access type that the vectorized form can use,
+ storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
+ or scatters, fill in GS_INFO accordingly. In addition
+ *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
+ the target does not support the alignment scheme. *MISALIGNMENT
+ is set according to the alignment of the access (including
+ DR_MISALIGNMENT_UNKNOWN when it is unknown).
- For stores, the statements in the group are all consecutive
- and there is no gap at the end. For loads, the statements in the
- group might not be consecutive; there can be gaps between statements
- as well as at the end.
+ MASKED_P is true if the statement is conditional on a vectorized mask.
+ VECTYPE is the vector type that the vectorized statements will use.
- If we can use gather/scatter and ELSVALS is nonzero the supported
- else values will be stored in the vector ELSVALS points to.
-*/
+ If ELSVALS is nonzero the supported else values will be stored in the
+ vector ELSVALS points to. */
static bool
-get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
- tree vectype, slp_tree slp_node,
- bool masked_p, vec_load_store_type vls_type,
- vect_memory_access_type *memory_access_type,
- poly_int64 *poffset,
- dr_alignment_support *alignment_support_scheme,
- int *misalignment,
- gather_scatter_info *gs_info,
- internal_fn *lanes_ifn,
- vec<int> *elsvals)
+get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree vectype, slp_tree slp_node,
+ bool masked_p, vec_load_store_type vls_type,
+ vect_load_store_data *ls)
{
+ vect_memory_access_type *memory_access_type = &ls->memory_access_type;
+ poly_int64 *poffset = &ls->poffset;
+ dr_alignment_support *alignment_support_scheme
+ = &ls->alignment_support_scheme;
+ int *misalignment = &ls->misalignment;
+ internal_fn *lanes_ifn = &ls->lanes_ifn;
+ vec<int> *elsvals = &ls->elsvals;
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
stmt_vec_info first_stmt_info;
unsigned int group_size;
unsigned HOST_WIDE_INT gap;
bool single_element_p;
poly_int64 neg_ldst_offset = 0;
+
+ *misalignment = DR_MISALIGNMENT_UNKNOWN;
+ *poffset = 0;
+
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
@@ -1951,7 +2006,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
single_element_p = true;
}
dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
- poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* True if the vectorized statements would access beyond the last
statement in the group. */
@@ -1977,7 +2031,53 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
separated by the stride, until we have a complete vector.
Fall back to scalar accesses if that isn't possible. */
*memory_access_type = VMAT_STRIDED_SLP;
- else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ {
+ slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
+ tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+ int scale = SLP_TREE_GS_SCALE (slp_node);
+ tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
+ tree tem;
+ if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
+ masked_p, vectype,
+ memory_type,
+ offset_vectype, scale,
+ &ls->gs.ifn, &tem,
+ elsvals))
+ *memory_access_type = VMAT_GATHER_SCATTER_IFN;
+ else if (vls_type == VLS_LOAD
+ ? (targetm.vectorize.builtin_gather
+ && (ls->gs.decl
+ = targetm.vectorize.builtin_gather (vectype,
+ TREE_TYPE
+ (offset_vectype),
+ scale)))
+ : (targetm.vectorize.builtin_scatter
+ && (ls->gs.decl
+ = targetm.vectorize.builtin_scatter (vectype,
+ TREE_TYPE
+ (offset_vectype),
+ scale))))
+ *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
+ else
+ {
+ /* GATHER_SCATTER_EMULATED_P. */
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
+ || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
+ }
+ }
+ else
{
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
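For reference, when VMAT_GATHER_SCATTER_EMULATED is chosen above, the access is later expanded lane by lane. A self-contained sketch of what such an emulated gather amounts to (illustrative only, not the vectorizer's own expansion; int32_t element and offset types are assumptions):

#include <cstdint>
#include <cstring>

/* Load NUNITS elements into DST, one scalar load per lane, from
   BASE + OFFSETS[i] * SCALE.  */
static void
emulated_gather (int32_t *dst, const char *base,
		 const int32_t *offsets, int scale, unsigned nunits)
{
  for (unsigned i = 0; i < nunits; ++i)
    std::memcpy (&dst[i], base + (int64_t) offsets[i] * scale,
		 sizeof dst[i]);
}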
@@ -2221,62 +2321,19 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && single_element_p
&& SLP_TREE_LANES (slp_node) == 1
- && loop_vinfo
- && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
- masked_p, gs_info, elsvals))
- *memory_access_type = VMAT_GATHER_SCATTER;
- else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ && loop_vinfo)
{
- tree offset;
- slp_tree offset_node;
- *memory_access_type = VMAT_GATHER_SCATTER;
- if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
- elsvals))
- gcc_unreachable ();
- /* When using internal functions, we rely on pattern recognition
- to convert the type of the offset to the type that the target
- requires, with the result being a call to an internal function.
- If that failed for some reason (e.g. because another pattern
- took priority), just handle cases in which the offset already
- has the right type. */
- else if (GATHER_SCATTER_IFN_P (*gs_info)
- && !is_gimple_call (stmt_info->stmt)
- && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
- TREE_TYPE (gs_info->offset_vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s offset requires a conversion\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node,
- &gs_info->offset_dt,
- &gs_info->offset_vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "%s index use not simple.\n",
- vls_type == VLS_LOAD ? "gather" : "scatter");
- return false;
- }
- else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+ gather_scatter_info gs_info;
+ if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+ masked_p, &gs_info, elsvals,
+ group_size, single_element_p))
{
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
- || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
- || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
- || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
- (gs_info->offset_vectype),
- TYPE_VECTOR_SUBPARTS (vectype)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported vector types for emulated "
- "gather.\n");
- return false;
- }
+ SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
+ SLP_TREE_GS_BASE (slp_node) = error_mark_node;
+ ls->gs.ifn = gs_info.ifn;
+ ls->strided_offset_vectype = gs_info.offset_vectype;
+ *memory_access_type = VMAT_GATHER_SCATTER_IFN;
}
}
@@ -2285,8 +2342,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*poffset = neg_ldst_offset;
if (*memory_access_type == VMAT_ELEMENTWISE
- || (*memory_access_type == VMAT_GATHER_SCATTER
- && GATHER_SCATTER_LEGACY_P (*gs_info))
+ || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
|| *memory_access_type == VMAT_STRIDED_SLP
|| *memory_access_type == VMAT_INVARIANT)
{
@@ -2295,7 +2351,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- if (*memory_access_type == VMAT_GATHER_SCATTER
+ if (mat_gather_scatter_p (*memory_access_type)
&& !first_dr_info)
*misalignment = DR_MISALIGNMENT_UNKNOWN;
else
@@ -2303,27 +2359,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*alignment_support_scheme
= vect_supportable_dr_alignment
(vinfo, first_dr_info, vectype, *misalignment,
- *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
- }
-
- if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
- {
- /* STMT is the leader of the group. Check the operands of all the
- stmts of the group. */
- stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
- while (next_stmt_info)
- {
- tree op = vect_get_store_rhs (next_stmt_info);
- enum vect_def_type dt;
- if (!vect_is_simple_use (op, vinfo, &dt))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "use not simple.\n");
- return false;
- }
- next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
- }
+ mat_gather_scatter_p (*memory_access_type));
}
if (overrun_p)
@@ -2336,51 +2372,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
}
- return true;
-}
-
-/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
- if there is a memory access type that the vectorized form can use,
- storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
- or scatters, fill in GS_INFO accordingly. In addition
- *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
- the target does not support the alignment scheme. *MISALIGNMENT
- is set according to the alignment of the access (including
- DR_MISALIGNMENT_UNKNOWN when it is unknown).
-
- SLP says whether we're performing SLP rather than loop vectorization.
- MASKED_P is true if the statement is conditional on a vectorized mask.
- VECTYPE is the vector type that the vectorized statements will use.
- NCOPIES is the number of vector statements that will be needed.
-
- If ELSVALS is nonzero the supported else values will be stored in the
- vector ELSVALS points to. */
-
-static bool
-get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
- tree vectype, slp_tree slp_node,
- bool masked_p, vec_load_store_type vls_type,
- unsigned int,
- vect_memory_access_type *memory_access_type,
- poly_int64 *poffset,
- dr_alignment_support *alignment_support_scheme,
- int *misalignment,
- gather_scatter_info *gs_info,
- internal_fn *lanes_ifn,
- vec<int> *elsvals = nullptr)
-{
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
- poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- *misalignment = DR_MISALIGNMENT_UNKNOWN;
- *poffset = 0;
- if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
- masked_p,
- vls_type, memory_access_type, poffset,
- alignment_support_scheme,
- misalignment, gs_info, lanes_ifn,
- elsvals))
- return false;
-
if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
&& !nunits.is_constant ())
@@ -2392,7 +2383,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
}
-
/* Checks if all scalar iterations are known to be inbounds. */
bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info));
@@ -2403,7 +2393,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
if (loop_vinfo
&& dr_safe_speculative_read_required (stmt_info)
&& LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && (*memory_access_type == VMAT_GATHER_SCATTER
+ && (mat_gather_scatter_p (*memory_access_type)
|| *memory_access_type == VMAT_STRIDED_SLP))
{
if (dump_enabled_p ())
@@ -2423,75 +2413,31 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
vector iteration or force masking. */
if (dr_safe_speculative_read_required (stmt_info)
&& (*alignment_support_scheme == dr_aligned
- && *memory_access_type != VMAT_GATHER_SCATTER))
+ && !mat_gather_scatter_p (*memory_access_type)))
{
/* We can only peel for loops, of course. */
gcc_checking_assert (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 read_amount
+ = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ read_amount *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
auto target_alignment
= DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
- unsigned HOST_WIDE_INT target_align;
-
- bool group_aligned = false;
- if (target_alignment.is_constant (&target_align)
- && nunits.is_constant ())
- {
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- auto vectype_size
- = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
- poly_uint64 required_alignment = vf * vectype_size;
- /* If we have a grouped access we require that the alignment be N * elem. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- required_alignment *=
- DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
- if (!multiple_p (target_alignment, required_alignment))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "desired alignment %wu not met. Instead got %wu "
- "for DR alignment at %G",
- required_alignment.to_constant (),
- target_align, STMT_VINFO_STMT (stmt_info));
- return false;
- }
-
- if (!pow2p_hwi (target_align))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "non-power-of-two vector alignment %wd "
- "for DR alignment at %G",
- target_align, STMT_VINFO_STMT (stmt_info));
- return false;
- }
-
- /* For VLA we have to insert a runtime check that the vector loads
- per iterations don't exceed a page size. For now we can use
- POLY_VALUE_MAX as a proxy as we can't peel for VLA. */
- if (known_gt (required_alignment, (unsigned)param_min_pagesize))
+ if (!multiple_p (target_alignment, read_amount))
+ {
+ if (dump_enabled_p ())
{
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "alignment required for correctness (");
- dump_dec (MSG_MISSED_OPTIMIZATION, required_alignment);
- dump_printf (MSG_NOTE, ") may exceed page size\n");
- }
- return false;
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "desired alignment not met, target was ");
+ dump_dec (MSG_NOTE, target_alignment);
+ dump_printf (MSG_NOTE, " previously, but read amount is ");
+ dump_dec (MSG_NOTE, read_amount);
+ dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
}
-
- group_aligned = true;
- }
-
- /* There are multiple loads that have a misalignment that we couldn't
- align. We would need LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P to
- vectorize. */
- if (!group_aligned)
- {
- if (inbounds)
- LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
- else
- return false;
+ return false;
}
/* When using a group access the first element may be aligned but the
@@ -2513,6 +2459,33 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
STMT_VINFO_STMT (stmt_info));
return false;
}
+
+ /* Reject vectorization if we know the read amount per vector iteration

+ exceeds the min page size. */
+ if (known_gt (read_amount, (unsigned) param_min_pagesize))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "alignment required for correctness (");
+ dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
+ dump_printf (MSG_NOTE, ") may exceed page size.\n");
+ }
+ return false;
+ }
+
+ if (!vf.is_constant ())
+ {
+ /* For VLA modes, we need a runtime check to ensure any speculative
+ read amount does not exceed the page size. Here we record the max
+ possible read amount for the check. */
+ if (maybe_gt (read_amount,
+ LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
+ LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
+
+ /* For VLA modes, we must use partial vectors. */
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ }
}
if (*alignment_support_scheme == dr_unaligned_unsupported)
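The quantity bounded in the hunk above is the number of bytes one vector iteration may read speculatively. A standalone sketch of that arithmetic (hypothetical helper with a constant VF assumed; the real code uses poly_uint64 and param_min_pagesize):

/* Bytes read per vector iteration: VF * element size * group size.
   Speculating the access is only acceptable if this stays within the
   smallest page size the target guarantees.  */
static bool
speculative_read_fits_page (unsigned vf, unsigned elem_size,
			    unsigned group_size, unsigned min_pagesize)
{
  unsigned long long read_amount
    = (unsigned long long) vf * elem_size * group_size;
  /* E.g. VF 16, 4-byte elements, group of 2: 128 bytes <= 4096.  */
  return read_amount <= min_pagesize;
}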
@@ -2526,9 +2499,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
/* FIXME: At the moment the cost model seems to underestimate the
cost of using elementwise accesses. This check preserves the
traditional behavior until that can be fixed. */
- stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- if (!first_stmt_info)
- first_stmt_info = stmt_info;
if (*memory_access_type == VMAT_ELEMENTWISE
&& !STMT_VINFO_STRIDED_P (first_stmt_info)
&& !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
@@ -2545,21 +2515,21 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
conditional operation STMT_INFO. When returning true, store the mask
- in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
- vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
- to the mask in *MASK_NODE if MASK_NODE is not NULL. */
+ in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of
+ the vectorized mask in *MASK_VECTYPE_OUT. */
static bool
vect_check_scalar_mask (vec_info *vinfo,
slp_tree slp_node, unsigned mask_index,
- tree *mask, slp_tree *mask_node,
+ slp_tree *mask_node,
vect_def_type *mask_dt_out, tree *mask_vectype_out)
{
enum vect_def_type mask_dt;
tree mask_vectype;
slp_tree mask_node_1;
+ tree mask_;
if (!vect_is_simple_use (vinfo, slp_node, mask_index,
- mask, &mask_node_1, &mask_dt, &mask_vectype))
+ &mask_, &mask_node_1, &mask_dt, &mask_vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2568,7 +2538,7 @@ vect_check_scalar_mask (vec_info *vinfo,
}
if ((mask_dt == vect_constant_def || mask_dt == vect_external_def)
- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
+ && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2576,17 +2546,6 @@ vect_check_scalar_mask (vec_info *vinfo,
return false;
}
- /* If the caller is not prepared for adjusting an external/constant
- SLP mask vector type fail. */
- if (!mask_node
- && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP mask argument is not vectorized.\n");
- return false;
- }
-
tree vectype = SLP_TREE_VECTYPE (slp_node);
if (!mask_vectype)
mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
@@ -2614,11 +2573,11 @@ vect_check_scalar_mask (vec_info *vinfo,
*mask_dt_out = mask_dt;
*mask_vectype_out = mask_vectype;
- if (mask_node)
- *mask_node = mask_node_1;
+ *mask_node = mask_node_1;
return true;
}
+
/* Return true if stored value is suitable for vectorizing store
statement STMT_INFO. When returning true, store the scalar stored
in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
@@ -2627,7 +2586,7 @@ vect_check_scalar_mask (vec_info *vinfo,
static bool
vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
- slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
+ slp_tree slp_node, slp_tree *rhs_node,
vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
vec_load_store_type *vls_type_out)
{
@@ -2643,8 +2602,9 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
enum vect_def_type rhs_dt;
tree rhs_vectype;
+ tree rhs;
if (!vect_is_simple_use (vinfo, slp_node, op_no,
- rhs, rhs_node, &rhs_dt, &rhs_vectype))
+ &rhs, rhs_node, &rhs_dt, &rhs_vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2655,7 +2615,7 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
/* In the case this is a store from a constant make sure
native_encode_expr can handle it. */
if (rhs_dt == vect_constant_def
- && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
+ && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2769,13 +2729,12 @@ vect_get_mask_load_else (int elsval, tree type)
static gimple *
vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
- tree vectype,
- gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
+ slp_tree slp_node, tree vectype,
+ gimple_stmt_iterator *gsi, tree decl,
tree ptr, tree offset, tree mask)
{
- tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
- tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
+ tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
+ tree rettype = TREE_TYPE (TREE_TYPE (decl));
tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
/* ptrtype */ arglist = TREE_CHAIN (arglist);
tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
@@ -2841,8 +2800,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
}
- tree scale = build_int_cst (scaletype, gs_info->scale);
- gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
+ tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
+ gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
mask_op, scale);
if (!useless_type_conversion_p (vectype, rettype))
@@ -2868,12 +2827,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
static gimple *
vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
+ slp_tree slp_node,
gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
+ tree decl,
tree ptr, tree offset, tree oprnd, tree mask)
{
- tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
- tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
+ tree rettype = TREE_TYPE (TREE_TYPE (decl));
+ tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
/* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
@@ -2937,9 +2897,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
op = var;
}
- tree scale = build_int_cst (scaletype, gs_info->scale);
+ tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
gcall *new_stmt
- = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
+ = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
return new_stmt;
}
@@ -2950,12 +2910,12 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
containing loop. */
static void
-vect_get_gather_scatter_ops (class loop *loop,
- slp_tree slp_node, gather_scatter_info *gs_info,
+vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
tree *dataref_ptr, vec<tree> *vec_offset)
{
gimple_seq stmts = NULL;
- *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
+ *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
+ &stmts, true, NULL_TREE);
if (stmts != NULL)
{
basic_block new_bb;
@@ -2976,10 +2936,10 @@ vect_get_gather_scatter_ops (class loop *loop,
I * DR_STEP / SCALE. */
static void
-vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
+vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
+ tree vectype, tree offset_vectype,
loop_vec_info loop_vinfo,
gimple_stmt_iterator *gsi,
- gather_scatter_info *gs_info,
tree *dataref_bump, tree *vec_offset,
vec_loop_lens *loop_lens)
{
@@ -3020,15 +2980,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype,
/* The offset given in GS_INFO can have pointer type, so use the element
type of the vector instead. */
- tree offset_type = TREE_TYPE (gs_info->offset_vectype);
+ tree offset_type = TREE_TYPE (offset_vectype);
/* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
- ssize_int (gs_info->scale));
+ ssize_int (SLP_TREE_GS_SCALE (node)));
step = fold_convert (offset_type, step);
/* Create {0, X, X*2, X*3, ...}. */
- tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
+ tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
build_zero_cst (offset_type), step);
*vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
@@ -3051,7 +3011,7 @@ vect_get_loop_variant_data_ptr_increment (
tree step = vect_dr_behavior (vinfo, dr_info)->step;
/* gather/scatter never reach here. */
- gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
+ gcc_assert (!mat_gather_scatter_p (memory_access_type));
/* When we support SELECT_VL pattern, we dynamic adjust
the memory address by .SELECT_VL result.
@@ -3158,13 +3118,13 @@ vectorizable_bswap (vec_info *vinfo,
return false;
}
- STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_bswap");
record_stmt_cost (cost_vec,
- 1, vector_stmt, stmt_info, 0, vect_prologue);
+ 1, vector_stmt, slp_node, 0, vect_prologue);
record_stmt_cost (cost_vec,
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
- vec_perm, stmt_info, 0, vect_body);
+ vec_perm, slp_node, 0, vect_body);
return true;
}
@@ -3312,14 +3272,20 @@ vectorizable_call (vec_info *vinfo,
int mask_opno = -1;
if (internal_fn_p (cfn))
- mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ {
+ /* We can only handle direct internal masked calls here,
+ vectorizable_simd_clone_call is for the rest. */
+ if (cfn == CFN_MASK_CALL)
+ return false;
+ mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+ }
for (i = 0; i < nargs; i++)
{
if ((int) i == mask_opno)
{
if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno,
- &op, &slp_op[i], &dt[i], &vectypes[i]))
+ &slp_op[i], &dt[i], &vectypes[i]))
return false;
continue;
}
@@ -3469,7 +3435,7 @@ vectorizable_call (vec_info *vinfo,
}
}
- int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
internal_fn cond_fn = get_conditional_internal_fn (ifn);
internal_fn cond_len_fn = get_len_internal_fn (ifn);
int len_opno = internal_fn_len_index (cond_len_fn);
@@ -3487,7 +3453,7 @@ vectorizable_call (vec_info *vinfo,
"incompatible vector types for invariants\n");
return false;
}
- STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_call");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -3884,9 +3850,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (nargs == 0)
return false;
- vec<tree>& simd_clone_info = SLP_TREE_SIMD_CLONE_INFO (slp_node);
- if (cost_vec)
- simd_clone_info.truncate (0);
+ vect_simd_clone_data _data;
+ vect_simd_clone_data &data = slp_node->get_data (_data);
+ vec<tree>& simd_clone_info = data.simd_clone_info;
arginfo.reserve (nargs, true);
auto_vec<slp_tree> slp_op;
slp_op.safe_grow_cleared (nargs);
@@ -4282,7 +4248,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
+ slp_node->data = new vect_simd_clone_data (std::move (_data));
DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */
return true;
@@ -4799,7 +4766,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
}
- SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
+ if (gimple_get_lhs (new_stmt))
+ SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
}
for (i = 0; i < nargs; ++i)
@@ -5427,28 +5395,28 @@ vectorizable_conversion (vec_info *vinfo,
DUMP_VECT_SCOPE ("vectorizable_conversion");
if (modifier == NONE)
{
- STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
slp_node, cost_vec);
}
else if (modifier == NARROW_SRC || modifier == NARROW_DST)
{
- STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
/* The final packing step produces one vector result per copy. */
unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+ vect_model_promotion_demotion_cost (slp_node, nvectors,
multi_step_cvt, cost_vec,
widen_arith);
}
else
{
- STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
/* The initial unpacking step produces two vector results
per copy. MULTI_STEP_CVT is 0 for a single conversion,
so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
unsigned int nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt;
- vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+ vect_model_promotion_demotion_cost (slp_node, nvectors,
multi_step_cvt, cost_vec,
widen_arith);
}
@@ -5777,7 +5745,7 @@ vectorizable_assignment (vec_info *vinfo,
"incompatible vector types for invariants\n");
return false;
}
- STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_assignment");
if (!vect_nop_conversion_p (stmt_info))
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
@@ -6122,7 +6090,7 @@ vectorizable_shift (vec_info *vinfo,
gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
== INTEGER_CST));
}
- STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_shift");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
return true;
@@ -6487,7 +6455,7 @@ vectorizable_operation (vec_info *vinfo,
using_emulated_vectors_p = true;
}
- int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6541,7 +6509,7 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
- STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
if (using_emulated_vectors_p)
@@ -6599,6 +6567,20 @@ vectorizable_operation (vec_info *vinfo,
vec_dest = vect_create_destination_var (scalar_dest, vectype);
vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
}
+ /* For reduction operations with undefined overflow behavior make sure to
+ pun them to unsigned since we change the order of evaluation.
+ ??? Avoid for in-order reductions? */
+ else if (arith_code_with_undefined_signed_overflow (orig_code)
+ && ANY_INTEGRAL_TYPE_P (vectype)
+ && TYPE_OVERFLOW_UNDEFINED (vectype)
+ && SLP_TREE_REDUC_IDX (slp_node) != -1)
+ {
+ gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
+ || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
+ vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ vectype = unsigned_type_for (vectype);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ }
/* Handle def. */
else
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
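The punning added above is the standard trick for reassociating signed reductions: do the arithmetic in the matching unsigned type, where wrap-around is defined, and convert back at the end. A standalone illustration of the idea (not the vectorizer code itself):

#include <cstdint>
#include <vector>

/* Accumulate in uint32_t so reordering the additions can never trigger
   signed-overflow UB; converting the result back is modular (well
   defined since C++20, two's complement in practice before that).  */
static int32_t
sum_reassociable (const std::vector<int32_t> &v)
{
  uint32_t acc = 0;
  for (int32_t x : v)
    acc += (uint32_t) x;
  return (int32_t) acc;
}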
@@ -6612,6 +6594,46 @@ vectorizable_operation (vec_info *vinfo,
vop1 = ((op_type == binary_op || op_type == ternary_op)
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
+
+ if (vec_cvt_dest
+ && !useless_type_conversion_p (vectype, TREE_TYPE (vop0)))
+ {
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0);
+ new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ vop0 = new_temp;
+ }
+ if (vop1
+ && vec_cvt_dest
+ && !useless_type_conversion_p (vectype, TREE_TYPE (vop1)))
+ {
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1);
+ new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ vop1 = new_temp;
+ }
+ if (vop2
+ && vec_cvt_dest
+ && !useless_type_conversion_p (vectype, TREE_TYPE (vop2)))
+ {
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2);
+ new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
+ new_temp);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_temp);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ vop2 = new_temp;
+ }
+
if (using_emulated_vectors_p)
{
/* Lower the operation. This follows vector lowering. */
@@ -7082,7 +7104,8 @@ scan_store_can_perm_p (tree vectype, tree init,
static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
- enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
+ enum vect_def_type rhs_dt, slp_tree slp_node,
+ slp_tree mask_node,
vect_memory_access_type memory_access_type)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -7090,13 +7113,14 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
tree ref_type;
gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
- if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
- || mask
+ if (SLP_TREE_LANES (slp_node) > 1
+ || mask_node
|| memory_access_type != VMAT_CONTIGUOUS
|| TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
|| !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
|| loop_vinfo == NULL
|| LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
|| STMT_VINFO_GROUPED_ACCESS (stmt_info)
|| !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
|| !integer_zerop (DR_INIT (dr_info->dr))
@@ -7773,7 +7797,6 @@ vectorizable_store (vec_info *vinfo,
unsigned int vec_num;
bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
tree aggr_type;
- gather_scatter_info gs_info;
poly_uint64 vf;
vec_load_store_type vls_type;
tree ref_type;
@@ -7787,7 +7810,7 @@ vectorizable_store (vec_info *vinfo,
/* Is vectorizable store? */
- tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+ tree mask_vectype = NULL_TREE;
slp_tree mask_node = NULL;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
@@ -7820,16 +7843,12 @@ vectorizable_store (vec_info *vinfo,
(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (mask_index >= 0
&& !vect_check_scalar_mask (vinfo, slp_node, mask_index,
- &mask, &mask_node, &mask_dt,
+ &mask_node, &mask_dt,
&mask_vectype))
return false;
}
- /* Cannot have hybrid store SLP -- that would mean storing to the
- same location twice. */
- gcc_assert (PURE_SLP_STMT (stmt_info));
-
- tree vectype = SLP_TREE_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
+ tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE;
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (loop_vinfo)
@@ -7851,10 +7870,9 @@ vectorizable_store (vec_info *vinfo,
return false;
}
- tree op;
slp_tree op_node;
if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
- &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
+ &op_node, &rhs_dt, &rhs_vectype, &vls_type))
return false;
elem_type = TREE_TYPE (vectype);
@@ -7863,16 +7881,19 @@ vectorizable_store (vec_info *vinfo,
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- vect_memory_access_type memory_access_type;
- enum dr_alignment_support alignment_support_scheme;
- int misalignment;
- poly_int64 poffset;
- internal_fn lanes_ifn;
- if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
- 1, &memory_access_type, &poffset,
- &alignment_support_scheme, &misalignment, &gs_info,
- &lanes_ifn))
+ vect_load_store_data _ls_data;
+ vect_load_store_data &ls = slp_node->get_data (_ls_data);
+ if (cost_vec
+ && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+ vls_type, &_ls_data))
return false;
+ /* Temporary aliases to analysis data, should not be modified through
+ these. */
+ const vect_memory_access_type memory_access_type = ls.memory_access_type;
+ const dr_alignment_support alignment_support_scheme
+ = ls.alignment_support_scheme;
+ const int misalignment = ls.misalignment;
+ const poly_int64 poffset = ls.poffset;
if (slp_node->ldst_lanes
&& memory_access_type != VMAT_LOAD_STORE_LANES)
@@ -7883,7 +7904,7 @@ vectorizable_store (vec_info *vinfo,
return false;
}
- if (mask)
+ if (mask_node)
{
if (memory_access_type == VMAT_CONTIGUOUS)
{
@@ -7893,8 +7914,8 @@ vectorizable_store (vec_info *vinfo,
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
- && (memory_access_type != VMAT_GATHER_SCATTER
- || (GATHER_SCATTER_LEGACY_P (gs_info)
+ && (!mat_gather_scatter_p (memory_access_type)
+ || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
{
if (dump_enabled_p ())
@@ -7902,8 +7923,7 @@ vectorizable_store (vec_info *vinfo,
"unsupported access type for masked store.\n");
return false;
}
- else if (memory_access_type == VMAT_GATHER_SCATTER
- && GATHER_SCATTER_EMULATED_P (gs_info))
+ else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7921,7 +7941,7 @@ vectorizable_store (vec_info *vinfo,
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- && memory_access_type != VMAT_GATHER_SCATTER);
+ && !mat_gather_scatter_p (memory_access_type));
if (grouped_store)
{
first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
@@ -7937,26 +7957,22 @@ vectorizable_store (vec_info *vinfo,
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
{
- if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask,
- memory_access_type))
+ if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node,
+ mask_node, memory_access_type))
return false;
}
bool costing_p = cost_vec;
if (costing_p) /* transformation not required. */
{
- STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
- SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
- vls_type, group_size,
- memory_access_type, &gs_info,
- mask);
+ vls_type, group_size, &ls,
+ mask_node);
if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
- || (mask
+ || (mask_node
&& !vect_maybe_update_slp_op_vectype (mask_node,
mask_vectype)))
{
@@ -7974,9 +7990,9 @@ vectorizable_store (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"Vectorizing an unaligned access.\n");
- STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = store_vec_info_type;
+ slp_node->data = new vect_load_store_data (std::move (ls));
}
- gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info));
/* Transform. */
@@ -7991,7 +8007,7 @@ vectorizable_store (vec_info *vinfo,
unsigned int inside_cost = 0, prologue_cost = 0;
if (vls_type == VLS_STORE_INVARIANT)
prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ slp_node, 0, vect_prologue);
vect_get_store_cost (vinfo, stmt_info, slp_node, 1,
alignment_support_scheme, misalignment,
&inside_cost, cost_vec);
@@ -8020,7 +8036,6 @@ vectorizable_store (vec_info *vinfo,
gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
|| (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
- op = vect_get_store_rhs (first_stmt_info);
ref_type = get_group_alias_ptr_type (first_stmt_info);
@@ -8072,6 +8087,14 @@ vectorizable_store (vec_info *vinfo,
...
*/
+ /* ??? Modify local copies of alignment_support_scheme and
+ misalignment, but this part of analysis should be done
+ earlier and remembered, likewise the chosen load mode. */
+ const dr_alignment_support tem = alignment_support_scheme;
+ dr_alignment_support alignment_support_scheme = tem;
+ const int tem2 = misalignment;
+ int misalignment = tem2;
+
unsigned nstores = const_nunits;
unsigned lnel = 1;
tree ltype = elem_type;
@@ -8206,7 +8229,7 @@ vectorizable_store (vec_info *vinfo,
unsigned int n_adjacent_stores = 0;
running_off = offvar;
if (!costing_p)
- vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
+ vect_get_slp_defs (op_node, &vec_oprnds);
unsigned int group_el = 0;
unsigned HOST_WIDE_INT elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
@@ -8281,7 +8304,7 @@ vectorizable_store (vec_info *vinfo,
else
inside_cost
+= record_stmt_cost (cost_vec, n_adjacent_stores,
- scalar_store, stmt_info, 0, vect_body);
+ scalar_store, slp_node, 0, vect_body);
/* Only need vector extracting when there are more
than one stores. */
if (nstores > 1)
@@ -8321,7 +8344,7 @@ vectorizable_store (vec_info *vinfo,
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for masked operations. */
gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
- && !mask
+ && !mask_node
&& !loop_masks)
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -8337,12 +8360,13 @@ vectorizable_store (vec_info *vinfo,
aggr_type = NULL_TREE;
bump = NULL_TREE;
}
- else if (memory_access_type == VMAT_GATHER_SCATTER)
+ else if (mat_gather_scatter_p (memory_access_type))
{
aggr_type = elem_type;
if (!costing_p)
- vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
- gsi, &gs_info,
+ vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
+ ls.strided_offset_vectype,
+ loop_vinfo, gsi,
&bump, &vec_offset, loop_lens);
}
else
@@ -8356,7 +8380,7 @@ vectorizable_store (vec_info *vinfo,
memory_access_type, loop_lens);
}
- if (mask && !costing_p)
+ if (mask_node && !costing_p)
LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
/* In case the vectorization factor (VF) is bigger than the number
@@ -8373,6 +8397,8 @@ vectorizable_store (vec_info *vinfo,
if (memory_access_type == VMAT_LOAD_STORE_LANES)
{
+ const internal_fn lanes_ifn = ls.lanes_ifn;
+
if (costing_p)
/* Update all incoming store operand nodes, the general handling
above only handles the mask and the first store operand node. */
@@ -8396,7 +8422,7 @@ vectorizable_store (vec_info *vinfo,
{
if (!costing_p)
{
- if (mask)
+ if (mask_node)
{
vect_get_slp_defs (mask_node, &vec_masks);
vec_mask = vec_masks[0];
@@ -8410,7 +8436,7 @@ vectorizable_store (vec_info *vinfo,
else if (!costing_p)
{
gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
- if (mask)
+ if (mask_node)
vec_mask = vec_masks[j];
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
@@ -8525,7 +8551,7 @@ vectorizable_store (vec_info *vinfo,
return true;
}
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (mat_gather_scatter_p (memory_access_type))
{
gcc_assert (!grouped_store);
auto_vec<tree> vec_offsets;
@@ -8545,11 +8571,11 @@ vectorizable_store (vec_info *vinfo,
DR_CHAIN is of size 1. */
gcc_assert (group_size == 1);
vect_get_slp_defs (op_node, gvec_oprnds[0]);
- if (mask)
+ if (mask_node)
vect_get_slp_defs (mask_node, &vec_masks);
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vect_get_gather_scatter_ops (loop, slp_node, &gs_info,
+ vect_get_gather_scatter_ops (loop, slp_node,
&dataref_ptr, &vec_offsets);
else
dataref_ptr
@@ -8571,13 +8597,12 @@ vectorizable_store (vec_info *vinfo,
if (!costing_p)
{
vec_oprnd = (*gvec_oprnds[0])[j];
- if (mask)
+ if (mask_node)
vec_mask = vec_masks[j];
/* We should have caught mismatched types earlier. */
gcc_assert (useless_type_conversion_p (vectype,
TREE_TYPE (vec_oprnd)));
}
- unsigned HOST_WIDE_INT align;
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
tree bias = NULL_TREE;
@@ -8592,7 +8617,9 @@ vectorizable_store (vec_info *vinfo,
final_mask, vec_mask, gsi);
}
- if (GATHER_SCATTER_IFN_P (gs_info))
+ unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree alias_align_ptr = build_int_cst (ref_type, align);
+ if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
{
if (costing_p)
{
@@ -8606,9 +8633,9 @@ vectorizable_store (vec_info *vinfo,
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
vec_offset = vec_offsets[j];
- tree scale = size_int (gs_info.scale);
+ tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
- if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
+ if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi,
@@ -8633,7 +8660,7 @@ vectorizable_store (vec_info *vinfo,
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (
IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset, scale, vec_oprnd, final_mask, final_len,
bias);
else
@@ -8650,19 +8677,19 @@ vectorizable_store (vec_info *vinfo,
else if (final_mask)
call = gimple_build_call_internal
(IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset, scale, vec_oprnd, final_mask);
else
call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset,
scale, vec_oprnd);
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
new_stmt = call;
}
- else if (GATHER_SCATTER_LEGACY_P (gs_info))
+ else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
{
/* The builtin decls path for scatter is legacy, x86 only. */
gcc_assert (nunits.is_constant ()
@@ -8678,13 +8705,14 @@ vectorizable_store (vec_info *vinfo,
continue;
}
+ tree offset_vectype = TREE_TYPE (vec_offsets[0]);
poly_uint64 offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ = TYPE_VECTOR_SUBPARTS (offset_vectype);
if (known_eq (nunits, offset_nunits))
{
new_stmt = vect_build_one_scatter_store_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr, vec_offsets[j],
+ (vinfo, stmt_info, slp_node, gsi,
+ ls.gs.decl, dataref_ptr, vec_offsets[j],
vec_oprnd, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
new_stmt, gsi);
@@ -8695,7 +8723,7 @@ vectorizable_store (vec_info *vinfo,
lanes but the builtins will store full vectype
data from the lower lanes. */
new_stmt = vect_build_one_scatter_store_call
- (vinfo, stmt_info, gsi, &gs_info,
+ (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
dataref_ptr, vec_offsets[2 * j],
vec_oprnd, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
@@ -8721,14 +8749,14 @@ vectorizable_store (vec_info *vinfo,
VEC_UNPACK_HI_EXPR,
final_mask);
final_mask = make_ssa_name
- (truth_type_for (gs_info.offset_vectype));
+ (truth_type_for (offset_vectype));
gimple_set_lhs (new_stmt, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
new_stmt, gsi);
}
new_stmt = vect_build_one_scatter_store_call
- (vinfo, stmt_info, gsi, &gs_info,
+ (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
dataref_ptr, vec_offsets[2 * j + 1],
vec_oprnd, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
@@ -8761,8 +8789,8 @@ vectorizable_store (vec_info *vinfo,
}
new_stmt = vect_build_one_scatter_store_call
- (vinfo, stmt_info, gsi, &gs_info,
- dataref_ptr, vec_offset,
+ (vinfo, stmt_info, slp_node, gsi,
+ ls.gs.decl, dataref_ptr, vec_offset,
vec_oprnd, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
new_stmt, gsi);
@@ -8793,9 +8821,10 @@ vectorizable_store (vec_info *vinfo,
continue;
}
+ tree offset_vectype = TREE_TYPE (vec_offsets[0]);
unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
unsigned HOST_WIDE_INT const_offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+ = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
vec<constructor_elt, va_gc> *ctor_elts;
vec_alloc (ctor_elts, const_nunits);
gimple_seq stmts = NULL;
@@ -8810,8 +8839,7 @@ vectorizable_store (vec_info *vinfo,
unsigned elt_offset
= (j % factor) * const_nunits;
tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
- tree scale = size_int (gs_info.scale);
- align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
for (unsigned k = 0; k < const_nunits; ++k)
{
@@ -8871,10 +8899,13 @@ vectorizable_store (vec_info *vinfo,
if (!costing_p)
{
/* Get vectorized arguments for SLP_NODE. */
- vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds, mask, &vec_masks);
+ vect_get_slp_defs (op_node, &vec_oprnds);
vec_oprnd = vec_oprnds[0];
- if (mask)
- vec_mask = vec_masks[0];
+ if (mask_node)
+ {
+ vect_get_slp_defs (mask_node, &vec_masks);
+ vec_mask = vec_masks[0];
+ }
}
/* We should have caught mismatched types earlier. */
@@ -8916,10 +8947,7 @@ vectorizable_store (vec_info *vinfo,
else
{
tree perm_mask = perm_mask_for_reverse (vectype);
- tree perm_dest
- = vect_create_destination_var (vect_get_store_rhs (stmt_info),
- vectype);
- tree new_temp = make_ssa_name (perm_dest);
+ tree new_temp = make_ssa_name (vectype);
/* Generate the permute statement. */
gimple *perm_stmt
@@ -9290,7 +9318,6 @@ vectorizable_load (vec_info *vinfo,
bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
poly_uint64 vf;
tree aggr_type;
- gather_scatter_info gs_info;
tree ref_type;
enum vect_def_type mask_dt = vect_unknown_def_type;
enum vect_def_type els_dt = vect_unknown_def_type;
@@ -9305,12 +9332,12 @@ vectorizable_load (vec_info *vinfo,
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+ tree mask_vectype = NULL_TREE;
tree els = NULL_TREE; tree els_vectype = NULL_TREE;
int mask_index = -1;
int els_index = -1;
- slp_tree slp_op = NULL;
+ slp_tree mask_node = NULL;
slp_tree els_op = NULL;
if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
{
@@ -9349,7 +9376,7 @@ vectorizable_load (vec_info *vinfo,
(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (mask_index >= 0
&& !vect_check_scalar_mask (vinfo, slp_node, mask_index,
- &mask, &slp_op, &mask_dt, &mask_vectype))
+ &mask_node, &mask_dt, &mask_vectype))
return false;
els_index = internal_fn_else_index (ifn);
@@ -9424,20 +9451,23 @@ vectorizable_load (vec_info *vinfo,
else
group_size = 1;
- vect_memory_access_type memory_access_type;
- enum dr_alignment_support alignment_support_scheme;
- int misalignment;
- poly_int64 poffset;
- internal_fn lanes_ifn;
- auto_vec<int> elsvals;
- int maskload_elsval = 0;
- bool need_zeroing = false;
- if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
- 1, &memory_access_type, &poffset,
- &alignment_support_scheme, &misalignment, &gs_info,
- &lanes_ifn, &elsvals))
+ vect_load_store_data _ls_data;
+ vect_load_store_data &ls = slp_node->get_data (_ls_data);
+ if (cost_vec
+ && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+ VLS_LOAD, &ls))
return false;
+ /* Temporary aliases to analysis data, should not be modified through
+ these. */
+ const vect_memory_access_type memory_access_type = ls.memory_access_type;
+ const dr_alignment_support alignment_support_scheme
+ = ls.alignment_support_scheme;
+ const int misalignment = ls.misalignment;
+ const poly_int64 poffset = ls.poffset;
+ const vec<int> &elsvals = ls.elsvals;
+ int maskload_elsval = 0;
+ bool need_zeroing = false;
/* We might need to explicitly zero inactive elements if there are
padding bits in the type that might leak otherwise.
@@ -9447,10 +9477,10 @@ vectorizable_load (vec_info *vinfo,
= TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
/* ??? The following checks should really be part of
- get_group_load_store_type. */
+ get_load_store_type. */
if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
&& !((memory_access_type == VMAT_ELEMENTWISE
- || memory_access_type == VMAT_GATHER_SCATTER)
+ || mat_gather_scatter_p (memory_access_type))
&& SLP_TREE_LANES (slp_node) == 1))
{
slp_perm = true;
@@ -9500,7 +9530,7 @@ vectorizable_load (vec_info *vinfo,
return false;
}
- if (mask)
+ if (mask_node)
{
if (memory_access_type == VMAT_CONTIGUOUS)
{
@@ -9508,19 +9538,18 @@ vectorizable_load (vec_info *vinfo,
if (!VECTOR_MODE_P (vec_mode)
|| !can_vec_mask_load_store_p (vec_mode,
TYPE_MODE (mask_vectype),
- true, NULL, &elsvals))
+ true, NULL, &ls.elsvals))
return false;
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES
- && memory_access_type != VMAT_GATHER_SCATTER)
+ && !mat_gather_scatter_p (memory_access_type))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked load.\n");
return false;
}
- else if (memory_access_type == VMAT_GATHER_SCATTER
- && GATHER_SCATTER_EMULATED_P (gs_info))
+ else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9541,8 +9570,8 @@ vectorizable_load (vec_info *vinfo,
if (costing_p) /* transformation not required. */
{
- if (mask
- && !vect_maybe_update_slp_op_vectype (slp_op,
+ if (mask_node
+ && !vect_maybe_update_slp_op_vectype (mask_node,
mask_vectype))
{
if (dump_enabled_p ())
@@ -9551,18 +9580,15 @@ vectorizable_load (vec_info *vinfo,
return false;
}
- SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
if (loop_vinfo
&& LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
- VLS_LOAD, group_size,
- memory_access_type, &gs_info,
- mask, &elsvals);
+ VLS_LOAD, group_size, &ls,
+ mask_node, &ls.elsvals);
if (dump_enabled_p ()
&& memory_access_type != VMAT_ELEMENTWISE
- && memory_access_type != VMAT_GATHER_SCATTER
+ && !mat_gather_scatter_p (memory_access_type)
&& memory_access_type != VMAT_STRIDED_SLP
&& memory_access_type != VMAT_INVARIANT
&& alignment_support_scheme != dr_aligned)
@@ -9572,17 +9598,8 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_LOAD_STORE_LANES)
vinfo->any_known_not_updated_vssa = true;
- STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
- }
- else
- {
- /* Here just get the else values. */
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
- VLS_LOAD, group_size,
- memory_access_type, &gs_info,
- mask, &elsvals);
+ SLP_TREE_TYPE (slp_node) = load_vec_info_type;
+ slp_node->data = new vect_load_store_data (std::move (ls));
}
/* If the type needs padding we must zero inactive elements.
@@ -9605,8 +9622,6 @@ vectorizable_load (vec_info *vinfo,
if (elsvals.length ())
maskload_elsval = *elsvals.begin ();
- gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
-
if (dump_enabled_p () && !costing_p)
dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
@@ -9617,7 +9632,7 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_INVARIANT)
{
- gcc_assert (!grouped_load && !mask && !bb_vinfo);
+ gcc_assert (!grouped_load && !mask_node && !bb_vinfo);
/* If we have versioned for aliasing or the loop doesn't
have any data dependencies that would preclude this,
then we are sure this is a loop invariant load and
@@ -9775,6 +9790,13 @@ vectorizable_load (vec_info *vinfo,
tree ltype = TREE_TYPE (vectype);
tree lvectype = vectype;
auto_vec<tree> dr_chain;
+ /* ??? Modify local copies of alignment_support_scheme and
+ misalignment, but this part of analysis should be done
+ earlier and remembered, likewise the chosen load mode. */
+ const dr_alignment_support tem = alignment_support_scheme;
+ dr_alignment_support alignment_support_scheme = tem;
+ const int tem2 = misalignment;
+ int misalignment = tem2;
if (memory_access_type == VMAT_STRIDED_SLP)
{
HOST_WIDE_INT n = gcd (group_size, const_nunits);
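As a side note on the strided-SLP branch that begins here: a hedged reading of the gcd above is that it picks the largest chunk of elements that evenly divides both the group size and the vector lane count, and that chunk then sizes the pieces accessed at a time. A worked example under assumed numbers, not taken from the patch:

#include <cstdio>
#include <numeric>

int
main ()
{
  /* Assumed example values, chosen only to illustrate the arithmetic.  */
  long group_size = 6;
  long const_nunits = 4;
  long n = std::gcd (group_size, const_nunits);   /* == 2 */
  std::printf ("access the group in chunks of %ld elements\n", n);
  return 0;
}
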
@@ -9998,7 +10020,7 @@ vectorizable_load (vec_info *vinfo,
return true;
}
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (mat_gather_scatter_p (memory_access_type))
grouped_load = false;
if (grouped_load
@@ -10092,9 +10114,9 @@ vectorizable_load (vec_info *vinfo,
dr_aligned or dr_unaligned_supported for (non-length) masked
operations. */
gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
- && !mask
+ && !mask_node
&& !loop_masks)
- || memory_access_type == VMAT_GATHER_SCATTER
+ || mat_gather_scatter_p (memory_access_type)
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -10211,33 +10233,10 @@ vectorizable_load (vec_info *vinfo,
tree bump;
tree vec_offset = NULL_TREE;
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- {
- aggr_type = NULL_TREE;
- bump = NULL_TREE;
- }
- else if (memory_access_type == VMAT_GATHER_SCATTER)
- {
- aggr_type = elem_type;
- if (!costing_p)
- vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo,
- gsi, &gs_info,
- &bump, &vec_offset, loop_lens);
- }
- else
- {
- if (memory_access_type == VMAT_LOAD_STORE_LANES)
- aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
- else
- aggr_type = vectype;
- if (!costing_p)
- bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
- memory_access_type, loop_lens);
- }
auto_vec<tree> vec_offsets;
auto_vec<tree> vec_masks;
- if (mask && !costing_p)
+ if (mask_node && !costing_p)
vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
&vec_masks);
@@ -10245,9 +10244,16 @@ vectorizable_load (vec_info *vinfo,
tree vec_els = NULL_TREE;
if (memory_access_type == VMAT_LOAD_STORE_LANES)
{
+ const internal_fn lanes_ifn = ls.lanes_ifn;
+
gcc_assert (alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
+ aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
+
unsigned int inside_cost = 0, prologue_cost = 0;
/* For costing some adjacent vector loads, we'd like to cost with
the total number of them once instead of costing each one by one. */
@@ -10300,7 +10306,7 @@ vectorizable_load (vec_info *vinfo,
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
}
- if (mask)
+ if (mask_node)
vec_mask = vec_masks[j];
tree vec_array = create_vector_array (vectype, group_size);
@@ -10405,25 +10411,37 @@ vectorizable_load (vec_info *vinfo,
return true;
}
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (mat_gather_scatter_p (memory_access_type))
{
gcc_assert (!grouped_load && !slp_perm);
- unsigned int inside_cost = 0, prologue_cost = 0;
-
/* 1. Create the vector or array pointer update chain. */
- if (!costing_p)
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr,
+ aggr_type = NULL_TREE;
+ bump = NULL_TREE;
+ if (!costing_p)
+ vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
&vec_offsets);
- else
- dataref_ptr
- = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
- at_loop, offset, &dummy, gsi,
- &ptr_incr, false, bump);
+ }
+ else
+ {
+ aggr_type = elem_type;
+ if (!costing_p)
+ {
+ vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
+ ls.strided_offset_vectype,
+ loop_vinfo, gsi,
+ &bump, &vec_offset, loop_lens);
+ dataref_ptr
+ = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+ at_loop, offset, &dummy, gsi,
+ &ptr_incr, false, bump);
+ }
}
+ unsigned int inside_cost = 0, prologue_cost = 0;
+
gimple *new_stmt = NULL;
for (i = 0; i < vec_num; i++)
{
@@ -10432,7 +10450,7 @@ vectorizable_load (vec_info *vinfo,
tree bias = NULL_TREE;
if (!costing_p)
{
- if (mask)
+ if (mask_node)
vec_mask = vec_masks[i];
if (loop_masks)
final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
@@ -10447,8 +10465,9 @@ vectorizable_load (vec_info *vinfo,
}
/* 2. Create the vector-load in the loop. */
- unsigned HOST_WIDE_INT align;
- if (GATHER_SCATTER_IFN_P (gs_info))
+ unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree alias_align_ptr = build_int_cst (ref_type, align);
+ if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
{
if (costing_p)
{
@@ -10461,9 +10480,9 @@ vectorizable_load (vec_info *vinfo,
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
vec_offset = vec_offsets[i];
tree zero = build_zero_cst (vectype);
- tree scale = size_int (gs_info.scale);
+ tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
- if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+ if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
@@ -10495,7 +10514,7 @@ vectorizable_load (vec_info *vinfo,
if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
9, dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset, scale, zero,
final_mask, vec_els,
final_len, bias);
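The gather call just assembled is the load-side counterpart: the extra operands give it an effective length (len + bias) and an else value, so inactive lanes never touch memory yet still produce a well-defined result. A hedged sketch of the per-lane behaviour, with the name, lane count and types being assumptions for illustration:

#include <cstdint>

enum { NLANES = 4 };   /* assumed lane count, only for the sketch */

/* Model of a mask+length gather load: a lane is active iff it is below
   the effective length (len + bias) and enabled by the mask; active
   lanes load from base + offset[i] * scale, inactive lanes get the
   else value.  */
void
mask_len_gather_load_model (int32_t (&result)[NLANES], const char *base,
                            const int32_t (&offset)[NLANES], int64_t scale,
                            const bool (&mask)[NLANES], int64_t len,
                            int64_t bias, int32_t else_value)
{
  int64_t limit = len + bias;   /* bias is 0 or -1, target dependent */
  for (int i = 0; i < NLANES; ++i)
    result[i] = (i < limit && mask[i])
                ? *reinterpret_cast<const int32_t *> (base + offset[i] * scale)
                : else_value;
}
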
@@ -10511,19 +10530,19 @@ vectorizable_load (vec_info *vinfo,
else if (final_mask)
call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
7, dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset, scale,
zero, final_mask, vec_els);
else
call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
dataref_ptr,
- gs_info.alias_ptr,
+ alias_align_ptr,
vec_offset, scale, zero);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
}
- else if (GATHER_SCATTER_LEGACY_P (gs_info))
+ else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
{
/* The builtin decls path for gather is legacy, x86 only. */
gcc_assert (!final_len && nunits.is_constant ());
@@ -10535,13 +10554,14 @@ vectorizable_load (vec_info *vinfo,
slp_node, 0, vect_body);
continue;
}
- poly_uint64 offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+ tree offset_vectype = TREE_TYPE (vec_offsets[0]);
+ poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
if (known_eq (nunits, offset_nunits))
{
new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, vectype, gsi, &gs_info,
- dataref_ptr, vec_offsets[i], final_mask);
+ (vinfo, stmt_info, slp_node, vectype, gsi,
+ ls.gs.decl, dataref_ptr, vec_offsets[i],
+ final_mask);
data_ref = NULL_TREE;
}
else if (known_eq (nunits, offset_nunits * 2))
@@ -10550,8 +10570,9 @@ vectorizable_load (vec_info *vinfo,
lanes but the builtins will produce full vectype
data with just the lower lanes filled. */
new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, vectype, gsi, &gs_info,
- dataref_ptr, vec_offsets[2 * i], final_mask);
+ (vinfo, stmt_info, slp_node, vectype, gsi,
+ ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
+ final_mask);
tree low = make_ssa_name (vectype);
gimple_set_lhs (new_stmt, low);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
@@ -10582,15 +10603,15 @@ vectorizable_load (vec_info *vinfo,
VEC_UNPACK_HI_EXPR,
final_mask);
final_mask = make_ssa_name
- (truth_type_for (gs_info.offset_vectype));
+ (truth_type_for (offset_vectype));
gimple_set_lhs (new_stmt, final_mask);
vect_finish_stmt_generation (vinfo, stmt_info,
new_stmt, gsi);
}
new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, vectype, gsi, &gs_info,
- dataref_ptr,
+ (vinfo, stmt_info, slp_node, vectype, gsi,
+ ls.gs.decl, dataref_ptr,
vec_offsets[2 * i + 1], final_mask);
tree high = make_ssa_name (vectype);
gimple_set_lhs (new_stmt, high);
@@ -10633,7 +10654,8 @@ vectorizable_load (vec_info *vinfo,
new_stmt, gsi);
}
new_stmt = vect_build_one_gather_load_call
- (vinfo, stmt_info, vectype, gsi, &gs_info,
+ (vinfo, stmt_info, slp_node, vectype, gsi,
+ ls.gs.decl,
dataref_ptr, vec_offset, final_mask);
data_ref = NULL_TREE;
}
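The legacy builtin paths above that cope with mismatched lane counts between the data and offset vectors do so by issuing the builtin more than once and recombining the pieces, splitting or unpacking the mask as needed. A loose conceptual model of the "offset vector with half the data lanes" case; all names, lane counts and types here are assumptions for illustration, not GCC code:

#include <cstdint>

enum { NLANES = 8 };            /* data lanes, assumed       */
enum { NOFF = NLANES / 2 };     /* offset lanes per call     */

/* One builtin-sized gather over NOFF offsets; inactive lanes are zeroed
   here purely to keep the sketch simple.  */
static void
gather_half (int32_t *dst, const char *base, const int32_t (&offset)[NOFF],
             int64_t scale, const bool (&mask)[NOFF])
{
  for (int i = 0; i < NOFF; ++i)
    dst[i] = mask[i]
             ? *reinterpret_cast<const int32_t *> (base + offset[i] * scale)
             : 0;
}

/* Full-width gather assembled from two half-width builtin calls; the
   full mask is split into a low and a high half, loosely mirroring the
   VEC_UNPACK_LO/HI_EXPR statements in the patch.  */
void
gather_split_model (int32_t (&result)[NLANES], const char *base,
                    const int32_t (&off_lo)[NOFF],
                    const int32_t (&off_hi)[NOFF], int64_t scale,
                    const bool (&mask)[NLANES])
{
  bool mask_lo[NOFF], mask_hi[NOFF];
  for (int i = 0; i < NOFF; ++i)
    {
      mask_lo[i] = mask[i];
      mask_hi[i] = mask[NOFF + i];
    }
  gather_half (&result[0], base, off_lo, scale, mask_lo);
  gather_half (&result[NOFF], base, off_hi, scale, mask_hi);
}
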
@@ -10662,8 +10684,9 @@ vectorizable_load (vec_info *vinfo,
slp_node, 0, vect_body);
continue;
}
+ tree offset_vectype = TREE_TYPE (vec_offsets[0]);
unsigned HOST_WIDE_INT const_offset_nunits
- = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype) .to_constant ();
+ = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
vec<constructor_elt, va_gc> *ctor_elts;
vec_alloc (ctor_elts, const_nunits);
gimple_seq stmts = NULL;
@@ -10674,8 +10697,7 @@ vectorizable_load (vec_info *vinfo,
vec_offset = vec_offsets[i / factor];
unsigned elt_offset = (i % factor) * const_nunits;
tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
- tree scale = size_int (gs_info.scale);
- align = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
for (unsigned k = 0; k < const_nunits; ++k)
{
@@ -10744,6 +10766,11 @@ vectorizable_load (vec_info *vinfo,
return true;
}
+ aggr_type = vectype;
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
+
poly_uint64 group_elt = 0;
unsigned int inside_cost = 0, prologue_cost = 0;
/* For costing some adjacent vector loads, we'd like to cost with
@@ -10823,7 +10850,7 @@ vectorizable_load (vec_info *vinfo,
if (!costing_p)
{
- if (mask)
+ if (mask_node)
vec_mask = vec_masks[i];
if (loop_masks)
final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
@@ -11536,20 +11563,24 @@ vectorizable_condition (vec_info *vinfo,
if (code != COND_EXPR)
return false;
- stmt_vec_info reduc_info = NULL;
- int reduc_index = -1;
+ int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
- bool for_reduction
- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
+ bool nested_cycle_p = false;
+ bool for_reduction = vect_is_reduction (stmt_info);
if (for_reduction)
{
if (SLP_TREE_LANES (slp_node) > 1)
return false;
- reduc_info = info_for_reduction (vinfo, stmt_info);
- reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
- reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
- gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
- || reduc_index != -1);
+ /* ??? With a reduction path we do not get at the reduction info from
+ every stmt; use the conservative default setting then. */
+ if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
+ {
+ vect_reduc_info reduc_info
+ = info_for_reduction (loop_vinfo, slp_node);
+ reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
+ nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ stmt_info);
+ }
}
else
{
@@ -11739,7 +11770,7 @@ vectorizable_condition (vec_info *vinfo,
vec_num, vectype, NULL);
}
/* Extra inactive lanes should be safe for vect_nested_cycle. */
- else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
+ else if (!nested_cycle_p)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11749,7 +11780,7 @@ vectorizable_condition (vec_info *vinfo,
}
}
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
return true;
}
@@ -12256,7 +12287,7 @@ vectorizable_comparison (vec_info *vinfo,
return false;
if (cost_vec)
- STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
return true;
}
@@ -12265,13 +12296,11 @@ vectorizable_comparison (vec_info *vinfo,
vectorization. */
bool
-vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
+vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
- if (!loop_vinfo
- || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
+ if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
@@ -12336,7 +12365,7 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
}
- if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
+ if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi,
slp_node, cost_vec))
return false;
@@ -12573,19 +12602,19 @@ vect_analyze_stmt (vec_info *vinfo,
}
tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
- STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
+ STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
- gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
+ gcc_assert (SLP_TREE_VECTYPE (node)
|| gimple_code (stmt_info->stmt) == GIMPLE_COND
|| (call && gimple_call_lhs (call) == NULL_TREE));
}
ok = true;
- if (!bb_vinfo
- && (STMT_VINFO_RELEVANT_P (stmt_info)
+ if (bb_vinfo
+ || (STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
/* Prefer vectorizable_call over vectorizable_simd_clone_call so
-mveclibabi= takes preference over library functions with
@@ -12593,58 +12622,31 @@ vect_analyze_stmt (vec_info *vinfo,
ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec)
|| vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node,
cost_vec)
- || vectorizable_conversion (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_operation (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_assignment (vinfo, stmt_info,
- NULL, node, cost_vec)
+ || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec)
+ || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec)
+ || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec)
|| vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec)
|| vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec)
- || vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
- stmt_info, node, cost_vec)
- || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
- node, node_instance, cost_vec)
- || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
- node, cost_vec)
|| vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec)
- || vectorizable_condition (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_comparison (vinfo, stmt_info, NULL, node,
- cost_vec)
- || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
- stmt_info, node)
- || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
- stmt_info, node, cost_vec)
- || vectorizable_early_exit (vinfo, stmt_info, NULL, node,
- cost_vec));
- else
- {
- if (bb_vinfo)
- ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec)
- || vectorizable_simd_clone_call (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_conversion (vinfo, stmt_info, NULL, node,
- cost_vec)
- || vectorizable_shift (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_operation (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_assignment (vinfo, stmt_info, NULL, node,
- cost_vec)
- || vectorizable_load (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_store (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_condition (vinfo, stmt_info,
- NULL, node, cost_vec)
- || vectorizable_comparison (vinfo, stmt_info, NULL, node,
- cost_vec)
- || vectorizable_phi (vinfo, stmt_info, node, cost_vec)
- || vectorizable_early_exit (vinfo, stmt_info, NULL, node,
- cost_vec));
-
- }
+ || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec)
+ || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec)
+ || (bb_vinfo
+ && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec))
+ || (is_a <loop_vec_info> (vinfo)
+ && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
+ stmt_info, node, cost_vec)
+ || vectorizable_reduction (as_a <loop_vec_info> (vinfo),
+ stmt_info,
+ node, node_instance, cost_vec)
+ || vectorizable_induction (as_a <loop_vec_info> (vinfo),
+ stmt_info, node, cost_vec)
+ || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
+ stmt_info, node)
+ || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
+ stmt_info, node, cost_vec)
+ || vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
+ stmt_info, NULL, node,
+ cost_vec))));
STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
@@ -12657,10 +12659,10 @@ vect_analyze_stmt (vec_info *vinfo,
/* Stmts that are (also) "live" (i.e. - that are used out of the loop)
need extra handling, except for vectorizable reductions. */
if (!bb_vinfo
- && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
- && (STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
- && (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ && SLP_TREE_TYPE (node) != reduc_vec_info_type
+ && (SLP_TREE_TYPE (node) != lc_phi_info_type
+ || SLP_TREE_DEF_TYPE (node) == vect_internal_def)
+ && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node))
&& !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
node, node_instance,
false, cost_vec))
@@ -12691,10 +12693,9 @@ vect_transform_stmt (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"------>vectorizing statement: %G", stmt_info->stmt);
- tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
- STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
+ STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
- switch (STMT_VINFO_TYPE (stmt_info))
+ switch (SLP_TREE_TYPE (slp_node))
{
case type_demotion_vec_info_type:
case type_promotion_vec_info_type:
@@ -12791,12 +12792,14 @@ vect_transform_stmt (vec_info *vinfo,
break;
case phi_info_type:
- done = vectorizable_phi (vinfo, stmt_info, slp_node, NULL);
+ done = vectorizable_phi (as_a <bb_vec_info> (vinfo),
+ stmt_info, slp_node, NULL);
gcc_assert (done);
break;
case loop_exit_ctrl_vec_info_type:
- done = vectorizable_early_exit (vinfo, stmt_info, gsi, slp_node, NULL);
+ done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
+ stmt_info, gsi, slp_node, NULL);
gcc_assert (done);
break;
@@ -12811,9 +12814,8 @@ vect_transform_stmt (vec_info *vinfo,
done = true;
}
- if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type
- && (!slp_node->ldst_lanes
- || SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR))
+ if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
+ && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node)))
{
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
@@ -12822,8 +12824,6 @@ vect_transform_stmt (vec_info *vinfo,
gcc_assert (done);
}
- STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
-
return is_store;
}
@@ -13285,7 +13285,7 @@ vect_is_simple_use (vec_info *vinfo, slp_tree slp_node,
}
else
{
- gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
+ gcc_assert (SLP_TREE_PERMUTE_P (child));
*op = error_mark_node;
*dt = vect_internal_def;
if (def_stmt_info_out)