Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r--  gcc/tree-vect-slp.cc  1033
1 file changed, 575 insertions, 458 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c0636d8..6258a8e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -118,18 +118,22 @@ _slp_tree::_slp_tree ()
SLP_TREE_CHILDREN (this) = vNULL;
SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
SLP_TREE_LANE_PERMUTATION (this) = vNULL;
- SLP_TREE_SIMD_CLONE_INFO (this) = vNULL;
SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
SLP_TREE_CODE (this) = ERROR_MARK;
+ SLP_TREE_GS_SCALE (this) = 0;
+ SLP_TREE_GS_BASE (this) = NULL_TREE;
this->ldst_lanes = false;
this->avoid_stlf_fail = false;
SLP_TREE_VECTYPE (this) = NULL_TREE;
SLP_TREE_REPRESENTATIVE (this) = NULL;
- SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_INVARIANT;
+ this->cycle_info.id = -1;
+ this->cycle_info.reduc_idx = -1;
SLP_TREE_REF_COUNT (this) = 1;
this->failed = NULL;
this->max_nunits = 1;
this->lanes = 0;
+ SLP_TREE_TYPE (this) = undef_vec_info_type;
+ this->data = NULL;
}
/* Tear down a SLP node. */
@@ -148,9 +152,10 @@ _slp_tree::~_slp_tree ()
SLP_TREE_VEC_DEFS (this).release ();
SLP_TREE_LOAD_PERMUTATION (this).release ();
SLP_TREE_LANE_PERMUTATION (this).release ();
- SLP_TREE_SIMD_CLONE_INFO (this).release ();
if (this->failed)
free (failed);
+ if (this->data)
+ delete this->data;
}
/* Push the single SSA definition in DEF to the vector of vector defs. */
@@ -636,7 +641,7 @@ vect_slp_child_index_for_operand (const gimple *stmt, int op,
swapping operands of father node of this one, return 1; if everything is
ok return 0. */
static int
-vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
+vect_get_and_check_slp_defs (vec_info *vinfo, tree vectype, unsigned char swap,
bool *skip_args,
vec<stmt_vec_info> stmts, unsigned stmt_num,
vec<slp_oprnd_info> *oprnds_info)
@@ -678,6 +683,15 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
{
internal_fn ifn = gimple_call_internal_fn (stmt);
commutative_op = first_commutative_argument (ifn);
+ if (internal_gather_scatter_fn_p (ifn))
+ {
+ vect_describe_gather_scatter_call
+ (stmt_info,
+ first ? &(*oprnds_info)[0]->first_gs_info : &gs_info);
+ if (first)
+ (*oprnds_info)[0]->first_gs_p = true;
+ gs_op = 0;
+ }
}
}
else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
@@ -697,7 +711,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
{
gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
if (!is_a <loop_vec_info> (vinfo)
- || !vect_check_gather_scatter (stmt_info,
+ || !vect_check_gather_scatter (stmt_info, vectype,
as_a <loop_vec_info> (vinfo),
first ? &oprnd_info->first_gs_info
: &gs_info))
@@ -1138,7 +1152,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
soft_fail_nunits_vectype = nunits_vectype;
}
- gcc_assert (vectype);
+ gcc_assert (vectype || !gimple_get_lhs (first_stmt_info->stmt));
*node_vectype = vectype;
/* For every stmt in NODE find its def stmt/s. */
@@ -1185,10 +1199,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
gcall *call_stmt = dyn_cast <gcall *> (stmt);
tree lhs = gimple_get_lhs (stmt);
- if (lhs == NULL_TREE
- && (!call_stmt
- || !gimple_call_internal_p (stmt)
- || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
+ if (lhs == NULL_TREE && !call_stmt)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1380,7 +1391,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
&& (TREE_CODE_CLASS (tree_code (first_stmt_code))
== tcc_comparison)
&& (swap_tree_comparison (tree_code (first_stmt_code))
- == tree_code (rhs_code))))
+ == tree_code (rhs_code))
+ && (first_reduc_idx == -1
+ || REDUC_GROUP_FIRST_ELEMENT (stmt_info))))
|| (ldst_p
&& (STMT_VINFO_GROUPED_ACCESS (stmt_info)
!= STMT_VINFO_GROUPED_ACCESS (first_stmt_info)))
@@ -1596,8 +1609,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
}
}
- if (rhs_code.is_tree_code ()
- && TREE_CODE_CLASS ((tree_code)rhs_code) == tcc_comparison
+ if (i != 0
+ && first_stmt_code != rhs_code
+ && first_stmt_code.is_tree_code ()
+ && rhs_code.is_tree_code ()
+ && TREE_CODE_CLASS ((tree_code)first_stmt_code) == tcc_comparison
&& (swap_tree_comparison ((tree_code)first_stmt_code)
== (tree_code)rhs_code))
swap[i] = 1;
@@ -2602,7 +2618,8 @@ out:
slp_oprnd_info oprnd_info;
FOR_EACH_VEC_ELT (stmts, i, stmt_info)
{
- int res = vect_get_and_check_slp_defs (vinfo, swap[i], skip_args,
+ int res = vect_get_and_check_slp_defs (vinfo, vectype,
+ swap[i], skip_args,
stmts, i, &oprnds_info);
if (res != 0)
matches[(res == -1) ? 0 : i] = false;
@@ -2721,6 +2738,10 @@ out:
stmt_info = stmts[0];
+ int reduc_idx = -1;
+ int gs_scale = 0;
+ tree gs_base = NULL_TREE;
+
/* Create SLP_TREE nodes for the definition node/s. */
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
{
@@ -2743,6 +2764,12 @@ out:
continue;
}
+ if (oprnd_info->first_gs_p)
+ {
+ gs_scale = oprnd_info->first_gs_info.scale;
+ gs_base = oprnd_info->first_gs_info.base;
+ }
+
if (is_a <bb_vec_info> (vinfo)
&& oprnd_info->first_dt == vect_internal_def
&& !oprnd_info->any_pattern)
@@ -2803,6 +2830,33 @@ out:
continue;
}
+ /* See which SLP operand a reduction chain continues on. We want
+ to chain even PHIs but not backedges. */
+ if (VECTORIZABLE_CYCLE_DEF (oprnd_info->first_dt)
+ || STMT_VINFO_REDUC_IDX (oprnd_info->def_stmts[0]) != -1)
+ {
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
+ {
+ if (oprnd_info->first_dt == vect_double_reduction_def)
+ reduc_idx = i;
+ }
+ else if (is_a <gphi *> (stmt_info->stmt)
+ && gimple_phi_num_args
+ (as_a <gphi *> (stmt_info->stmt)) != 1)
+ ;
+ else if (STMT_VINFO_REDUC_IDX (stmt_info) == -1
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
+ ;
+ else if (reduc_idx == -1)
+ reduc_idx = i;
+ else
+ /* For .COND_* reduction operations the else value can be the
+ same as one of the operation operands. The other def
+ stmts have been moved, so we can't check easily. Check
+ it's a call at least. */
+ gcc_assert (is_a <gcall *> (stmt_info->stmt));
+ }
+
/* When we have a masked load with uniform mask discover this
as a single-lane mask with a splat permute. This way we can
recognize this as a masked load-lane by stripping the splat. */
@@ -3132,6 +3186,43 @@ fail:
node = vect_create_new_slp_node (node, stmts, nops);
SLP_TREE_VECTYPE (node) = vectype;
SLP_TREE_CHILDREN (node).splice (children);
+ SLP_TREE_GS_SCALE (node) = gs_scale;
+ SLP_TREE_GS_BASE (node) = gs_base;
+ if (reduc_idx != -1)
+ {
+ gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) != -1
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def);
+ SLP_TREE_REDUC_IDX (node) = reduc_idx;
+ node->cycle_info.id = SLP_TREE_CHILDREN (node)[reduc_idx]->cycle_info.id;
+ }
+ /* When reaching the reduction PHI, create a vect_reduc_info. */
+ else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+ && is_a <gphi *> (STMT_VINFO_STMT (stmt_info)))
+ {
+ loop_vec_info loop_vinfo = as_a <loop_vec_info> (vinfo);
+ gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) == -1);
+ node->cycle_info.id = loop_vinfo->reduc_infos.length ();
+ vect_reduc_info reduc_info = new vect_reduc_info_s ();
+ loop_vinfo->reduc_infos.safe_push (reduc_info);
+ stmt_vec_info reduc_phi = stmt_info;
+ /* ??? For double reductions vect_is_simple_reduction stores the
+ reduction type and code on the inner loop header PHI. */
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+ {
+ use_operand_p use_p;
+ gimple *use_stmt;
+ bool res = single_imm_use (gimple_phi_result (stmt_info->stmt),
+ &use_p, &use_stmt);
+ gcc_assert (res);
+ reduc_phi = loop_vinfo->lookup_stmt (use_stmt);
+ }
+ VECT_REDUC_INFO_DEF_TYPE (reduc_info) = STMT_VINFO_DEF_TYPE (stmt_info);
+ VECT_REDUC_INFO_TYPE (reduc_info) = STMT_VINFO_REDUC_TYPE (reduc_phi);
+ VECT_REDUC_INFO_CODE (reduc_info) = STMT_VINFO_REDUC_CODE (reduc_phi);
+ VECT_REDUC_INFO_FN (reduc_info) = IFN_LAST;
+ }
return node;
}
@@ -3160,11 +3251,15 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
SLP_TREE_REF_COUNT (node));
if (SLP_TREE_VECTYPE (node))
dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
- dump_printf (metadata, "%s\n",
+ dump_printf (metadata, "%s",
node->avoid_stlf_fail ? " (avoid-stlf-fail)" : "");
+ if (node->cycle_info.id != -1 || node->cycle_info.reduc_idx != -1)
+ dump_printf (metadata, " cycle %d, link %d", node->cycle_info.id,
+ node->cycle_info.reduc_idx);
+ dump_printf (metadata, "\n");
if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
{
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
dump_printf_loc (metadata, user_loc, "op: VEC_PERM_EXPR\n");
else
dump_printf_loc (metadata, user_loc, "op template: %G",
@@ -3416,7 +3511,7 @@ vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
- if (SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+ if (!SLP_TREE_PERMUTE_P (node))
{
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
if (STMT_VINFO_DATA_REF (stmt_info)
@@ -3530,7 +3625,7 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
static inline bool
vect_is_slp_load_node (slp_tree root)
{
- return (SLP_TREE_CODE (root) != VEC_PERM_EXPR
+ return (!SLP_TREE_PERMUTE_P (root)
&& SLP_TREE_DEF_TYPE (root) == vect_internal_def
&& STMT_VINFO_GROUPED_ACCESS (SLP_TREE_REPRESENTATIVE (root))
&& DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))));
@@ -3555,7 +3650,7 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
/* For now, we don't know anything about externals so do not do anything. */
if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def)
return NULL;
- else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+ else if (SLP_TREE_PERMUTE_P (root))
{
/* First convert this node into a load node and add it to the leaves
list and flatten the permute from a lane to a load one. If it's
@@ -3960,8 +4055,6 @@ vect_build_slp_instance (vec_info *vinfo,
vec<tree> &remain,
unsigned max_tree_size, unsigned *limit,
scalar_stmts_to_slp_tree_map_t *bst_map,
- /* ??? We need stmt_info for group splitting. */
- stmt_vec_info stmt_info_,
bool force_single_lane)
{
/* If there's no budget left bail out early. */
@@ -3997,7 +4090,6 @@ vect_build_slp_instance (vec_info *vinfo,
bool *matches = XALLOCAVEC (bool, group_size);
poly_uint64 max_nunits = 1;
unsigned tree_size = 0;
- unsigned i;
slp_tree node = NULL;
if (group_size > 1 && force_single_lane)
@@ -4057,68 +4149,345 @@ vect_build_slp_instance (vec_info *vinfo,
"SLP size %u vs. limit %u.\n",
tree_size, max_tree_size);
- /* Fixup SLP reduction chains. */
- if (kind == slp_inst_kind_reduc_chain)
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
+ }
+
+ return true;
+ }
+ }
+ /* Failed to SLP. */
+
+ /* While we can arrive here even with slp_inst_kind_store, we should
+ do so only for group_size == 1. The code to split store groups is
+ only in vect_analyze_slp_instance now. */
+ gcc_assert (kind != slp_inst_kind_store || group_size == 1);
+
+ /* Free the allocated memory. */
+ scalar_stmts.release ();
+
+ /* Failed to SLP. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "SLP discovery failed\n");
+ return false;
+}
+
+/* Analyze an SLP instance starting from the start of a reduction chain.
+ Call vect_build_slp_tree to build a tree of packed stmts if possible.
+ Return FALSE if SLP build fails. */
+
+static bool
+vect_analyze_slp_reduc_chain (vec_info *vinfo,
+ scalar_stmts_to_slp_tree_map_t *bst_map,
+ stmt_vec_info stmt_info,
+ unsigned max_tree_size, unsigned *limit)
+{
+ vec<stmt_vec_info> scalar_stmts;
+
+ /* Collect the reduction stmts and store them in scalar_stmts. */
+ scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info));
+ stmt_vec_info next_info = stmt_info;
+ while (next_info)
+ {
+ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
+ next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
+ }
+ /* Mark the first element of the reduction chain as reduction to properly
+ transform the node. In the reduction analysis phase only the last
+ element of the chain is marked as reduction. */
+ STMT_VINFO_DEF_TYPE (stmt_info)
+ = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
+ STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
+ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
+
+ /* Build the tree for the SLP instance. */
+ vec<stmt_vec_info> root_stmt_infos = vNULL;
+ vec<tree> remain = vNULL;
+
+ /* If there's no budget left bail out early. */
+ if (*limit == 0)
+ return false;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Starting SLP discovery for\n");
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " %G", scalar_stmts[i]->stmt);
+ }
+
+ /* Build the tree for the SLP instance. */
+ unsigned int group_size = scalar_stmts.length ();
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0;
+
+ slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ &max_nunits, matches, limit,
+ &tree_size, bst_map);
+ if (node != NULL)
+ {
+ /* Calculate the unrolling factor based on the smallest type. */
+ poly_uint64 unrolling_factor
+ = calculate_unrolling_factor (max_nunits, group_size);
+
+ if (maybe_ne (unrolling_factor, 1U)
+ && is_a <bb_vec_info> (vinfo))
+ {
+ unsigned HOST_WIDE_INT const_max_nunits;
+ if (!max_nunits.is_constant (&const_max_nunits)
+ || const_max_nunits > group_size)
{
- /* If this is a reduction chain with a conversion in front
- amend the SLP tree with a node for that. */
- gimple *scalar_def
- = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
- if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: store group "
+ "size not a multiple of the vector size "
+ "in basic block SLP\n");
+ vect_free_slp_tree (node);
+ return false;
+ }
+ /* Fatal mismatch. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP discovery succeeded but node needs "
+ "splitting\n");
+ memset (matches, true, group_size);
+ matches[group_size / const_max_nunits * const_max_nunits] = false;
+ vect_free_slp_tree (node);
+ }
+ else
+ {
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
+ SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
+
+ /* Fixup SLP reduction chains. If this is a reduction chain with
+ a conversion in front amend the SLP tree with a node for that. */
+ gimple *scalar_def
+ = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
+ if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
+ {
+ /* Get at the conversion stmt - we know it's the single use
+ of the last stmt of the reduction chain. */
+ use_operand_p use_p;
+ bool r = single_imm_use (gimple_assign_lhs (scalar_def),
+ &use_p, &scalar_def);
+ gcc_assert (r);
+ stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
+ next_info = vect_stmt_to_vectorize (next_info);
+ scalar_stmts = vNULL;
+ scalar_stmts.create (group_size);
+ for (unsigned i = 0; i < group_size; ++i)
+ scalar_stmts.quick_push (next_info);
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
+ SLP_TREE_VECTYPE (conv)
+ = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE
+ (gimple_assign_lhs (scalar_def)),
+ group_size);
+ SLP_TREE_REDUC_IDX (conv) = 0;
+ conv->cycle_info.id = node->cycle_info.id;
+ SLP_TREE_CHILDREN (conv).quick_push (node);
+ SLP_INSTANCE_TREE (new_instance) = conv;
+ /* We also have to fake this conversion stmt as SLP reduction
+ group so we don't have to mess with too much code
+ elsewhere. */
+ REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
+ REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
+ }
+ /* Fill the backedge child of the PHI SLP node. The
+ general matching code cannot find it because the
+ scalar code does not reflect how we vectorize the
+ reduction. */
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+ class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
+ gimple_get_lhs (scalar_def))
+ /* There are exactly two non-debug uses, the reduction
+ PHI and the loop-closed PHI node. */
+ if (!is_gimple_debug (USE_STMT (use_p))
+ && gimple_bb (USE_STMT (use_p)) == loop->header)
{
- /* Get at the conversion stmt - we know it's the single use
- of the last stmt of the reduction chain. */
- use_operand_p use_p;
- bool r = single_imm_use (gimple_assign_lhs (scalar_def),
- &use_p, &scalar_def);
- gcc_assert (r);
- stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
- next_info = vect_stmt_to_vectorize (next_info);
- scalar_stmts = vNULL;
- scalar_stmts.create (group_size);
+ auto_vec<stmt_vec_info, 64> phis (group_size);
+ stmt_vec_info phi_info
+ = vinfo->lookup_stmt (USE_STMT (use_p));
for (unsigned i = 0; i < group_size; ++i)
- scalar_stmts.quick_push (next_info);
- slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
- SLP_TREE_VECTYPE (conv)
- = get_vectype_for_scalar_type (vinfo,
- TREE_TYPE
- (gimple_assign_lhs
- (scalar_def)),
- group_size);
- SLP_TREE_CHILDREN (conv).quick_push (node);
- SLP_INSTANCE_TREE (new_instance) = conv;
- /* We also have to fake this conversion stmt as SLP reduction
- group so we don't have to mess with too much code
- elsewhere. */
- REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
- REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
+ phis.quick_push (phi_info);
+ slp_tree *phi_node = bst_map->get (phis);
+ unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
+ SLP_TREE_CHILDREN (*phi_node)[dest_idx]
+ = SLP_INSTANCE_TREE (new_instance);
+ SLP_INSTANCE_TREE (new_instance)->refcnt++;
}
- /* Fill the backedge child of the PHI SLP node. The
- general matching code cannot find it because the
- scalar code does not reflect how we vectorize the
- reduction. */
- use_operand_p use_p;
- imm_use_iterator imm_iter;
- class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
- gimple_get_lhs (scalar_def))
- /* There are exactly two non-debug uses, the reduction
- PHI and the loop-closed PHI node. */
- if (!is_gimple_debug (USE_STMT (use_p))
- && gimple_bb (USE_STMT (use_p)) == loop->header)
- {
- auto_vec<stmt_vec_info, 64> phis (group_size);
- stmt_vec_info phi_info
- = vinfo->lookup_stmt (USE_STMT (use_p));
- for (unsigned i = 0; i < group_size; ++i)
- phis.quick_push (phi_info);
- slp_tree *phi_node = bst_map->get (phis);
- unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
- SLP_TREE_CHILDREN (*phi_node)[dest_idx]
- = SLP_INSTANCE_TREE (new_instance);
- SLP_INSTANCE_TREE (new_instance)->refcnt++;
- }
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
+ }
+
+ return true;
+ }
+ }
+ /* Failed to SLP. */
+
+ /* Free the allocated memory. */
+ scalar_stmts.release ();
+
+ /* Failed to SLP. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "SLP discovery failed\n");
+ return false;
+}
+
+/* Analyze an SLP instance starting from a group of grouped stores. Call
+ vect_build_slp_tree to build a tree of packed stmts if possible.
+ Return FALSE if it's impossible to SLP any stmt in the group. */
+
+static bool
+vect_analyze_slp_instance (vec_info *vinfo,
+ scalar_stmts_to_slp_tree_map_t *bst_map,
+ stmt_vec_info stmt_info,
+ slp_instance_kind kind,
+ unsigned max_tree_size, unsigned *limit,
+ bool force_single_lane)
+{
+ vec<stmt_vec_info> scalar_stmts;
+
+ if (is_a <bb_vec_info> (vinfo))
+ vect_location = stmt_info->stmt;
+
+ gcc_assert (kind == slp_inst_kind_store);
+
+ /* Collect the stores and store them in scalar_stmts. */
+ scalar_stmts.create (DR_GROUP_SIZE (stmt_info));
+ stmt_vec_info next_info = stmt_info;
+ while (next_info)
+ {
+ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
+ next_info = DR_GROUP_NEXT_ELEMENT (next_info);
+ }
+
+ vec<stmt_vec_info> root_stmt_infos = vNULL;
+ vec<tree> remain = vNULL;
+
+ /* Build the tree for the SLP instance. */
+
+ /* If there's no budget left bail out early. */
+ if (*limit == 0)
+ return false;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Starting SLP discovery for\n");
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " %G", scalar_stmts[i]->stmt);
+ }
+
+ /* Build the tree for the SLP instance. */
+ unsigned int group_size = scalar_stmts.length ();
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0;
+ unsigned i;
+
+ slp_tree node = NULL;
+ if (group_size > 1 && force_single_lane)
+ {
+ matches[0] = true;
+ matches[1] = false;
+ }
+ else
+ node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ &max_nunits, matches, limit,
+ &tree_size, bst_map);
+ if (node != NULL)
+ {
+ /* Calculate the unrolling factor based on the smallest type. */
+ poly_uint64 unrolling_factor
+ = calculate_unrolling_factor (max_nunits, group_size);
+
+ if (maybe_ne (unrolling_factor, 1U)
+ && is_a <bb_vec_info> (vinfo))
+ {
+ unsigned HOST_WIDE_INT const_max_nunits;
+ if (!max_nunits.is_constant (&const_max_nunits)
+ || const_max_nunits > group_size)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: store group "
+ "size not a multiple of the vector size "
+ "in basic block SLP\n");
+ vect_free_slp_tree (node);
+ return false;
}
+ /* Fatal mismatch. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP discovery succeeded but node needs "
+ "splitting\n");
+ memset (matches, true, group_size);
+ matches[group_size / const_max_nunits * const_max_nunits] = false;
+ vect_free_slp_tree (node);
+ }
+ else
+ {
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
+ SLP_INSTANCE_KIND (new_instance) = kind;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
vinfo->slp_instances.safe_push (new_instance);
@@ -4142,7 +4511,6 @@ vect_build_slp_instance (vec_info *vinfo,
}
/* Failed to SLP. */
- stmt_vec_info stmt_info = stmt_info_;
/* Try to break the group up into pieces. */
if (*limit > 0 && kind == slp_inst_kind_store)
{
@@ -4403,70 +4771,6 @@ vect_build_slp_instance (vec_info *vinfo,
return false;
}
-
-/* Analyze an SLP instance starting from a group of grouped stores. Call
- vect_build_slp_tree to build a tree of packed stmts if possible.
- Return FALSE if it's impossible to SLP any stmt in the loop. */
-
-static bool
-vect_analyze_slp_instance (vec_info *vinfo,
- scalar_stmts_to_slp_tree_map_t *bst_map,
- stmt_vec_info stmt_info,
- slp_instance_kind kind,
- unsigned max_tree_size, unsigned *limit,
- bool force_single_lane)
-{
- vec<stmt_vec_info> scalar_stmts;
-
- if (is_a <bb_vec_info> (vinfo))
- vect_location = stmt_info->stmt;
-
- stmt_vec_info next_info = stmt_info;
- if (kind == slp_inst_kind_store)
- {
- /* Collect the stores and store them in scalar_stmts. */
- scalar_stmts.create (DR_GROUP_SIZE (stmt_info));
- while (next_info)
- {
- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
- next_info = DR_GROUP_NEXT_ELEMENT (next_info);
- }
- }
- else if (kind == slp_inst_kind_reduc_chain)
- {
- /* Collect the reduction stmts and store them in scalar_stmts. */
- scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info));
- while (next_info)
- {
- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
- next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
- }
- /* Mark the first element of the reduction chain as reduction to properly
- transform the node. In the reduction analysis phase only the last
- element of the chain is marked as reduction. */
- STMT_VINFO_DEF_TYPE (stmt_info)
- = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
- STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
- }
- else
- gcc_unreachable ();
-
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- /* Build the tree for the SLP instance. */
- bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts,
- roots, remain,
- max_tree_size, limit, bst_map,
- kind == slp_inst_kind_store
- ? stmt_info : NULL, force_single_lane);
-
- /* ??? If this is slp_inst_kind_store and the above succeeded here's
- where we should do store group splitting. */
-
- return res;
-}
-
/* qsort comparator ordering SLP load nodes. */
static int
@@ -4886,9 +5190,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
/* Find SLP sequences starting from groups of grouped stores. */
FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
- vect_analyze_slp_instance (vinfo, bst_map, first_element,
- slp_inst_kind_store, max_tree_size, &limit,
- force_single_lane);
+ if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
+ slp_inst_kind_store, max_tree_size, &limit,
+ force_single_lane)
+ && loop_vinfo)
+ return opt_result::failure_at (vect_location, "SLP build failed.\n");
/* For loops also start SLP discovery from non-grouped stores. */
if (loop_vinfo)
@@ -4906,10 +5212,27 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
vec<tree> remain = vNULL;
stmts.create (1);
stmts.quick_push (stmt_info);
- vect_build_slp_instance (vinfo, slp_inst_kind_store,
- stmts, roots, remain, max_tree_size,
- &limit, bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo, slp_inst_kind_store,
+ stmts, roots, remain, max_tree_size,
+ &limit, bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
+
+ stmt_vec_info stmt_info;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo), i, stmt_info)
+ {
+ vec<stmt_vec_info> stmts;
+ vec<stmt_vec_info> roots = vNULL;
+ vec<tree> remain = vNULL;
+ stmts.create (1);
+ stmts.quick_push (stmt_info);
+ if (! vect_build_slp_instance (vinfo, slp_inst_kind_store,
+ stmts, roots, remain, max_tree_size,
+ &limit, bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
+ }
}
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
@@ -4925,8 +5248,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
bb_vinfo->roots[i].stmts,
bb_vinfo->roots[i].roots,
bb_vinfo->roots[i].remain,
- max_tree_size, &limit, bst_map, NULL,
- false))
+ max_tree_size, &limit, bst_map, false))
{
bb_vinfo->roots[i].stmts = vNULL;
bb_vinfo->roots[i].roots = vNULL;
@@ -4943,11 +5265,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
&& ! STMT_VINFO_LIVE_P (first_element))
;
else if (force_single_lane
- || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
- slp_inst_kind_reduc_chain,
- max_tree_size, &limit,
- force_single_lane))
+ || ! vect_analyze_slp_reduc_chain (vinfo, bst_map,
+ first_element,
+ max_tree_size, &limit))
{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "SLP discovery of reduction chain failed\n");
/* Dissolve reduction chain group. */
stmt_vec_info vinfo = first_element;
stmt_vec_info last = NULL;
@@ -4960,6 +5284,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
vinfo = next;
}
STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
+ /* ??? When there's a conversion around the reduction
+ chain 'last' isn't the entry of the reduction. */
+ if (STMT_VINFO_DEF_TYPE (last) != vect_reduction_def)
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
/* It can be still vectorized as part of an SLP reduction. */
loop_vinfo->reductions.safe_push (last);
}
@@ -4997,12 +5326,14 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
vec<tree> remain = vNULL;
stmts.create (1);
stmts.quick_push (next_info);
- vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL,
- force_single_lane);
+ if (! vect_build_slp_instance (vinfo,
+ slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map,
+ force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
}
@@ -5015,7 +5346,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
slp_inst_kind_reduc_group,
scalar_stmts, roots, remain,
max_tree_size, &limit, bst_map,
- NULL, force_single_lane))
+ force_single_lane))
{
if (scalar_stmts.length () <= 1)
scalar_stmts.release ();
@@ -5027,11 +5358,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
vec<tree> remain = vNULL;
stmts.create (1);
stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
- vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo,
+ slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
saved_stmts.release ();
@@ -5051,20 +5384,21 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
&& ((stmt_info = vect_stmt_to_vectorize (stmt_info)), true)
&& STMT_VINFO_RELEVANT (stmt_info) == vect_used_only_live
&& STMT_VINFO_LIVE_P (stmt_info)
- && (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
- || (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
- && STMT_VINFO_REDUC_IDX (stmt_info) == -1)))
+ && !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))
+ && STMT_VINFO_REDUC_IDX (stmt_info) == -1)
{
vec<stmt_vec_info> stmts;
vec<stmt_vec_info> roots = vNULL;
vec<tree> remain = vNULL;
stmts.create (1);
stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
- vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo,
+ slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
@@ -5087,7 +5421,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
|| !integer_zerop (args1))
{
roots.release ();
- continue;
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
/* An argument without a loop def will be codegened from vectorizing the
@@ -5104,8 +5439,12 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
if (! vect_build_slp_instance (vinfo, slp_inst_kind_gcond,
stmts, roots, remain,
max_tree_size, &limit,
- bst_map, NULL, force_single_lane))
- roots.release ();
+ bst_map, force_single_lane))
+ {
+ roots.release ();
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
+ }
}
/* Find and create slp instances for inductions that have been forced
@@ -5123,10 +5462,12 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
{
stmts.create (1);
stmts.quick_push (vect_stmt_to_vectorize (lc_info));
- vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
/* When the latch def is from a different cycle this can only
be a induction. Build a simple instance for this.
@@ -5139,10 +5480,12 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
{
stmts.create (1);
stmts.quick_push (stmt_info);
- vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL, force_single_lane);
+ if (! vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
}
@@ -5882,7 +6225,7 @@ vect_optimize_slp_pass::is_cfg_latch_edge (graph_edge *ud)
slp_tree use = m_vertices[ud->src].node;
slp_tree def = m_vertices[ud->dest].node;
if ((SLP_TREE_DEF_TYPE (use) != vect_internal_def
- || SLP_TREE_CODE (use) == VEC_PERM_EXPR)
+ || SLP_TREE_PERMUTE_P (use))
|| SLP_TREE_DEF_TYPE (def) != vect_internal_def)
return false;
@@ -6215,7 +6558,7 @@ vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
{
const int fallback_cost = 1;
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
auto_lane_permutation_t tmp_perm;
tmp_perm.safe_splice (SLP_TREE_LANE_PERMUTATION (node));
@@ -6350,7 +6693,7 @@ vect_optimize_slp_pass::start_choosing_layouts ()
imin = DR_GROUP_SIZE (dr_stmt) + 1;
tmp_perm.safe_splice (SLP_TREE_LOAD_PERMUTATION (node));
}
- else if (SLP_TREE_CODE (node) == VEC_PERM_EXPR
+ else if (SLP_TREE_PERMUTE_P (node)
&& SLP_TREE_CHILDREN (node).length () == 1
&& (child = SLP_TREE_CHILDREN (node)[0])
&& (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (child))
@@ -6448,10 +6791,12 @@ vect_optimize_slp_pass::start_choosing_layouts ()
{
stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (instance));
- stmt_vec_info reduc_info = info_for_reduction (m_vinfo, stmt_info);
+ vect_reduc_info reduc_info
+ = info_for_reduction (as_a <loop_vec_info> (m_vinfo),
+ SLP_INSTANCE_TREE (instance));
if (needs_fold_left_reduction_p (TREE_TYPE
(gimple_get_lhs (stmt_info->stmt)),
- STMT_VINFO_REDUC_CODE (reduc_info)))
+ VECT_REDUC_INFO_CODE (reduc_info)))
{
unsigned int node_i = SLP_INSTANCE_TREE (instance)->vertex;
m_partitions[m_vertices[node_i].partition].layout = 0;
@@ -6668,7 +7013,7 @@ vect_optimize_slp_pass::backward_cost (graph_edge *ud, unsigned int to_node_i,
auto &to_costs = partition_layout_costs (to_partition_i,
to_partition.layout);
if (ud->src == int (to_node_i)
- && SLP_TREE_CODE (to_vertex.node) == VEC_PERM_EXPR)
+ && SLP_TREE_PERMUTE_P (to_vertex.node))
{
auto &from_partition = m_partitions[m_vertices[ud->dest].partition];
auto old_layout = from_partition.layout;
@@ -6723,7 +7068,7 @@ vect_optimize_slp_pass::forward_pass ()
{
unsigned int node_i = m_partitioned_nodes[partition.node_begin];
single_node = m_vertices[node_i].node;
- if (SLP_TREE_CODE (single_node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (single_node))
in_cost = total_in_cost (node_i);
}
@@ -6820,8 +7165,7 @@ vect_optimize_slp_pass::forward_pass ()
if the VEC_PERM_EXPR can be changed to support output layout
LAYOUT_I while keeping all the provisional choices of input
layout. */
- if (single_node
- && SLP_TREE_CODE (single_node) == VEC_PERM_EXPR)
+ if (single_node && SLP_TREE_PERMUTE_P (single_node))
{
int factor = internal_node_cost (single_node, -1, layout_i);
if (factor >= 0)
@@ -6980,7 +7324,7 @@ vect_optimize_slp_pass::get_result_with_layout (slp_tree node,
in TMP_PERM on success. */
auto_lane_permutation_t tmp_perm;
unsigned int num_inputs = 1;
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
tmp_perm.safe_splice (SLP_TREE_LANE_PERMUTATION (node));
if (from_layout_i != 0)
@@ -7074,7 +7418,7 @@ vect_optimize_slp_pass::materialize ()
SLP_TREE_SCALAR_STMTS (node), true);
/* Update load and lane permutations. */
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
/* First try to absorb the input vector layouts. If that fails,
force the inputs to have layout LAYOUT_I too. We checked that
@@ -7278,7 +7622,7 @@ vect_optimize_slp_pass::dump ()
" out weight: %f (degree %d)\n",
vertex.out_weight.to_double (),
vertex.out_degree);
- if (SLP_TREE_CODE (vertex.node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (vertex.node))
dump_printf_loc (MSG_NOTE, vect_location,
" op: VEC_PERM_EXPR\n");
else if (auto rep = SLP_TREE_REPRESENTATIVE (vertex.node))
@@ -7357,7 +7701,7 @@ vect_optimize_slp_pass::decide_masked_load_lanes ()
{
slp_tree node = v.node;
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def
- || SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ || SLP_TREE_PERMUTE_P (node))
continue;
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
if (! STMT_VINFO_GROUPED_ACCESS (stmt_info)
@@ -7377,7 +7721,7 @@ vect_optimize_slp_pass::decide_masked_load_lanes ()
/* Uniform masks need to be suitably represented. */
slp_tree mask = SLP_TREE_CHILDREN (node)[0];
- if (SLP_TREE_CODE (mask) != VEC_PERM_EXPR
+ if (!SLP_TREE_PERMUTE_P (mask)
|| SLP_TREE_CHILDREN (mask).length () != 1)
continue;
bool match = true;
@@ -7396,7 +7740,7 @@ vect_optimize_slp_pass::decide_masked_load_lanes ()
{
slp_tree pred_node = m_vertices[pred->src].node;
/* All consumers should be a permute with a single outgoing lane. */
- if (SLP_TREE_CODE (pred_node) != VEC_PERM_EXPR
+ if (!SLP_TREE_PERMUTE_P (pred_node)
|| SLP_TREE_LANES (pred_node) != 1)
{
match = false;
@@ -7557,8 +7901,7 @@ vect_update_slp_vf_for_node (slp_tree node, poly_uint64 &vf,
/* For permute nodes that are fed from externs or constants we have to
consider their number of lanes as well. Likewise for store-lanes. */
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR
- || node->ldst_lanes)
+ if (SLP_TREE_PERMUTE_P (node) || node->ldst_lanes)
for (slp_tree child : SLP_TREE_CHILDREN (node))
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
{
@@ -7605,7 +7948,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
/* If all instances ended up with vector(1) T roots make sure to
not vectorize. RVV for example relies on loop vectorization
when some instances are essentially kept scalar. See PR121048. */
- if (known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
+ if (SLP_TREE_VECTYPE (root)
+ && known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
decided_to_slp++;
}
@@ -7623,232 +7967,6 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
return (decided_to_slp > 0);
}
-/* Private data for vect_detect_hybrid_slp. */
-struct vdhs_data
-{
- loop_vec_info loop_vinfo;
- vec<stmt_vec_info> *worklist;
-};
-
-/* Walker for walk_gimple_op. */
-
-static tree
-vect_detect_hybrid_slp (tree *tp, int *, void *data)
-{
- walk_stmt_info *wi = (walk_stmt_info *)data;
- vdhs_data *dat = (vdhs_data *)wi->info;
-
- if (wi->is_lhs)
- return NULL_TREE;
-
- stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
- if (!def_stmt_info)
- return NULL_TREE;
- def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
- if (PURE_SLP_STMT (def_stmt_info))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
- def_stmt_info->stmt);
- STMT_SLP_TYPE (def_stmt_info) = hybrid;
- dat->worklist->safe_push (def_stmt_info);
- }
-
- return NULL_TREE;
-}
-
-/* Look if STMT_INFO is consumed by SLP indirectly and mark it pure_slp
- if so, otherwise pushing it to WORKLIST. */
-
-static void
-maybe_push_to_hybrid_worklist (vec_info *vinfo,
- vec<stmt_vec_info> &worklist,
- stmt_vec_info stmt_info)
-{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Processing hybrid candidate : %G", stmt_info->stmt);
- stmt_vec_info orig_info = vect_orig_stmt (stmt_info);
- imm_use_iterator iter2;
- ssa_op_iter iter1;
- use_operand_p use_p;
- def_operand_p def_p;
- bool any_def = false;
- FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_info->stmt, iter1, SSA_OP_DEF)
- {
- any_def = true;
- FOR_EACH_IMM_USE_FAST (use_p, iter2, DEF_FROM_PTR (def_p))
- {
- if (is_gimple_debug (USE_STMT (use_p)))
- continue;
- stmt_vec_info use_info = vinfo->lookup_stmt (USE_STMT (use_p));
- /* An out-of loop use means this is a loop_vect sink. */
- if (!use_info)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Found loop_vect sink: %G", stmt_info->stmt);
- worklist.safe_push (stmt_info);
- return;
- }
- else if (!STMT_SLP_TYPE (vect_stmt_to_vectorize (use_info)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Found loop_vect use: %G", use_info->stmt);
- worklist.safe_push (stmt_info);
- return;
- }
- }
- }
- /* No def means this is a loop_vect sink. Gimple conditionals also don't have a
- def but shouldn't be considered sinks. */
- if (!any_def && STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Found loop_vect sink: %G", stmt_info->stmt);
- worklist.safe_push (stmt_info);
- return;
- }
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Marked SLP consumed stmt pure: %G", stmt_info->stmt);
- STMT_SLP_TYPE (stmt_info) = pure_slp;
-}
-
-/* Find stmts that must be both vectorized and SLPed. */
-
-bool
-vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
-{
- DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
-
- /* All stmts participating in SLP are marked pure_slp, all other
- stmts are loop_vect.
- First collect all loop_vect stmts into a worklist.
- SLP patterns cause not all original scalar stmts to appear in
- SLP_TREE_SCALAR_STMTS and thus not all of them are marked pure_slp.
- Rectify this here and do a backward walk over the IL only considering
- stmts as loop_vect when they are used by a loop_vect stmt and otherwise
- mark them as pure_slp. */
- auto_vec<stmt_vec_info> worklist;
- for (int i = LOOP_VINFO_LOOP (loop_vinfo)->num_nodes - 1; i >= 0; --i)
- {
- basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
- for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
- gsi_next (&gsi))
- {
- gphi *phi = gsi.phi ();
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
- if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
- maybe_push_to_hybrid_worklist (loop_vinfo,
- worklist, stmt_info);
- }
- for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi);
- gsi_prev (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
- if (is_gimple_debug (stmt))
- continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
- if (STMT_VINFO_IN_PATTERN_P (stmt_info))
- {
- for (gimple_stmt_iterator gsi2
- = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
- !gsi_end_p (gsi2); gsi_next (&gsi2))
- {
- stmt_vec_info patt_info
- = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
- if (!STMT_SLP_TYPE (patt_info)
- && STMT_VINFO_RELEVANT (patt_info))
- maybe_push_to_hybrid_worklist (loop_vinfo,
- worklist, patt_info);
- }
- stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
- }
- if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
- maybe_push_to_hybrid_worklist (loop_vinfo,
- worklist, stmt_info);
- }
- }
-
- /* Now we have a worklist of non-SLP stmts, follow use->def chains and
- mark any SLP vectorized stmt as hybrid.
- ??? We're visiting def stmts N times (once for each non-SLP and
- once for each hybrid-SLP use). */
- walk_stmt_info wi;
- vdhs_data dat;
- dat.worklist = &worklist;
- dat.loop_vinfo = loop_vinfo;
- memset (&wi, 0, sizeof (wi));
- wi.info = (void *)&dat;
- while (!worklist.is_empty ())
- {
- stmt_vec_info stmt_info = worklist.pop ();
- /* Since SSA operands are not set up for pattern stmts we need
- to use walk_gimple_op. */
- wi.is_lhs = 0;
- walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
- /* For gather/scatter make sure to walk the offset operand, that
- can be a scaling and conversion away. */
- gather_scatter_info gs_info;
- if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info))
- {
- int dummy;
- vect_detect_hybrid_slp (&gs_info.offset, &dummy, &wi);
- }
- }
-
- /* Determine if all the stmts in the loop can be SLPed. */
- for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; i++)
- {
- basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
- if (!stmt_info)
- continue;
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- {
- /* STMT needs both SLP and loop-based vectorization. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
- return false;
- }
- }
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- if (is_gimple_debug (gsi_stmt (si)))
- continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- stmt_info = vect_stmt_to_vectorize (stmt_info);
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- {
- /* STMT needs both SLP and loop-based vectorization. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
- return false;
- }
- }
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains only SLP stmts\n");
- return true;
-}
-
-
/* Initialize a bb_vec_info struct for the statements in BBS basic blocks. */
_bb_vec_info::_bb_vec_info (vec<basic_block> _bbs, vec_info_shared *shared)
@@ -7932,10 +8050,13 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
elements in a vector. For single-defuse-cycle, lane-reducing op, and
PHI statement that starts reduction comprised of only lane-reducing ops,
the number is more than effective vector statements actually required. */
- SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+ if (SLP_TREE_VECTYPE (node))
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+ else
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
/* Handle purely internal nodes. */
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
if (!vectorizable_slp_permutation (vinfo, NULL, node, cost_vec))
return false;
@@ -8259,8 +8380,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
/* Masked loads can have an undefined (default SSA definition)
else operand. We do not need to cost it. */
vec<tree> ops = SLP_TREE_SCALAR_OPS (child);
- if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
- == load_vec_info_type)
+ if (SLP_TREE_TYPE (node) == load_vec_info_type
&& ((ops.length ()
&& TREE_CODE (ops[0]) == SSA_NAME
&& SSA_NAME_IS_DEFAULT_DEF (ops[0])
@@ -8271,8 +8391,7 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
/* For shifts with a scalar argument we don't need
to cost or code-generate anything.
??? Represent this more explicitely. */
- gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
- == shift_vec_info_type)
+ gcc_assert (SLP_TREE_TYPE (node) == shift_vec_info_type
&& j == 1);
continue;
}
@@ -8648,7 +8767,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
&& !vectorizable_bb_reduc_epilogue (instance, &cost_vec))
/* Check we can vectorize the gcond. */
|| (SLP_INSTANCE_KIND (instance) == slp_inst_kind_gcond
- && !vectorizable_early_exit (vinfo,
+ && !vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
SLP_INSTANCE_ROOT_STMTS (instance)[0],
NULL,
SLP_INSTANCE_TREE (instance),
@@ -9012,7 +9131,7 @@ next_lane:
{
/* Do not directly pass LIFE to the recursive call, copy it to
confine changes in the callee to the current child/subtree. */
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
subtree_life.safe_grow_cleared (SLP_TREE_LANES (child), true);
for (unsigned j = 0;
@@ -11291,11 +11410,12 @@ vect_schedule_slp_node (vec_info *vinfo,
stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
- gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
- SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+ gcc_assert (!SLP_TREE_VECTYPE (node)
+ || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
+ if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
+ SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
- if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
- && STMT_VINFO_DATA_REF (stmt_info))
+ if (!SLP_TREE_PERMUTE_P (node) && STMT_VINFO_DATA_REF (stmt_info))
{
/* Vectorized loads go before the first scalar load to make it
ready early, vectorized stores go before the last scalar
@@ -11307,10 +11427,10 @@ vect_schedule_slp_node (vec_info *vinfo,
last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
si = gsi_for_stmt (last_stmt_info->stmt);
}
- else if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
- && (STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type
- || STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type
- || STMT_VINFO_TYPE (stmt_info) == phi_info_type))
+ else if (!SLP_TREE_PERMUTE_P (node)
+ && (SLP_TREE_TYPE (node) == cycle_phi_info_type
+ || SLP_TREE_TYPE (node) == induc_vec_info_type
+ || SLP_TREE_TYPE (node) == phi_info_type))
{
/* For PHI node vectorization we do not use the insertion iterator. */
si = gsi_none ();
@@ -11330,8 +11450,7 @@ vect_schedule_slp_node (vec_info *vinfo,
last scalar def here. */
if (SLP_TREE_VEC_DEFS (child).is_empty ())
{
- gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child))
- == cycle_phi_info_type);
+ gcc_assert (SLP_TREE_TYPE (child) == cycle_phi_info_type);
gphi *phi = as_a <gphi *>
(vect_find_last_scalar_stmt_in_slp (child)->stmt);
if (!last_stmt)
@@ -11432,7 +11551,7 @@ vect_schedule_slp_node (vec_info *vinfo,
si = gsi_after_labels (vinfo->bbs[0]);
}
else if (is_a <bb_vec_info> (vinfo)
- && SLP_TREE_CODE (node) != VEC_PERM_EXPR
+ && !SLP_TREE_PERMUTE_P (node)
&& gimple_bb (last_stmt) != gimple_bb (stmt_info->stmt)
&& gimple_could_trap_p (stmt_info->stmt))
{
@@ -11477,12 +11596,12 @@ vect_schedule_slp_node (vec_info *vinfo,
}
/* Handle purely internal nodes. */
- if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
+ if (SLP_TREE_PERMUTE_P (node))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"------>vectorizing SLP permutation node\n");
- /* ??? the transform kind is stored to STMT_VINFO_TYPE which might
+ /* ??? the transform kind was stored to STMT_VINFO_TYPE which might
be shared with different SLP nodes (but usually it's the same
operation apart from the case the stmt is only there for denoting
the actual scalar lane defs ...). So do not call vect_transform_stmt
@@ -11538,8 +11657,6 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
{
if (!stmt_info)
continue;
- if (!PURE_SLP_STMT (stmt_info))
- continue;
stmt_info = vect_orig_stmt (stmt_info);
gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
if (!stmt || gimple_bb (stmt) == NULL)
@@ -11682,8 +11799,8 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
- bool res = vectorizable_early_exit (vinfo, root_stmt_info, &rgsi,
- node, NULL);
+ bool res = vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
+ root_stmt_info, &rgsi, node, NULL);
gcc_assert (res);
return;
}
@@ -11758,7 +11875,7 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
stack.pop ();
info->on_stack = false;
vect_schedule_slp_node (vinfo, node, instance);
- if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+ if (!SLP_TREE_PERMUTE_P (node)
&& is_a <gphi *> (SLP_TREE_REPRESENTATIVE (node)->stmt))
phis_to_fixup.quick_push (node);
}
@@ -11781,7 +11898,7 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
slp_tree entry = stack[idx];
if (!entry)
continue;
- bool phi = (SLP_TREE_CODE (entry) != VEC_PERM_EXPR
+ bool phi = (!SLP_TREE_PERMUTE_P (entry)
&& is_a <gphi *> (SLP_TREE_REPRESENTATIVE (entry)->stmt));
bool ready = !phi;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (entry), i, child)