aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-slp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r--gcc/tree-vect-slp.cc432
1 files changed, 231 insertions, 201 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f553e8f..13a2995 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -53,6 +53,9 @@ along with GCC; see the file COPYING3. If not see
#include "sreal.h"
#include "predict.h"
+#define REDUC_GROUP_FIRST_ELEMENT(S) \
+ (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
+
static bool vect_transform_slp_perm_load_1 (vec_info *, slp_tree,
load_permutation_t &,
const vec<tree> &,
@@ -4187,41 +4190,60 @@ vect_build_slp_instance (vec_info *vinfo,
Return FALSE if SLP build fails. */
static bool
-vect_analyze_slp_reduc_chain (vec_info *vinfo,
+vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
scalar_stmts_to_slp_tree_map_t *bst_map,
- stmt_vec_info stmt_info,
+ stmt_vec_info scalar_stmt,
unsigned max_tree_size, unsigned *limit)
{
- vec<stmt_vec_info> scalar_stmts;
+ vec<stmt_vec_info> scalar_stmts = vNULL;
- /* Collect the reduction stmts and store them in scalar_stmts. */
- scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info));
- stmt_vec_info next_info = stmt_info;
- while (next_info)
+ bool fail = false;
+ /* ??? We could leave operation code checking to SLP discovery. */
+ code_helper code = STMT_VINFO_REDUC_CODE (STMT_VINFO_REDUC_DEF
+ (vect_orig_stmt (scalar_stmt)));
+ bool first = true;
+ stmt_vec_info next_stmt = scalar_stmt;
+ do
{
- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
- next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
+ stmt_vec_info stmt = next_stmt;
+ gimple_match_op op;
+ if (!gimple_extract_op (STMT_VINFO_STMT (stmt), &op))
+ gcc_unreachable ();
+ tree reduc_def = gimple_arg (STMT_VINFO_STMT (stmt),
+ STMT_VINFO_REDUC_IDX (stmt));
+ next_stmt = vect_stmt_to_vectorize (vinfo->lookup_def (reduc_def));
+ gcc_assert (is_a <gphi *> (STMT_VINFO_STMT (next_stmt))
+ || STMT_VINFO_REDUC_IDX (next_stmt) != -1);
+ if (!gimple_extract_op (STMT_VINFO_STMT (vect_orig_stmt (stmt)), &op))
+ gcc_unreachable ();
+ if (CONVERT_EXPR_CODE_P (op.code)
+ && (first
+ || is_a <gphi *> (STMT_VINFO_STMT (next_stmt))))
+ ;
+ else if (code != op.code)
+ {
+ fail = true;
+ break;
+ }
+ else
+ scalar_stmts.safe_push (stmt);
+ first = false;
}
- /* Mark the first element of the reduction chain as reduction to properly
- transform the node. In the reduction analysis phase only the last
- element of the chain is marked as reduction. */
- STMT_VINFO_DEF_TYPE (stmt_info)
- = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
- STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
+ while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt)));
+ if (fail || scalar_stmts.length () <= 1)
+ return false;
+
+ scalar_stmts.reverse ();
+ stmt_vec_info reduc_phi_info = next_stmt;
/* Build the tree for the SLP instance. */
vec<stmt_vec_info> root_stmt_infos = vNULL;
vec<tree> remain = vNULL;
- /* If there's no budget left bail out early. */
- if (*limit == 0)
- return false;
-
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- "Starting SLP discovery for\n");
+ "Starting SLP discovery of reduction chain for\n");
for (unsigned i = 0; i < scalar_stmts.length (); ++i)
dump_printf_loc (MSG_NOTE, vect_location,
" %G", scalar_stmts[i]->stmt);
@@ -4233,136 +4255,195 @@ vect_analyze_slp_reduc_chain (vec_info *vinfo,
poly_uint64 max_nunits = 1;
unsigned tree_size = 0;
+ /* ??? We need this only for SLP discovery. */
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
+
slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, matches, limit,
&tree_size, bst_map);
+
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = NULL;
+
if (node != NULL)
{
- /* Calculate the unrolling factor based on the smallest type. */
- poly_uint64 unrolling_factor
- = calculate_unrolling_factor (max_nunits, group_size);
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
+ SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
- if (maybe_ne (unrolling_factor, 1U)
- && is_a <bb_vec_info> (vinfo))
+ vect_reduc_info reduc_info = info_for_reduction (vinfo, node);
+ reduc_info->is_reduc_chain = true;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
+
+ /* Fixup SLP reduction chains. If this is a reduction chain with
+ a conversion in front amend the SLP tree with a node for that. */
+ gimple *scalar_def = STMT_VINFO_REDUC_DEF (reduc_phi_info)->stmt;
+ if (is_gimple_assign (scalar_def)
+ && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (scalar_def)))
+ {
+ stmt_vec_info conv_info = vect_stmt_to_vectorize
+ (STMT_VINFO_REDUC_DEF (reduc_phi_info));
+ scalar_stmts = vNULL;
+ scalar_stmts.create (group_size);
+ for (unsigned i = 0; i < group_size; ++i)
+ scalar_stmts.quick_push (conv_info);
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
+ SLP_TREE_VECTYPE (conv)
+ = get_vectype_for_scalar_type (vinfo,
+ TREE_TYPE
+ (gimple_assign_lhs (scalar_def)),
+ group_size);
+ SLP_TREE_REDUC_IDX (conv) = 0;
+ conv->cycle_info.id = node->cycle_info.id;
+ SLP_TREE_CHILDREN (conv).quick_push (node);
+ SLP_INSTANCE_TREE (new_instance) = conv;
+ }
+ /* Fill the backedge child of the PHI SLP node. The
+ general matching code cannot find it because the
+ scalar code does not reflect how we vectorize the
+ reduction. */
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+ class loop *loop = LOOP_VINFO_LOOP (vinfo);
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
+ gimple_get_lhs (scalar_def))
+ /* There are exactly two non-debug uses, the reduction
+ PHI and the loop-closed PHI node. */
+ if (!is_gimple_debug (USE_STMT (use_p))
+ && gimple_bb (USE_STMT (use_p)) == loop->header)
+ {
+ auto_vec<stmt_vec_info, 64> phis (group_size);
+ stmt_vec_info phi_info = vinfo->lookup_stmt (USE_STMT (use_p));
+ for (unsigned i = 0; i < group_size; ++i)
+ phis.quick_push (phi_info);
+ slp_tree *phi_node = bst_map->get (phis);
+ unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
+ SLP_TREE_CHILDREN (*phi_node)[dest_idx]
+ = SLP_INSTANCE_TREE (new_instance);
+ SLP_INSTANCE_TREE (new_instance)->refcnt++;
+ }
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
{
- unsigned HOST_WIDE_INT const_max_nunits;
- if (!max_nunits.is_constant (&const_max_nunits)
- || const_max_nunits > group_size)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: store group "
- "size not a multiple of the vector size "
- "in basic block SLP\n");
- vect_free_slp_tree (node);
- return false;
- }
- /* Fatal mismatch. */
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "SLP discovery succeeded but node needs "
- "splitting\n");
- memset (matches, true, group_size);
- matches[group_size / const_max_nunits * const_max_nunits] = false;
- vect_free_slp_tree (node);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
}
- else
- {
- /* Create a new SLP instance. */
- slp_instance new_instance = XNEW (class _slp_instance);
- SLP_INSTANCE_TREE (new_instance) = node;
- SLP_INSTANCE_LOADS (new_instance) = vNULL;
- SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
- SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
- SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
- new_instance->reduc_phis = NULL;
- new_instance->cost_vec = vNULL;
- new_instance->subgraph_entries = vNULL;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "SLP size %u vs. limit %u.\n",
- tree_size, max_tree_size);
+ return true;
+ }
- /* Fixup SLP reduction chains. If this is a reduction chain with
- a conversion in front amend the SLP tree with a node for that. */
- gimple *scalar_def
- = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
- if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
- {
- /* Get at the conversion stmt - we know it's the single use
- of the last stmt of the reduction chain. */
- use_operand_p use_p;
- bool r = single_imm_use (gimple_assign_lhs (scalar_def),
- &use_p, &scalar_def);
- gcc_assert (r);
- stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
- next_info = vect_stmt_to_vectorize (next_info);
- scalar_stmts = vNULL;
- scalar_stmts.create (group_size);
- for (unsigned i = 0; i < group_size; ++i)
- scalar_stmts.quick_push (next_info);
- slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
- SLP_TREE_VECTYPE (conv)
- = get_vectype_for_scalar_type (vinfo,
- TREE_TYPE
- (gimple_assign_lhs (scalar_def)),
- group_size);
- SLP_TREE_REDUC_IDX (conv) = 0;
- conv->cycle_info.id = node->cycle_info.id;
- SLP_TREE_CHILDREN (conv).quick_push (node);
- SLP_INSTANCE_TREE (new_instance) = conv;
- /* We also have to fake this conversion stmt as SLP reduction
- group so we don't have to mess with too much code
- elsewhere. */
- REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
- REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
- }
- /* Fill the backedge child of the PHI SLP node. The
- general matching code cannot find it because the
- scalar code does not reflect how we vectorize the
- reduction. */
- use_operand_p use_p;
- imm_use_iterator imm_iter;
- class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
- gimple_get_lhs (scalar_def))
- /* There are exactly two non-debug uses, the reduction
- PHI and the loop-closed PHI node. */
- if (!is_gimple_debug (USE_STMT (use_p))
- && gimple_bb (USE_STMT (use_p)) == loop->header)
- {
- auto_vec<stmt_vec_info, 64> phis (group_size);
- stmt_vec_info phi_info
- = vinfo->lookup_stmt (USE_STMT (use_p));
- for (unsigned i = 0; i < group_size; ++i)
- phis.quick_push (phi_info);
- slp_tree *phi_node = bst_map->get (phis);
- unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
- SLP_TREE_CHILDREN (*phi_node)[dest_idx]
- = SLP_INSTANCE_TREE (new_instance);
- SLP_INSTANCE_TREE (new_instance)->refcnt++;
- }
+ /* Failed to SLP. */
+ scalar_stmts.release ();
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP discovery of reduction chain failed\n");
+ return false;
+}
- vinfo->slp_instances.safe_push (new_instance);
+/* Analyze an SLP instance starting from SCALAR_STMTS which are a group
+ of KIND. Return true if successful. */
- /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
- the number of scalar stmts in the root in a few places.
- Verify that assumption holds. */
- gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
- .length () == group_size);
+static bool
+vect_analyze_slp_reduction (loop_vec_info vinfo,
+ stmt_vec_info scalar_stmt,
+ unsigned max_tree_size, unsigned *limit,
+ scalar_stmts_to_slp_tree_map_t *bst_map,
+ bool force_single_lane)
+{
+ slp_instance_kind kind = slp_inst_kind_reduc_group;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Final SLP tree for instance %p:\n",
- (void *) new_instance);
- vect_print_slp_graph (MSG_NOTE, vect_location,
- SLP_INSTANCE_TREE (new_instance));
- }
+ /* If there's no budget left bail out early. */
+ if (*limit == 0)
+ return false;
- return true;
+ /* Try to gather a reduction chain. */
+ if (! force_single_lane
+ && STMT_VINFO_DEF_TYPE (scalar_stmt) == vect_reduction_def
+ && vect_analyze_slp_reduc_chain (vinfo, bst_map, scalar_stmt,
+ max_tree_size, limit))
+ return true;
+
+ vec<stmt_vec_info> scalar_stmts;
+ scalar_stmts.create (1);
+ scalar_stmts.quick_push (scalar_stmt);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Starting SLP discovery for\n");
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " %G", scalar_stmts[i]->stmt);
+ }
+
+ /* Build the tree for the SLP instance. */
+ unsigned int group_size = scalar_stmts.length ();
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0;
+
+ slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ &max_nunits, matches, limit,
+ &tree_size, bst_map);
+ if (node != NULL)
+ {
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = node;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+ SLP_INSTANCE_KIND (new_instance) = kind;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "SLP size %u vs. limit %u.\n",
+ tree_size, max_tree_size);
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
+ the number of scalar stmts in the root in a few places.
+ Verify that assumption holds. */
+ gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
+ .length () == group_size);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
}
+
+ return true;
}
/* Failed to SLP. */
@@ -5256,40 +5337,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
{
- /* Find SLP sequences starting from reduction chains. */
- FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
- if (! STMT_VINFO_RELEVANT_P (first_element)
- && ! STMT_VINFO_LIVE_P (first_element))
- ;
- else if (force_single_lane
- || ! vect_analyze_slp_reduc_chain (vinfo, bst_map,
- first_element,
- max_tree_size, &limit))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP discovery of reduction chain failed\n");
- /* Dissolve reduction chain group. */
- stmt_vec_info vinfo = first_element;
- stmt_vec_info last = NULL;
- while (vinfo)
- {
- stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
- REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
- REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- last = vinfo;
- vinfo = next;
- }
- STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
- /* ??? When there's a conversion around the reduction
- chain 'last' isn't the entry of the reduction. */
- if (STMT_VINFO_DEF_TYPE (last) != vect_reduction_def)
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- /* It can be still vectorized as part of an SLP reduction. */
- loop_vinfo->reductions.safe_push (last);
- }
-
/* Find SLP sequences starting from groups of reductions. */
if (loop_vinfo->reductions.length () > 0)
{
@@ -5315,23 +5362,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
if (!force_single_lane
&& !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
scalar_stmts.quick_push (next_info);
- else
- {
- /* Do SLP discovery for single-lane reductions. */
- vec<stmt_vec_info> stmts;
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- stmts.create (1);
- stmts.quick_push (next_info);
- if (! vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map,
- force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
+ /* Do SLP discovery for single-lane reductions. */
+ else if (! vect_analyze_slp_reduction (loop_vinfo, next_info,
+ max_tree_size, &limit,
+ bst_map,
+ force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
}
/* Save for re-processing on failure. */
@@ -5349,20 +5386,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
scalar_stmts.release ();
/* Do SLP discovery for single-lane reductions. */
for (auto stmt_info : saved_stmts)
- {
- vec<stmt_vec_info> stmts;
- vec<stmt_vec_info> roots = vNULL;
- vec<tree> remain = vNULL;
- stmts.create (1);
- stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
- if (! vect_build_slp_instance (vinfo,
- slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, force_single_lane))
- return opt_result::failure_at (vect_location,
- "SLP build failed.\n");
- }
+ if (! vect_analyze_slp_reduction (loop_vinfo,
+ vect_stmt_to_vectorize
+ (stmt_info),
+ max_tree_size, &limit,
+ bst_map, force_single_lane))
+ return opt_result::failure_at (vect_location,
+ "SLP build failed.\n");
}
saved_stmts.release ();
}