about summary refs log tree commit diff
path: root/gcc/tree-vect-slp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- gcc/tree-vect-slp.cc 196
1 files changed, 194 insertions, 2 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 13a2995..9698709 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4230,9 +4230,200 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
first = false;
}
while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt)));
- if (fail || scalar_stmts.length () <= 1)
+ if (fail)
return false;
+ /* When the SSA def chain through reduc-idx does not form a natural
+ reduction chain try to linearize an associative operation manually. */
+ if (scalar_stmts.length () == 1
+ && code.is_tree_code ()
+ && associative_tree_code ((tree_code)code)
+ /* We may not associate if a fold-left reduction is required. */
+ && !needs_fold_left_reduction_p (TREE_TYPE (gimple_get_lhs
+ (scalar_stmt->stmt)),
+ code))
+ {
+ auto_vec<chain_op_t> chain;
+ auto_vec<std::pair<tree_code, gimple *> > worklist;
+ gimple *op_stmt = NULL, *other_op_stmt = NULL;
+ vect_slp_linearize_chain (vinfo, worklist, chain, (tree_code)code,
+ scalar_stmts[0]->stmt, op_stmt, other_op_stmt,
+ NULL);
+
+ scalar_stmts.truncate (0);
+ stmt_vec_info tail = NULL;
+ for (auto el : chain)
+ {
+ if (el.dt == vect_external_def
+ || el.dt == vect_constant_def
+ || el.code != (tree_code) code)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ stmt_vec_info stmt = vinfo->lookup_def (el.op);
+ if (STMT_VINFO_REDUC_IDX (stmt) != -1
+ || STMT_VINFO_REDUC_DEF (stmt))
+ {
+ gcc_assert (tail == NULL);
+ tail = stmt;
+ continue;
+ }
+ scalar_stmts.safe_push (stmt);
+ }
+ gcc_assert (tail);
+
+ /* When this linearization didn't produce a chain see if stripping
+ a wrapping sign conversion produces one. */
+ if (scalar_stmts.length () == 1)
+ {
+ gimple *stmt = scalar_stmts[0]->stmt;
+ if (!is_gimple_assign (stmt)
+ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+ || TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME
+ || !tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
+ TREE_TYPE (gimple_assign_rhs1 (stmt))))
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ if (!is_gimple_assign (stmt)
+ || gimple_assign_rhs_code (stmt) != (tree_code)code)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ chain.truncate (0);
+ vect_slp_linearize_chain (vinfo, worklist, chain, (tree_code)code,
+ stmt, op_stmt, other_op_stmt, NULL);
+
+ scalar_stmts.truncate (0);
+ tail = NULL;
+ for (auto el : chain)
+ {
+ if (el.dt == vect_external_def
+ || el.dt == vect_constant_def
+ || el.code != (tree_code) code)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ stmt_vec_info stmt = vinfo->lookup_def (el.op);
+ if (STMT_VINFO_REDUC_IDX (stmt) != -1
+ || STMT_VINFO_REDUC_DEF (stmt))
+ {
+ gcc_assert (tail == NULL);
+ tail = stmt;
+ continue;
+ }
+ scalar_stmts.safe_push (stmt);
+ }
+ /* Unlike the above this does not include the reduction SSA
+ cycle. */
+ gcc_assert (!tail);
+ }
+
+ if (scalar_stmts.length () < 2)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Starting SLP discovery of reduction chain for\n");
+ for (unsigned i = 0; i < scalar_stmts.length (); ++i)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " %G", scalar_stmts[i]->stmt);
+ }
+
+ unsigned int group_size = scalar_stmts.length ();
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0;
+ slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ &max_nunits, matches, limit,
+ &tree_size, bst_map);
+ if (!node)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+
+ unsigned cycle_id = vinfo->reduc_infos.length ();
+ vect_reduc_info reduc_info = new vect_reduc_info_s ();
+ vinfo->reduc_infos.safe_push (reduc_info);
+ VECT_REDUC_INFO_DEF_TYPE (reduc_info) = STMT_VINFO_DEF_TYPE (next_stmt);
+ VECT_REDUC_INFO_TYPE (reduc_info) = STMT_VINFO_REDUC_TYPE (next_stmt);
+ VECT_REDUC_INFO_CODE (reduc_info) = STMT_VINFO_REDUC_CODE (next_stmt);
+ VECT_REDUC_INFO_FN (reduc_info) = IFN_LAST;
+ reduc_info->is_reduc_chain = true;
+
+ /* Build the node for the PHI and possibly the conversion(s?). */
+ slp_tree phis = vect_create_new_slp_node (2, ERROR_MARK);
+ SLP_TREE_REPRESENTATIVE (phis) = next_stmt;
+ phis->cycle_info.id = cycle_id;
+ SLP_TREE_LANES (phis) = group_size;
+ SLP_TREE_VECTYPE (phis) = SLP_TREE_VECTYPE (node);
+ /* ??? vect_cse_slp_nodes cannot cope with cycles without any
+ SLP_TREE_SCALAR_STMTS. */
+ SLP_TREE_SCALAR_STMTS (phis).create (group_size);
+ for (unsigned i = 0; i < group_size; ++i)
+ SLP_TREE_SCALAR_STMTS (phis).quick_push (next_stmt);
+
+ slp_tree reduc = vect_create_new_slp_node (2, ERROR_MARK);
+ SLP_TREE_REPRESENTATIVE (reduc) = scalar_stmt;
+ SLP_TREE_CHILDREN (reduc).quick_push (phis);
+ SLP_TREE_CHILDREN (reduc).quick_push (node);
+ reduc->cycle_info.id = cycle_id;
+ SLP_TREE_REDUC_IDX (reduc) = 0;
+ SLP_TREE_LANES (reduc) = group_size;
+ SLP_TREE_VECTYPE (reduc) = SLP_TREE_VECTYPE (node);
+ /* ??? For the reduction epilogue we need a live lane. */
+ SLP_TREE_SCALAR_STMTS (reduc).create (group_size);
+ SLP_TREE_SCALAR_STMTS (reduc).quick_push (scalar_stmt);
+ for (unsigned i = 1; i < group_size; ++i)
+ SLP_TREE_SCALAR_STMTS (reduc).quick_push (NULL);
+
+ edge le = loop_latch_edge (LOOP_VINFO_LOOP (vinfo));
+ SLP_TREE_CHILDREN (phis).quick_push (NULL);
+ SLP_TREE_CHILDREN (phis).quick_push (NULL);
+ SLP_TREE_CHILDREN (phis)[le->dest_idx] = reduc;
+ SLP_TREE_REF_COUNT (reduc)++;
+
+ /* Create a new SLP instance. */
+ slp_instance new_instance = XNEW (class _slp_instance);
+ SLP_INSTANCE_TREE (new_instance) = reduc;
+ SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
+ SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
+ new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
+
+ vinfo->slp_instances.safe_push (new_instance);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Final SLP tree for instance %p:\n",
+ (void *) new_instance);
+ vect_print_slp_graph (MSG_NOTE, vect_location,
+ SLP_INSTANCE_TREE (new_instance));
+ }
+
+ return true;
+ }
+
+ if (scalar_stmts.length () <= 1)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+
scalar_stmts.reverse ();
stmt_vec_info reduc_phi_info = next_stmt;
@@ -12046,7 +12237,8 @@ vect_schedule_slp (vec_info *vinfo, const vec<slp_instance> &slp_instances)
/* Remove vectorized stores original scalar stmts. */
for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
{
- if (!STMT_VINFO_DATA_REF (store_info)
+ if (!store_info
+ || !STMT_VINFO_DATA_REF (store_info)
|| !DR_IS_WRITE (STMT_VINFO_DATA_REF (store_info)))
break;