diff options
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- | gcc/tree-vect-slp.cc | 432 |
1 files changed, 231 insertions, 201 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index f553e8f..13a2995 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -53,6 +53,9 @@ along with GCC; see the file COPYING3. If not see #include "sreal.h" #include "predict.h" +#define REDUC_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) + static bool vect_transform_slp_perm_load_1 (vec_info *, slp_tree, load_permutation_t &, const vec<tree> &, @@ -4187,41 +4190,60 @@ vect_build_slp_instance (vec_info *vinfo, Return FALSE if SLP build fails. */ static bool -vect_analyze_slp_reduc_chain (vec_info *vinfo, +vect_analyze_slp_reduc_chain (loop_vec_info vinfo, scalar_stmts_to_slp_tree_map_t *bst_map, - stmt_vec_info stmt_info, + stmt_vec_info scalar_stmt, unsigned max_tree_size, unsigned *limit) { - vec<stmt_vec_info> scalar_stmts; + vec<stmt_vec_info> scalar_stmts = vNULL; - /* Collect the reduction stmts and store them in scalar_stmts. */ - scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info)); - stmt_vec_info next_info = stmt_info; - while (next_info) + bool fail = false; + /* ??? We could leave operation code checking to SLP discovery. */ + code_helper code = STMT_VINFO_REDUC_CODE (STMT_VINFO_REDUC_DEF + (vect_orig_stmt (scalar_stmt))); + bool first = true; + stmt_vec_info next_stmt = scalar_stmt; + do { - scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); - next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); + stmt_vec_info stmt = next_stmt; + gimple_match_op op; + if (!gimple_extract_op (STMT_VINFO_STMT (stmt), &op)) + gcc_unreachable (); + tree reduc_def = gimple_arg (STMT_VINFO_STMT (stmt), + STMT_VINFO_REDUC_IDX (stmt)); + next_stmt = vect_stmt_to_vectorize (vinfo->lookup_def (reduc_def)); + gcc_assert (is_a <gphi *> (STMT_VINFO_STMT (next_stmt)) + || STMT_VINFO_REDUC_IDX (next_stmt) != -1); + if (!gimple_extract_op (STMT_VINFO_STMT (vect_orig_stmt (stmt)), &op)) + gcc_unreachable (); + if (CONVERT_EXPR_CODE_P (op.code) + && (first + || is_a <gphi *> (STMT_VINFO_STMT (next_stmt)))) + ; + else if (code != op.code) + { + fail = true; + break; + } + else + scalar_stmts.safe_push (stmt); + first = false; } - /* Mark the first element of the reduction chain as reduction to properly - transform the node. In the reduction analysis phase only the last - element of the chain is marked as reduction. */ - STMT_VINFO_DEF_TYPE (stmt_info) - = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); - STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); + while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt))); + if (fail || scalar_stmts.length () <= 1) + return false; + + scalar_stmts.reverse (); + stmt_vec_info reduc_phi_info = next_stmt; /* Build the tree for the SLP instance. */ vec<stmt_vec_info> root_stmt_infos = vNULL; vec<tree> remain = vNULL; - /* If there's no budget left bail out early. */ - if (*limit == 0) - return false; - if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, - "Starting SLP discovery for\n"); + "Starting SLP discovery of reduction chain for\n"); for (unsigned i = 0; i < scalar_stmts.length (); ++i) dump_printf_loc (MSG_NOTE, vect_location, " %G", scalar_stmts[i]->stmt); @@ -4233,136 +4255,195 @@ vect_analyze_slp_reduc_chain (vec_info *vinfo, poly_uint64 max_nunits = 1; unsigned tree_size = 0; + /* ??? We need this only for SLP discovery. */ + for (unsigned i = 0; i < scalar_stmts.length (); ++i) + REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0]; + slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, &max_nunits, matches, limit, &tree_size, bst_map); + + for (unsigned i = 0; i < scalar_stmts.length (); ++i) + REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = NULL; + if (node != NULL) { - /* Calculate the unrolling factor based on the smallest type. */ - poly_uint64 unrolling_factor - = calculate_unrolling_factor (max_nunits, group_size); + /* Create a new SLP instance. */ + slp_instance new_instance = XNEW (class _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_LOADS (new_instance) = vNULL; + SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos; + SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain; + SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain; + new_instance->reduc_phis = NULL; + new_instance->cost_vec = vNULL; + new_instance->subgraph_entries = vNULL; - if (maybe_ne (unrolling_factor, 1U) - && is_a <bb_vec_info> (vinfo)) + vect_reduc_info reduc_info = info_for_reduction (vinfo, node); + reduc_info->is_reduc_chain = true; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP size %u vs. limit %u.\n", + tree_size, max_tree_size); + + /* Fixup SLP reduction chains. If this is a reduction chain with + a conversion in front amend the SLP tree with a node for that. */ + gimple *scalar_def = STMT_VINFO_REDUC_DEF (reduc_phi_info)->stmt; + if (is_gimple_assign (scalar_def) + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (scalar_def))) + { + stmt_vec_info conv_info = vect_stmt_to_vectorize + (STMT_VINFO_REDUC_DEF (reduc_phi_info)); + scalar_stmts = vNULL; + scalar_stmts.create (group_size); + for (unsigned i = 0; i < group_size; ++i) + scalar_stmts.quick_push (conv_info); + slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1); + SLP_TREE_VECTYPE (conv) + = get_vectype_for_scalar_type (vinfo, + TREE_TYPE + (gimple_assign_lhs (scalar_def)), + group_size); + SLP_TREE_REDUC_IDX (conv) = 0; + conv->cycle_info.id = node->cycle_info.id; + SLP_TREE_CHILDREN (conv).quick_push (node); + SLP_INSTANCE_TREE (new_instance) = conv; + } + /* Fill the backedge child of the PHI SLP node. The + general matching code cannot find it because the + scalar code does not reflect how we vectorize the + reduction. */ + use_operand_p use_p; + imm_use_iterator imm_iter; + class loop *loop = LOOP_VINFO_LOOP (vinfo); + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, + gimple_get_lhs (scalar_def)) + /* There are exactly two non-debug uses, the reduction + PHI and the loop-closed PHI node. */ + if (!is_gimple_debug (USE_STMT (use_p)) + && gimple_bb (USE_STMT (use_p)) == loop->header) + { + auto_vec<stmt_vec_info, 64> phis (group_size); + stmt_vec_info phi_info = vinfo->lookup_stmt (USE_STMT (use_p)); + for (unsigned i = 0; i < group_size; ++i) + phis.quick_push (phi_info); + slp_tree *phi_node = bst_map->get (phis); + unsigned dest_idx = loop_latch_edge (loop)->dest_idx; + SLP_TREE_CHILDREN (*phi_node)[dest_idx] + = SLP_INSTANCE_TREE (new_instance); + SLP_INSTANCE_TREE (new_instance)->refcnt++; + } + + vinfo->slp_instances.safe_push (new_instance); + + /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with + the number of scalar stmts in the root in a few places. + Verify that assumption holds. */ + gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance)) + .length () == group_size); + + if (dump_enabled_p ()) { - unsigned HOST_WIDE_INT const_max_nunits; - if (!max_nunits.is_constant (&const_max_nunits) - || const_max_nunits > group_size) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: store group " - "size not a multiple of the vector size " - "in basic block SLP\n"); - vect_free_slp_tree (node); - return false; - } - /* Fatal mismatch. */ - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "SLP discovery succeeded but node needs " - "splitting\n"); - memset (matches, true, group_size); - matches[group_size / const_max_nunits * const_max_nunits] = false; - vect_free_slp_tree (node); + dump_printf_loc (MSG_NOTE, vect_location, + "Final SLP tree for instance %p:\n", + (void *) new_instance); + vect_print_slp_graph (MSG_NOTE, vect_location, + SLP_INSTANCE_TREE (new_instance)); } - else - { - /* Create a new SLP instance. */ - slp_instance new_instance = XNEW (class _slp_instance); - SLP_INSTANCE_TREE (new_instance) = node; - SLP_INSTANCE_LOADS (new_instance) = vNULL; - SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos; - SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain; - SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain; - new_instance->reduc_phis = NULL; - new_instance->cost_vec = vNULL; - new_instance->subgraph_entries = vNULL; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "SLP size %u vs. limit %u.\n", - tree_size, max_tree_size); + return true; + } - /* Fixup SLP reduction chains. If this is a reduction chain with - a conversion in front amend the SLP tree with a node for that. */ - gimple *scalar_def - = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt; - if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def) - { - /* Get at the conversion stmt - we know it's the single use - of the last stmt of the reduction chain. */ - use_operand_p use_p; - bool r = single_imm_use (gimple_assign_lhs (scalar_def), - &use_p, &scalar_def); - gcc_assert (r); - stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def); - next_info = vect_stmt_to_vectorize (next_info); - scalar_stmts = vNULL; - scalar_stmts.create (group_size); - for (unsigned i = 0; i < group_size; ++i) - scalar_stmts.quick_push (next_info); - slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1); - SLP_TREE_VECTYPE (conv) - = get_vectype_for_scalar_type (vinfo, - TREE_TYPE - (gimple_assign_lhs (scalar_def)), - group_size); - SLP_TREE_REDUC_IDX (conv) = 0; - conv->cycle_info.id = node->cycle_info.id; - SLP_TREE_CHILDREN (conv).quick_push (node); - SLP_INSTANCE_TREE (new_instance) = conv; - /* We also have to fake this conversion stmt as SLP reduction - group so we don't have to mess with too much code - elsewhere. */ - REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info; - REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL; - } - /* Fill the backedge child of the PHI SLP node. The - general matching code cannot find it because the - scalar code does not reflect how we vectorize the - reduction. */ - use_operand_p use_p; - imm_use_iterator imm_iter; - class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo)); - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, - gimple_get_lhs (scalar_def)) - /* There are exactly two non-debug uses, the reduction - PHI and the loop-closed PHI node. */ - if (!is_gimple_debug (USE_STMT (use_p)) - && gimple_bb (USE_STMT (use_p)) == loop->header) - { - auto_vec<stmt_vec_info, 64> phis (group_size); - stmt_vec_info phi_info - = vinfo->lookup_stmt (USE_STMT (use_p)); - for (unsigned i = 0; i < group_size; ++i) - phis.quick_push (phi_info); - slp_tree *phi_node = bst_map->get (phis); - unsigned dest_idx = loop_latch_edge (loop)->dest_idx; - SLP_TREE_CHILDREN (*phi_node)[dest_idx] - = SLP_INSTANCE_TREE (new_instance); - SLP_INSTANCE_TREE (new_instance)->refcnt++; - } + /* Failed to SLP. */ + scalar_stmts.release (); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery of reduction chain failed\n"); + return false; +} - vinfo->slp_instances.safe_push (new_instance); +/* Analyze an SLP instance starting from SCALAR_STMTS which are a group + of KIND. Return true if successful. */ - /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with - the number of scalar stmts in the root in a few places. - Verify that assumption holds. */ - gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance)) - .length () == group_size); +static bool +vect_analyze_slp_reduction (loop_vec_info vinfo, + stmt_vec_info scalar_stmt, + unsigned max_tree_size, unsigned *limit, + scalar_stmts_to_slp_tree_map_t *bst_map, + bool force_single_lane) +{ + slp_instance_kind kind = slp_inst_kind_reduc_group; - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "Final SLP tree for instance %p:\n", - (void *) new_instance); - vect_print_slp_graph (MSG_NOTE, vect_location, - SLP_INSTANCE_TREE (new_instance)); - } + /* If there's no budget left bail out early. */ + if (*limit == 0) + return false; - return true; + /* Try to gather a reduction chain. */ + if (! force_single_lane + && STMT_VINFO_DEF_TYPE (scalar_stmt) == vect_reduction_def + && vect_analyze_slp_reduc_chain (vinfo, bst_map, scalar_stmt, + max_tree_size, limit)) + return true; + + vec<stmt_vec_info> scalar_stmts; + scalar_stmts.create (1); + scalar_stmts.quick_push (scalar_stmt); + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Starting SLP discovery for\n"); + for (unsigned i = 0; i < scalar_stmts.length (); ++i) + dump_printf_loc (MSG_NOTE, vect_location, + " %G", scalar_stmts[i]->stmt); + } + + /* Build the tree for the SLP instance. */ + unsigned int group_size = scalar_stmts.length (); + bool *matches = XALLOCAVEC (bool, group_size); + poly_uint64 max_nunits = 1; + unsigned tree_size = 0; + + slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, + &max_nunits, matches, limit, + &tree_size, bst_map); + if (node != NULL) + { + /* Create a new SLP instance. */ + slp_instance new_instance = XNEW (class _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_LOADS (new_instance) = vNULL; + SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL; + SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL; + SLP_INSTANCE_KIND (new_instance) = kind; + new_instance->reduc_phis = NULL; + new_instance->cost_vec = vNULL; + new_instance->subgraph_entries = vNULL; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP size %u vs. limit %u.\n", + tree_size, max_tree_size); + + vinfo->slp_instances.safe_push (new_instance); + + /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with + the number of scalar stmts in the root in a few places. + Verify that assumption holds. */ + gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance)) + .length () == group_size); + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Final SLP tree for instance %p:\n", + (void *) new_instance); + vect_print_slp_graph (MSG_NOTE, vect_location, + SLP_INSTANCE_TREE (new_instance)); } + + return true; } /* Failed to SLP. */ @@ -5256,40 +5337,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) { - /* Find SLP sequences starting from reduction chains. */ - FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) - if (! STMT_VINFO_RELEVANT_P (first_element) - && ! STMT_VINFO_LIVE_P (first_element)) - ; - else if (force_single_lane - || ! vect_analyze_slp_reduc_chain (vinfo, bst_map, - first_element, - max_tree_size, &limit)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "SLP discovery of reduction chain failed\n"); - /* Dissolve reduction chain group. */ - stmt_vec_info vinfo = first_element; - stmt_vec_info last = NULL; - while (vinfo) - { - stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo); - REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; - REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; - last = vinfo; - vinfo = next; - } - STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def; - /* ??? When there's a conversion around the reduction - chain 'last' isn't the entry of the reduction. */ - if (STMT_VINFO_DEF_TYPE (last) != vect_reduction_def) - return opt_result::failure_at (vect_location, - "SLP build failed.\n"); - /* It can be still vectorized as part of an SLP reduction. */ - loop_vinfo->reductions.safe_push (last); - } - /* Find SLP sequences starting from groups of reductions. */ if (loop_vinfo->reductions.length () > 0) { @@ -5315,23 +5362,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, if (!force_single_lane && !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info))) scalar_stmts.quick_push (next_info); - else - { - /* Do SLP discovery for single-lane reductions. */ - vec<stmt_vec_info> stmts; - vec<stmt_vec_info> roots = vNULL; - vec<tree> remain = vNULL; - stmts.create (1); - stmts.quick_push (next_info); - if (! vect_build_slp_instance (vinfo, - slp_inst_kind_reduc_group, - stmts, roots, remain, - max_tree_size, &limit, - bst_map, - force_single_lane)) - return opt_result::failure_at (vect_location, - "SLP build failed.\n"); - } + /* Do SLP discovery for single-lane reductions. */ + else if (! vect_analyze_slp_reduction (loop_vinfo, next_info, + max_tree_size, &limit, + bst_map, + force_single_lane)) + return opt_result::failure_at (vect_location, + "SLP build failed.\n"); } } /* Save for re-processing on failure. */ @@ -5349,20 +5386,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, scalar_stmts.release (); /* Do SLP discovery for single-lane reductions. */ for (auto stmt_info : saved_stmts) - { - vec<stmt_vec_info> stmts; - vec<stmt_vec_info> roots = vNULL; - vec<tree> remain = vNULL; - stmts.create (1); - stmts.quick_push (vect_stmt_to_vectorize (stmt_info)); - if (! vect_build_slp_instance (vinfo, - slp_inst_kind_reduc_group, - stmts, roots, remain, - max_tree_size, &limit, - bst_map, force_single_lane)) - return opt_result::failure_at (vect_location, - "SLP build failed.\n"); - } + if (! vect_analyze_slp_reduction (loop_vinfo, + vect_stmt_to_vectorize + (stmt_info), + max_tree_size, &limit, + bst_map, force_single_lane)) + return opt_result::failure_at (vect_location, + "SLP build failed.\n"); } saved_stmts.release (); } |