diff options
author | Richard Biener <rguenther@suse.de> | 2025-08-22 12:29:35 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2025-09-01 14:04:24 +0200 |
commit | b3355804c7054bce288b643ce127ec5e9d0c2061 (patch) | |
tree | 865e81563a23e270841e358cd973f95a3bdadf9b /gcc | |
parent | 42bcdfa5cb64724dc0b6f1fdcdd537aa2bbefac3 (diff) | |
download | gcc-b3355804c7054bce288b643ce127ec5e9d0c2061.zip gcc-b3355804c7054bce288b643ce127ec5e9d0c2061.tar.gz gcc-b3355804c7054bce288b643ce127ec5e9d0c2061.tar.bz2 |
Introduce abstraction for vect reduction info, tracked from SLP nodes
While we have already the accessor info_for_reduction, its result
is a plain stmt_vec_info. The following turns that into a class
for the purpose of changing accesses to reduction info to a new
set of accessors prefixed with VECT_REDUC_INFO and removes
the corresponding STMT_VINFO prefixed accessors where possible.
There are a few reduction-related things that are used by scalar
cycle detection and thus have to stay as-is for now, and as
copies in the future.
This also separates reduction info into one object per reduction
and associate it with SLP nodes, splitting it out from
stmt_vec_info, retaining (and duplicating) parts used by scalar
cycle analysis. The data is then associated with SLP nodes
forming reduction cycles and accessible via info_for_reduction.
The data is created at SLP discovery time as we look at it even
pre-vectorizable_reduction analysis, but most of the data is
only populated by the latter. There is no reduction info with
nested cycles that are not part of an outer reduction.
In the process this adds cycle info to each SLP tree, notably
the reduc-idx and a way to identify the reduction info.
* tree-vectorizer.h (vect_reduc_info): New.
(create_info_for_reduction): Likewise.
(VECT_REDUC_INFO_TYPE): Likewise.
(VECT_REDUC_INFO_CODE): Likewise.
(VECT_REDUC_INFO_FN): Likewise.
(VECT_REDUC_INFO_SCALAR_RESULTS): Likewise.
(VECT_REDUC_INFO_INITIAL_VALUES): Likewise.
(VECT_REDUC_INFO_REUSED_ACCUMULATOR): Likewise.
(VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL): Likewise.
(VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT): Likewise.
(VECT_REDUC_INFO_FORCE_SINGLE_CYCLE): Likewise.
(VECT_REDUC_INFO_RESULT_POS): Likewise.
(VECT_REDUC_INFO_VECTYPE): Likewise.
(STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL): Remove.
(STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT): Likewise.
(STMT_VINFO_FORCE_SINGLE_CYCLE): Likewise.
(STMT_VINFO_REDUC_FN): Likewise.
(STMT_VINFO_REDUC_VECTYPE): Likewise.
(vect_reusable_accumulator::reduc_info): Adjust.
(vect_reduc_type): Adjust.
(_slp_tree::cycle_info): New member.
(SLP_TREE_REDUC_IDX): Likewise.
(vect_reduc_info_s): Move/copy data from ...
(_stmt_vec_info): ... here.
(_loop_vec_info::reduc_infos): New member.
(info_for_reduction): Adjust to take SLP node.
(vect_reduc_type): Adjust.
(vect_is_reduction): Add overload for SLP node.
* tree-vectorizer.cc (vec_info::new_stmt_vec_info):
Do not initialize removed members.
(vec_info::free_stmt_vec_info): Do not release them.
* tree-vect-stmts.cc (vectorizable_condition): Adjust.
* tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize
cycle info.
(vect_build_slp_tree_2): Compute SLP reduc_idx and store
it. Create, populate and propagate reduction info.
(vect_print_slp_tree): Print cycle info.
(vect_analyze_slp_reduc_chain): Set cycle info on the
manually added conversion node.
(vect_optimize_slp_pass::start_choosing_layouts): Adjust.
* tree-vect-loop.cc (_loop_vec_info::~_loop_vec_info):
Release reduction infos.
(info_for_reduction): Get the reduction info from
the vector in the loop_vinfo.
(vect_create_epilog_for_reduction): Adjust.
(vectorizable_reduction): Likewise.
(vect_transform_reduction): Likewise.
(vect_transform_cycle_phi): Likewise; handle that nested
cycles not part of a double reduction have no reduction info.
* config/aarch64/aarch64.cc (aarch64_force_single_cycle):
Use VECT_REDUC_INFO_FORCE_SINGLE_CYCLE, get SLP node and use
that.
(aarch64_vector_costs::count_ops): Adjust.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 11 | ||||
-rw-r--r-- | gcc/tree-vect-loop.cc | 215 | ||||
-rw-r--r-- | gcc/tree-vect-slp.cc | 79 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.cc | 24 | ||||
-rw-r--r-- | gcc/tree-vectorizer.cc | 6 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 157 |
6 files changed, 294 insertions, 198 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fb05ea8..ef9c165 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -17770,13 +17770,12 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind, with the single accumulator being read and written multiple times. */ static bool -aarch64_force_single_cycle (vec_info *vinfo, stmt_vec_info stmt_info) +aarch64_force_single_cycle (vec_info *vinfo, slp_tree node) { - if (!STMT_VINFO_REDUC_DEF (stmt_info)) + auto reduc_info = info_for_reduction (as_a <loop_vec_info> (vinfo), node); + if (!reduc_info) return false; - - auto reduc_info = info_for_reduction (vinfo, stmt_info); - return STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); + return VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info); } /* COUNT, KIND and STMT_INFO are the same as for vector_costs::add_stmt_cost @@ -17803,7 +17802,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, = aarch64_in_loop_reduction_latency (m_vinfo, node, stmt_info, m_vec_flags); if (m_costing_for_scalar - || aarch64_force_single_cycle (m_vinfo, stmt_info)) + || aarch64_force_single_cycle (m_vinfo, node)) /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector, and then accumulate that, but at the moment the loop-carried dependency includes all copies. 
*/ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index a4e96cf..d3e25a5 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -947,6 +947,8 @@ _loop_vec_info::~_loop_vec_info () delete scan_map; delete scalar_costs; delete vector_costs; + for (auto reduc_info : reduc_infos) + delete reduc_info; /* When we release an epiloge vinfo that we do not intend to use avoid clearing AUX of the main loop which should continue to @@ -4985,9 +4987,9 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, static void vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, gimple *seq) + vect_reduc_info reduc_info, gimple *seq) { - if (reduc_info->reused_accumulator) + if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)) { /* When reusing an accumulator from the main loop, we only need initialization instructions if the main loop can be skipped. @@ -5015,13 +5017,13 @@ vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, static void get_initial_defs_for_reduction (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, + vect_reduc_info reduc_info, tree vector_type, vec<tree> *vec_oprnds, unsigned int number_of_vectors, unsigned int group_size, tree neutral_op) { - vec<tree> &initial_values = reduc_info->reduc_initial_values; + vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info); unsigned HOST_WIDE_INT nunits; unsigned j, number_of_places_left_in_vector; unsigned int i; @@ -5127,30 +5129,12 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo, vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq); } -/* For a statement STMT_INFO taking part in a reduction operation return - the stmt_vec_info the meta information is stored on. 
*/ - -stmt_vec_info -info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) +vect_reduc_info +info_for_reduction (loop_vec_info loop_vinfo, slp_tree node) { - stmt_info = vect_orig_stmt (stmt_info); - gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); - if (!is_a <gphi *> (stmt_info->stmt) - || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - gphi *phi = as_a <gphi *> (stmt_info->stmt); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) - { - if (gimple_phi_num_args (phi) == 1) - stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - } - else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) - { - stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); - if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) - stmt_info = info; - } - return stmt_info; + if (node->cycle_info.id == -1) + return NULL; + return loop_vinfo->reduc_infos[node->cycle_info.id]; } /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that @@ -5159,16 +5143,16 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) static bool vect_find_reusable_accumulator (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, tree vectype) + vect_reduc_info reduc_info, tree vectype) { loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); if (!main_loop_vinfo) return false; - if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION) + if (VECT_REDUC_INFO_TYPE (reduc_info) != TREE_CODE_REDUCTION) return false; - unsigned int num_phis = reduc_info->reduc_initial_values.length (); + unsigned int num_phis = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).length (); auto_vec<tree, 16> main_loop_results (num_phis); auto_vec<tree, 16> initial_values (num_phis); if (edge main_loop_edge = loop_vinfo->main_loop_edge) @@ -5176,7 +5160,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, /* The epilogue loop can be entered either from the main loop or from 
an earlier guard block. */ edge skip_edge = loop_vinfo->skip_main_loop_edge; - for (tree incoming_value : reduc_info->reduc_initial_values) + for (tree incoming_value : VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)) { /* Look for: @@ -5196,15 +5180,15 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, } else /* The main loop dominates the epilogue loop. */ - main_loop_results.splice (reduc_info->reduc_initial_values); + main_loop_results.splice (VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)); /* See if the main loop has the kind of accumulator we need. */ vect_reusable_accumulator *accumulator = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]); if (!accumulator - || num_phis != accumulator->reduc_info->reduc_scalar_results.length () + || num_phis != VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).length () || !std::equal (main_loop_results.begin (), main_loop_results.end (), - accumulator->reduc_info->reduc_scalar_results.begin ())) + VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).begin ())) return false; /* Handle the case where we can reduce wider vectors to narrower ones. */ @@ -5222,7 +5206,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, tree intermediate_vectype = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits); if (!intermediate_vectype - || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), + || !directly_supported_p (VECT_REDUC_INFO_CODE (reduc_info), intermediate_vectype) || !can_vec_extract (TYPE_MODE (prev_vectype), TYPE_MODE (intermediate_vectype))) @@ -5241,7 +5225,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, to select the correct adjustment, but in practice that shouldn't be necessary.) 
*/ tree main_adjustment - = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); + = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); if (loop_vinfo->main_loop_edge && main_adjustment) { gcc_assert (num_phis == 1); @@ -5250,14 +5234,14 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, initialize the accumulator with a neutral value instead. */ if (!operand_equal_p (initial_value, main_adjustment)) return false; - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); + code_helper code = VECT_REDUC_INFO_CODE (reduc_info); initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value), code, initial_value); } - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; - reduc_info->reduc_initial_values.truncate (0); - reduc_info->reduc_initial_values.splice (initial_values); - reduc_info->reused_accumulator = accumulator; + VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; + VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).truncate (0); + VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).splice (initial_values); + VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) = accumulator; return true; } @@ -5410,8 +5394,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, slp_instance slp_node_instance, edge loop_exit) { - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); /* For double reductions we need to get at the inner loop reduction stmt which has the meta info attached. 
Our stmt_info is that of the loop-closed PHI of the inner loop which we remember as @@ -5424,8 +5407,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, (stmt_info->stmt, 0)); stmt_info = vect_stmt_to_vectorize (stmt_info); } - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); - internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + code_helper code = VECT_REDUC_INFO_CODE (reduc_info); + internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info); tree vectype; machine_mode mode; class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; @@ -5445,7 +5428,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gimple *use_stmt; auto_vec<tree> reduc_inputs; int j, i; - vec<tree> &scalar_results = reduc_info->reduc_scalar_results; + vec<tree> &scalar_results = VECT_REDUC_INFO_SCALAR_RESULTS (reduc_info); unsigned int k; /* SLP reduction without reduction chain, e.g., # a1 = phi <a2, a0> @@ -5465,7 +5448,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (double_reduc); } - vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); + vectype = VECT_REDUC_INFO_VECTYPE (reduc_info); gcc_assert (vectype); mode = TYPE_MODE (vectype); @@ -5473,12 +5456,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree adjustment_def = NULL; /* Optimize: for induction condition reduction, if we can't use zero for induc_val, use initial_def. 
*/ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) - induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); + if (VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info); else if (double_reduc) ; else - adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); + adjustment_def = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info); stmt_vec_info single_live_out_stmt[] = { stmt_info }; array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; @@ -5499,7 +5482,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, PR92772: This algorithm is broken for architectures that support masked vectors, but do not provide fold_extract_last. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION) { auto_vec<std::pair<tree, bool>, 2> ccompares; slp_tree cond_node = slp_node_instance->root; @@ -5730,7 +5713,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, the minor(?) benefit of making the epilogue loop's scalar result independent of the main loop's scalar result. 
*/ bool unify_with_main_loop_p = false; - if (reduc_info->reused_accumulator + if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) && loop_vinfo->skip_this_loop_edge && single_succ_p (exit_bb) && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest) @@ -5742,7 +5725,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block); add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb), UNKNOWN_LOCATION); - add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input, + add_phi_arg (new_phi, + VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)->reduc_input, loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION); exit_gsi = gsi_after_labels (reduc_block); } @@ -5750,7 +5734,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Shouldn't be used beyond this point. */ exit_bb = nullptr; - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION && reduc_fn != IFN_LAST) { /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing @@ -5856,7 +5840,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); scalar_results.safe_push (new_temp); } - else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + else if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION && reduc_fn == IFN_LAST) { /* Condition reduction without supported IFN_REDUC_MAX. 
Generate @@ -5949,7 +5933,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, new_temp = gimple_convert (&stmts, scalar_type, new_temp); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) && induc_val) { /* Earlier we set the initial value to be a vector if induc_val @@ -5960,7 +5944,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, new_temp, induc_val); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - tree initial_def = reduc_info->reduc_initial_values[0]; + tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0]; tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, initial_def, new_temp); @@ -6011,7 +5995,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, for MIN and MAX reduction, for example. 
*/ if (!neutral_op) { - tree scalar_value = reduc_info->reduc_initial_values[i]; + tree scalar_value + = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[i]; scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), scalar_value); vector_identity = gimple_build_vector_from_val (&seq, vectype, @@ -6236,7 +6221,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); } - if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) && induc_val) { /* Earlier we set the initial value to be a vector if induc_val @@ -6247,7 +6232,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, scalar_results[0], induc_val); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - tree initial_def = reduc_info->reduc_initial_values[0]; + tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0]; tree tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, initial_def, scalar_results[0]); @@ -6290,7 +6275,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } /* Record this operation if it could be reused by the epilogue loop. 
*/ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION + if (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION && reduc_inputs.length () == 1) loop_vinfo->reusable_accumulators.put (scalar_results[0], { orig_reduc_input, reduc_info }); @@ -6789,13 +6774,13 @@ build_vect_cond_expr (code_helper code, tree vop[3], tree mask, static void vect_reduction_update_partial_vector_usage (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, + vect_reduc_info reduc_info, slp_tree slp_node, code_helper code, tree type, tree vectype_in) { - enum vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (reduc_info); - internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + enum vect_reduction_type reduc_type = VECT_REDUC_INFO_TYPE (reduc_info); + internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info); internal_fn cond_fn = get_conditional_internal_fn (code, type); if (reduc_type != FOLD_LEFT_REDUCTION @@ -6895,12 +6880,12 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, || STMT_VINFO_REDUC_IDX (stmt_info) < 0) return false; - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); /* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not recoginized. */ gcc_assert (!nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)); - gcc_assert (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION); + gcc_assert (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION); for (int i = 0; i < (int) gimple_num_ops (stmt) - 1; i++) { @@ -7058,9 +7043,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) return false; - /* The stmt we store reduction analysis meta on. */ - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - reduc_info->is_reduc_info = true; + /* The reduction meta. 
*/ + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) { @@ -7136,7 +7120,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, slp_tree vdef_slp = slp_node_instance->root; /* For double-reductions we start SLP analysis at the inner loop LC PHI which is the def of the outer loop live stmt. */ - if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def) + if (VECT_REDUC_INFO_DEF_TYPE (reduc_info) == vect_double_reduction_def) vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0]; while (reduc_def != PHI_RESULT (reduc_def_phi)) { @@ -7283,7 +7267,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, which is defined by the loop-header-phi. */ tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info); - STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; + VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out; gimple_match_op op; if (!gimple_extract_op (stmt_info->stmt, &op)) @@ -7375,8 +7359,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } } - enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info); - STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type; + enum vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); /* If we have a condition reduction, see if we can simplify it further. 
*/ if (reduction_type == COND_REDUCTION) { @@ -7403,7 +7386,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "optimizing condition reduction with" " FOLD_EXTRACT_LAST.\n"); - STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; } else if (cond_reduc_dt == vect_induction_def) { @@ -7447,10 +7430,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "condition expression based on " "integer induction.\n"); - STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code; - STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) + VECT_REDUC_INFO_CODE (reduc_info) = cond_reduc_op_code; + VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info) = cond_reduc_val; - STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; } } else if (cond_reduc_dt == vect_constant_def) @@ -7471,9 +7454,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "condition expression based on " "compile time constant.\n"); /* Record reduction code at analysis stage. */ - STMT_VINFO_REDUC_CODE (reduc_info) + VECT_REDUC_INFO_CODE (reduc_info) = integer_onep (e) ? MAX_EXPR : MIN_EXPR; - STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = CONST_COND_REDUCTION; } } } @@ -7490,7 +7473,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (nested_cycle) { - gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) + gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); double_reduc = true; } @@ -7530,7 +7513,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, (and also the same tree-code) when generating the epilog code and when generating the code inside the loop. 
*/ - code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info); + code_helper orig_code = VECT_REDUC_INFO_CODE (reduc_info); /* If conversion might have created a conditional operation like IFN_COND_ADD already. Use the internal code for the following checks. */ @@ -7540,9 +7523,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, orig_code = new_code != ERROR_MARK ? new_code : orig_code; } - STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; + VECT_REDUC_INFO_CODE (reduc_info) = orig_code; - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); if (reduction_type == TREE_CODE_REDUCTION) { /* Check whether it's ok to change the order of the computation. @@ -7582,7 +7565,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "supported.\n"); return false; } - STMT_VINFO_REDUC_TYPE (reduc_info) + VECT_REDUC_INFO_TYPE (reduc_info) = reduction_type = FOLD_LEFT_REDUCTION; } else if (!commutative_binary_op_p (orig_code, op.type) @@ -7653,7 +7636,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, OPTIMIZE_FOR_SPEED)) reduc_fn = IFN_REDUC_MAX; } - STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn; + VECT_REDUC_INFO_FN (reduc_info) = reduc_fn; if (reduction_type != EXTRACT_LAST_REDUCTION && (!nested_cycle || double_reduc) @@ -7866,7 +7849,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "using single def-use cycle for reduction by reducing " "multiple vectors to one in the loop body\n"); - STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; + VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; /* For lane-reducing operation, the below processing related to single defuse-cycle will be done in its own vectorizable function. 
One more @@ -8007,13 +7990,13 @@ vect_transform_reduction (loop_vec_info loop_vinfo, class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); unsigned vec_num; - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); if (nested_in_vect_loop_p (loop, stmt_info)) { loop = loop->inner; - gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); + gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info) + == vect_double_reduction_def); } gimple_match_op op; @@ -8060,10 +8043,10 @@ vect_transform_reduction (loop_vec_info loop_vinfo, bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); - vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); if (reduction_type == FOLD_LEFT_REDUCTION) { - internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info); gcc_assert (code.is_tree_code () || cond_fn_p); return vectorize_fold_left_reduction (loop_vinfo, stmt_info, gsi, slp_node, @@ -8071,7 +8054,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, reduc_index, masks, lens); } - bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); + bool single_defuse_cycle = VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info); bool lane_reducing = lane_reducing_op_p (code); gcc_assert (single_defuse_cycle || lane_reducing); @@ -8214,11 +8197,11 @@ vect_transform_reduction (loop_vec_info loop_vinfo, { /* Find suitable def-use cycles to generate vectorized statements into, and reorder operands based on the selection. 
*/ - unsigned curr_pos = reduc_info->reduc_result_pos; + unsigned curr_pos = VECT_REDUC_INFO_RESULT_POS (reduc_info); unsigned next_pos = (curr_pos + effec_ncopies) % effec_reduc_ncopies; gcc_assert (curr_pos < effec_reduc_ncopies); - reduc_info->reduc_result_pos = next_pos; + VECT_REDUC_INFO_RESULT_POS (reduc_info) = next_pos; if (curr_pos) { @@ -8359,11 +8342,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, nested_cycle = true; } - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); - - if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION - || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION) + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); + if (reduc_info + && (VECT_REDUC_INFO_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION + || VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)) /* Leave the scalar phi in place. */ return true; @@ -8371,7 +8353,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Check whether we should use a single PHI node and accumulate vectors to one before the backedge. */ - if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info)) + if (reduc_info && VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info)) vec_num = 1; /* Create the destination vector */ @@ -8386,23 +8368,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Optimize: if initial_def is for REDUC_MAX smaller than the base and we can't use zero for induc_val, use initial_def. Similarly for REDUC_MIN and initial_def larger than the base. 
*/ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + if (reduc_info + && VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) { gcc_assert (SLP_TREE_LANES (slp_node) == 1); tree initial_def = vect_phi_initial_value (phi); - reduc_info->reduc_initial_values.safe_push (initial_def); - tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); + VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).safe_push (initial_def); + tree induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info); if (TREE_CODE (initial_def) == INTEGER_CST && !integer_zerop (induc_val) - && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR + && ((VECT_REDUC_INFO_CODE (reduc_info) == MAX_EXPR && tree_int_cst_lt (initial_def, induc_val)) - || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR + || (VECT_REDUC_INFO_CODE (reduc_info) == MIN_EXPR && tree_int_cst_lt (induc_val, initial_def)))) { induc_val = initial_def; /* Communicate we used the initial_def to epilouge generation. */ - STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; + VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; } vec_initial_defs.quick_push (build_vector_from_val (vectype_out, induc_val)); @@ -8416,7 +8399,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, else { gcc_assert (slp_node == slp_node_instance->reduc_phis); - vec<tree> &initial_values = reduc_info->reduc_initial_values; + vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info); vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node); unsigned int num_phis = stmts.length (); @@ -8434,7 +8417,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, { tree initial_value = (num_phis == 1 ? 
initial_values[0] : NULL_TREE); - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); + code_helper code = VECT_REDUC_INFO_CODE (reduc_info); tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), code, initial_value); @@ -8444,11 +8427,11 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, requires to keep the initial value live across the loop. */ if (neutral_op && initial_values.length () == 1 - && !reduc_info->reused_accumulator + && !VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def && !operand_equal_p (neutral_op, initial_values[0])) { - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) + VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) = initial_values[0]; initial_values[0] = neutral_op; } @@ -8464,7 +8447,8 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, vec_initial_defs.quick_push (vec_initial_def); } - if (auto *accumulator = reduc_info->reused_accumulator) + if (reduc_info) + if (auto *accumulator = VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)) { tree def = accumulator->reduc_input; if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) @@ -8487,7 +8471,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, TYPE_VECTOR_SUBPARTS (vectype_out)); def = vect_create_partial_epilog (def, rvectype, - STMT_VINFO_REDUC_CODE + VECT_REDUC_INFO_CODE (reduc_info), &stmts); } @@ -10258,10 +10242,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, if (SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_group && slp_index != 0) return true; - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); - if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION - || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); + if (VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION + || VECT_REDUC_INFO_TYPE 
(reduc_info) == EXTRACT_LAST_REDUCTION) return true; if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 86508e2..5236eac 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -126,6 +126,8 @@ _slp_tree::_slp_tree () this->avoid_stlf_fail = false; SLP_TREE_VECTYPE (this) = NULL_TREE; SLP_TREE_REPRESENTATIVE (this) = NULL; + this->cycle_info.id = -1; + this->cycle_info.reduc_idx = -1; SLP_TREE_REF_COUNT (this) = 1; this->failed = NULL; this->max_nunits = 1; @@ -2735,6 +2737,7 @@ out: stmt_info = stmts[0]; + int reduc_idx = -1; int gs_scale = 0; tree gs_base = NULL_TREE; @@ -2826,6 +2829,33 @@ out: continue; } + /* See which SLP operand a reduction chain continues on. We want + to chain even PHIs but not backedges. */ + if (VECTORIZABLE_CYCLE_DEF (oprnd_info->first_dt) + || STMT_VINFO_REDUC_IDX (oprnd_info->def_stmts[0]) != -1) + { + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + { + if (oprnd_info->first_dt == vect_double_reduction_def) + reduc_idx = i; + } + else if (is_a <gphi *> (stmt_info->stmt) + && gimple_phi_num_args + (as_a <gphi *> (stmt_info->stmt)) != 1) + ; + else if (STMT_VINFO_REDUC_IDX (stmt_info) == -1 + && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) + ; + else if (reduc_idx == -1) + reduc_idx = i; + else + /* For .COND_* reduction operations the else value can be the + same as one of the operation operands. The other def + stmts have been moved, so we can't check easily. Check + it's a call at least. */ + gcc_assert (is_a <gcall *> (stmt_info->stmt)); + } + /* When we have a masked load with uniform mask discover this as a single-lane mask with a splat permute. This way we can recognize this as a masked load-lane by stripping the splat. 
*/ @@ -3157,6 +3187,41 @@ fail: SLP_TREE_CHILDREN (node).splice (children); SLP_TREE_GS_SCALE (node) = gs_scale; SLP_TREE_GS_BASE (node) = gs_base; + if (reduc_idx != -1) + { + gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) != -1 + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def); + SLP_TREE_REDUC_IDX (node) = reduc_idx; + node->cycle_info.id = SLP_TREE_CHILDREN (node)[reduc_idx]->cycle_info.id; + } + /* When reaching the reduction PHI, create a vect_reduc_info. */ + else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) + && is_a <gphi *> (STMT_VINFO_STMT (stmt_info))) + { + loop_vec_info loop_vinfo = as_a <loop_vec_info> (vinfo); + gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) == -1); + node->cycle_info.id = loop_vinfo->reduc_infos.length (); + vect_reduc_info reduc_info = new vect_reduc_info_s (); + loop_vinfo->reduc_infos.safe_push (reduc_info); + stmt_vec_info reduc_phi = stmt_info; + /* ??? For double reductions vect_is_simple_reduction stores the + reduction type and code on the inner loop header PHI. 
*/ + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) + { + use_operand_p use_p; + gimple *use_stmt; + bool res = single_imm_use (gimple_phi_result (stmt_info->stmt), + &use_p, &use_stmt); + gcc_assert (res); + reduc_phi = loop_vinfo->lookup_stmt (use_stmt); + } + VECT_REDUC_INFO_DEF_TYPE (reduc_info) = STMT_VINFO_DEF_TYPE (stmt_info); + VECT_REDUC_INFO_TYPE (reduc_info) = STMT_VINFO_REDUC_TYPE (reduc_phi); + VECT_REDUC_INFO_CODE (reduc_info) = STMT_VINFO_REDUC_CODE (reduc_phi); + VECT_REDUC_INFO_FN (reduc_info) = IFN_LAST; + } return node; } @@ -3185,8 +3250,12 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, SLP_TREE_REF_COUNT (node)); if (SLP_TREE_VECTYPE (node)) dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node)); - dump_printf (metadata, "%s\n", + dump_printf (metadata, "%s", node->avoid_stlf_fail ? " (avoid-stlf-fail)" : ""); + if (node->cycle_info.id != -1 || node->cycle_info.reduc_idx != -1) + dump_printf (metadata, " cycle %d, link %d", node->cycle_info.id, + node->cycle_info.reduc_idx); + dump_printf (metadata, "\n"); if (SLP_TREE_DEF_TYPE (node) == vect_internal_def) { if (SLP_TREE_PERMUTE_P (node)) @@ -4241,6 +4310,8 @@ vect_analyze_slp_reduc_chain (vec_info *vinfo, TREE_TYPE (gimple_assign_lhs (scalar_def)), group_size); + SLP_TREE_REDUC_IDX (conv) = 0; + conv->cycle_info.id = node->cycle_info.id; SLP_TREE_CHILDREN (conv).quick_push (node); SLP_INSTANCE_TREE (new_instance) = conv; /* We also have to fake this conversion stmt as SLP reduction @@ -6719,10 +6790,12 @@ vect_optimize_slp_pass::start_choosing_layouts () { stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (instance)); - stmt_vec_info reduc_info = info_for_reduction (m_vinfo, stmt_info); + vect_reduc_info reduc_info + = info_for_reduction (as_a <loop_vec_info> (m_vinfo), + SLP_INSTANCE_TREE (instance)); if (needs_fold_left_reduction_p (TREE_TYPE (gimple_get_lhs (stmt_info->stmt)), - STMT_VINFO_REDUC_CODE (reduc_info))) + 
VECT_REDUC_INFO_CODE (reduc_info))) { unsigned int node_i = SLP_INSTANCE_TREE (instance)->vertex; m_partitions[m_vertices[node_i].partition].layout = 0; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 1545fab..77a03ed 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11560,20 +11560,24 @@ vectorizable_condition (vec_info *vinfo, if (code != COND_EXPR) return false; - stmt_vec_info reduc_info = NULL; - int reduc_index = -1; + int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); vect_reduction_type reduction_type = TREE_CODE_REDUCTION; - bool for_reduction - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; + bool nested_cycle_p = false; + bool for_reduction = vect_is_reduction (stmt_info); if (for_reduction) { if (SLP_TREE_LANES (slp_node) > 1) return false; - reduc_info = info_for_reduction (vinfo, stmt_info); - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); - reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); - gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION - || reduc_index != -1); + /* ??? With a reduction path we do not get at the reduction info from + every stmt, use the conservative default setting then. */ + if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) + { + vect_reduc_info reduc_info + = info_for_reduction (loop_vinfo, slp_node); + reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); + nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + stmt_info); + } } else { @@ -11763,7 +11767,7 @@ vectorizable_condition (vec_info *vinfo, vec_num, vectype, NULL); } /* Extra inactive lanes should be safe for vect_nested_cycle. 
*/ - else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) + else if (!nested_cycle_p) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc index 50985a6..d7dc30b 100644 --- a/gcc/tree-vectorizer.cc +++ b/gcc/tree-vectorizer.cc @@ -719,12 +719,10 @@ vec_info::new_stmt_vec_info (gimple *stmt) STMT_VINFO_VECTORIZABLE (res) = true; STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION; STMT_VINFO_REDUC_CODE (res) = ERROR_MARK; - STMT_VINFO_REDUC_FN (res) = IFN_LAST; STMT_VINFO_REDUC_IDX (res) = -1; + STMT_VINFO_REDUC_DEF (res) = NULL; STMT_VINFO_SLP_VECT_ONLY (res) = false; STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false; - res->reduc_initial_values = vNULL; - res->reduc_scalar_results = vNULL; if (is_a <loop_vec_info> (this) && gimple_code (stmt) == GIMPLE_PHI @@ -786,8 +784,6 @@ vec_info::free_stmt_vec_info (stmt_vec_info stmt_info) release_ssa_name (lhs); } - stmt_info->reduc_initial_values.release (); - stmt_info->reduc_scalar_results.release (); free (stmt_info); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index ad7500e..260cb2d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -310,6 +310,13 @@ struct _slp_tree { code generation. */ stmt_vec_info representative; + struct { + /* SLP cycle the node resides in, or -1. */ + int id; + /* The SLP operand index with the edge on the SLP cycle, or -1. */ + int reduc_idx; + } cycle_info; + /* Load permutation relative to the stores, NULL if there is no permutation. 
*/ load_permutation_t load_permutation; @@ -446,6 +453,7 @@ public: #define SLP_TREE_TYPE(S) (S)->type #define SLP_TREE_GS_SCALE(S) (S)->gs_scale #define SLP_TREE_GS_BASE(S) (S)->gs_base +#define SLP_TREE_REDUC_IDX(S) (S)->cycle_info.reduc_idx #define SLP_TREE_PERMUTE_P(S) ((S)->code == VEC_PERM_EXPR) inline vect_memory_access_type @@ -814,6 +822,73 @@ typedef auto_vec<rgroup_controls> vec_loop_lens; typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec; +/* Abstraction around info on reductions which is still in stmt_vec_info + but will be duplicated or moved elsewhere. */ +class vect_reduc_info_s +{ +public: + /* The def type of the main reduction PHI, vect_reduction_def or + vect_double_reduction_def. */ + enum vect_def_type def_type; + + /* The reduction type as detected by + vect_is_simple_reduction and vectorizable_reduction. */ + enum vect_reduction_type reduc_type; + + /* The original scalar reduction code, to be used in the epilogue. */ + code_helper reduc_code; + + /* A vector internal function we should use in the epilogue. */ + internal_fn reduc_fn; + + /* For loop reduction with multiple vectorized results (ncopies > 1), a + lane-reducing operation participating in it may not use all of those + results, this field specifies result index starting from which any + following land-reducing operation would be assigned to. */ + unsigned int reduc_result_pos; + + /* Whether we force a single cycle PHI during reduction vectorization. */ + bool force_single_cycle; + + /* The vector type for performing the actual reduction operation. */ + tree reduc_vectype; + + /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */ + tree induc_cond_initial_val; + + /* If not NULL the value to be added to compute final reduction value. */ + tree reduc_epilogue_adjustment; + + /* If non-null, the reduction is being performed by an epilogue loop + and we have decided to reuse this accumulator from the main loop. 
*/ + struct vect_reusable_accumulator *reused_accumulator; + + /* If the vector code is performing N scalar reductions in parallel, + this variable gives the initial scalar values of those N reductions. */ + auto_vec<tree> reduc_initial_values; + + /* If the vector code is performing N scalar reductions in parallel, this + variable gives the vectorized code's final (scalar) result for each of + those N reductions. In other words, REDUC_SCALAR_RESULTS[I] replaces + the original scalar code's loop-closed SSA PHI for reduction number I. */ + auto_vec<tree> reduc_scalar_results; +}; + +typedef class vect_reduc_info_s *vect_reduc_info; + +#define VECT_REDUC_INFO_DEF_TYPE(I) ((I)->def_type) +#define VECT_REDUC_INFO_TYPE(I) ((I)->reduc_type) +#define VECT_REDUC_INFO_CODE(I) ((I)->reduc_code) +#define VECT_REDUC_INFO_FN(I) ((I)->reduc_fn) +#define VECT_REDUC_INFO_SCALAR_RESULTS(I) ((I)->reduc_scalar_results) +#define VECT_REDUC_INFO_INITIAL_VALUES(I) ((I)->reduc_initial_values) +#define VECT_REDUC_INFO_REUSED_ACCUMULATOR(I) ((I)->reused_accumulator) +#define VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL(I) ((I)->induc_cond_initial_val) +#define VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT(I) ((I)->reduc_epilogue_adjustment) +#define VECT_REDUC_INFO_VECTYPE(I) ((I)->reduc_vectype) +#define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle) +#define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos) + /* Information about a reduction accumulator from the main loop that could conceivably be reused as the input to a reduction in an epilogue loop. */ struct vect_reusable_accumulator { @@ -823,7 +898,7 @@ struct vect_reusable_accumulator { /* The stmt_vec_info that describes the reduction (i.e. the one for which is_reduc_info is true). */ - stmt_vec_info reduc_info; + vect_reduc_info reduc_info; }; /*-----------------------------------------------------------------*/ @@ -879,6 +954,10 @@ public: the main loop, this edge is the one that skips the epilogue. 
*/ edge skip_this_loop_edge; + /* Reduction descriptors of this loop. Referenced to from SLP nodes + by index. */ + auto_vec<vect_reduc_info> reduc_infos; + /* The vectorized form of a standard reduction replaces the original scalar code's final result (a loop-closed SSA PHI) with the result of a vector-to-scalar reduction operation. After vectorization, @@ -1494,62 +1573,22 @@ public: /* For both loads and stores. */ unsigned simd_lane_access_p : 3; - /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */ - tree induc_cond_initial_val; - - /* If not NULL the value to be added to compute final reduction value. */ - tree reduc_epilogue_adjustment; - /* On a reduction PHI the reduction type as detected by - vect_is_simple_reduction and vectorizable_reduction. */ + vect_is_simple_reduction. */ enum vect_reduction_type reduc_type; - /* The original reduction code, to be used in the epilogue. */ + /* On a reduction PHI, the original reduction code as detected by + vect_is_simple_reduction. */ code_helper reduc_code; - /* An internal function we should use in the epilogue. */ - internal_fn reduc_fn; - /* On a stmt participating in the reduction the index of the operand + /* On a stmt participating in a reduction the index of the operand on the reduction SSA cycle. */ int reduc_idx; - /* On a reduction PHI the def returned by vect_force_simple_reduction. - On the def returned by vect_force_simple_reduction the - corresponding PHI. */ + /* On a reduction PHI the def returned by vect_is_simple_reduction. + On the def returned by vect_is_simple_reduction the corresponding PHI. */ stmt_vec_info reduc_def; - /* The vector type for performing the actual reduction. */ - tree reduc_vectype; - - /* For loop reduction with multiple vectorized results (ncopies > 1), a - lane-reducing operation participating in it may not use all of those - results, this field specifies result index starting from which any - following land-reducing operation would be assigned to. 
*/ - unsigned int reduc_result_pos; - - /* If IS_REDUC_INFO is true and if the vector code is performing - N scalar reductions in parallel, this variable gives the initial - scalar values of those N reductions. */ - vec<tree> reduc_initial_values; - - /* If IS_REDUC_INFO is true and if the vector code is performing - N scalar reductions in parallel, this variable gives the vectorized code's - final (scalar) result for each of those N reductions. In other words, - REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed - SSA PHI for reduction number I. */ - vec<tree> reduc_scalar_results; - - /* Only meaningful if IS_REDUC_INFO. If non-null, the reduction is - being performed by an epilogue loop and we have decided to reuse - this accumulator from the main loop. */ - vect_reusable_accumulator *reused_accumulator; - - /* Whether we force a single cycle PHI during reduction vectorization. */ - bool force_single_cycle; - - /* Whether on this stmt reduction meta is recorded. */ - bool is_reduc_info; - /* If nonzero, the lhs of the statement could be truncated to this many bits without affecting any users of the result. 
*/ unsigned int min_output_precision; @@ -1634,10 +1673,7 @@ struct gather_scatter_info { #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p #define STMT_VINFO_STRIDED_P(S) (S)->strided_p #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p -#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val -#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment #define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx -#define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address @@ -1667,12 +1703,10 @@ struct gather_scatter_info { #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code -#define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def -#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype -#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p #define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p +#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in #define DR_GROUP_FIRST_ELEMENT(S) \ (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) @@ -2656,7 +2690,7 @@ extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, unsigned int, tree, tree, unsigned int, unsigned int); extern gimple_seq vect_gen_len (tree, tree, tree, tree); -extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); +extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree); extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); /* Drive for loop transformation stage. 
*/ @@ -2872,7 +2906,14 @@ vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info) inline bool vect_is_reduction (stmt_vec_info stmt_info) { - return STMT_VINFO_REDUC_IDX (stmt_info) >= 0; + return STMT_VINFO_REDUC_IDX (stmt_info) != -1; +} + +/* Return true if SLP_NODE represents part of a reduction. */ +inline bool +vect_is_reduction (slp_tree slp_node) +{ + return SLP_TREE_REDUC_IDX (slp_node) != -1; } /* If STMT_INFO describes a reduction, return the vect_reduction_type @@ -2885,9 +2926,9 @@ vect_reduc_type (vec_info *vinfo, slp_tree node) stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); if (STMT_VINFO_REDUC_DEF (stmt_info)) { - stmt_vec_info reduc_info - = info_for_reduction (loop_vinfo, stmt_info); - return int (STMT_VINFO_REDUC_TYPE (reduc_info)); + vect_reduc_info reduc_info + = info_for_reduction (loop_vinfo, node); + return int (VECT_REDUC_INFO_TYPE (reduc_info)); } } return -1; |