Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 1635
1 file changed, 490 insertions, 1145 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2782d61..d623672 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-eh.h" #include "case-cfn-macros.h" #include "langhooks.h" +#include "opts.h" /* Loop Vectorization Pass. @@ -162,218 +163,6 @@ static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *, static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, bool *, bool *, bool); -/* Subroutine of vect_determine_vf_for_stmt that handles only one - statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE - may already be set for general statements (not just data refs). */ - -static opt_result -vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, - bool vectype_maybe_set_p, - poly_uint64 *vf) -{ - gimple *stmt = stmt_info->stmt; - - if ((!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) - || gimple_clobber_p (stmt)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); - return opt_result::success (); - } - - tree stmt_vectype, nunits_vectype; - opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, - &stmt_vectype, - &nunits_vectype); - if (!res) - return res; - - if (stmt_vectype) - { - if (STMT_VINFO_VECTYPE (stmt_info)) - /* The only case when a vectype had been already set is for stmts - that contain a data ref, or for "pattern-stmts" (stmts generated - by the vectorizer to represent/replace a certain idiom). */ - gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) - || vectype_maybe_set_p) - && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); - else - STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; - } - - if (nunits_vectype) - vect_update_max_nunits (vf, nunits_vectype); - - return opt_result::success (); -} - -/* Subroutine of vect_determine_vectorization_factor. Set the vector - types of STMT_INFO and all attached pattern statements and update - the vectorization factor VF accordingly. Return true on success - or false if something prevented vectorization. */ - -static opt_result -vect_determine_vf_for_stmt (vec_info *vinfo, - stmt_vec_info stmt_info, poly_uint64 *vf) -{ - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", - stmt_info->stmt); - opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); - if (!res) - return res; - - if (STMT_VINFO_IN_PATTERN_P (stmt_info) - && STMT_VINFO_RELATED_STMT (stmt_info)) - { - gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); - - /* If a pattern statement has def stmts, analyze them too. */ - for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); - !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern def stmt: %G", - def_stmt_info->stmt); - res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); - if (!res) - return res; - } - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern statement: %G", - stmt_info->stmt); - res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); - if (!res) - return res; - } - - return opt_result::success (); -} - -/* Function vect_determine_vectorization_factor - - Determine the vectorization factor (VF). 
VF is the number of data elements - that are operated upon in parallel in a single iteration of the vectorized - loop. For example, when vectorizing a loop that operates on 4byte elements, - on a target with vector size (VS) 16byte, the VF is set to 4, since 4 - elements can fit in a single vector register. - - We currently support vectorization of loops in which all types operated upon - are of the same size. Therefore this function currently sets VF according to - the size of the types operated upon, and fails if there are multiple sizes - in the loop. - - VF is also the factor by which the loop iterations are strip-mined, e.g.: - original loop: - for (i=0; i<N; i++){ - a[i] = b[i] + c[i]; - } - - vectorized loop: - for (i=0; i<N; i+=VF){ - a[i:VF] = b[i:VF] + c[i:VF]; - } -*/ - -static opt_result -vect_determine_vectorization_factor (loop_vec_info loop_vinfo) -{ - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned nbbs = loop->num_nodes; - poly_uint64 vectorization_factor = 1; - tree scalar_type = NULL_TREE; - gphi *phi; - tree vectype; - stmt_vec_info stmt_info; - unsigned i; - - DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); - - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) - { - phi = si.phi (); - stmt_info = loop_vinfo->lookup_stmt (phi); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G", - (gimple *) phi); - - gcc_assert (stmt_info); - - if (STMT_VINFO_RELEVANT_P (stmt_info) - || STMT_VINFO_LIVE_P (stmt_info)) - { - gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); - scalar_type = TREE_TYPE (PHI_RESULT (phi)); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for scalar type: %T\n", - scalar_type); - - vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); - if (!vectype) - return opt_result::failure_at (phi, - "not vectorized: unsupported " - "data-type %T\n", - scalar_type); - STMT_VINFO_VECTYPE (stmt_info) = vectype; - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", - vectype); - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); - dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); - dump_printf (MSG_NOTE, "\n"); - } - - vect_update_max_nunits (&vectorization_factor, vectype); - } - } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - if (is_gimple_debug (gsi_stmt (si))) - continue; - stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - opt_result res - = vect_determine_vf_for_stmt (loop_vinfo, - stmt_info, &vectorization_factor); - if (!res) - return res; - } - } - - /* TODO: Analyze cost. Decide if worth while to vectorize. */ - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); - dump_dec (MSG_NOTE, vectorization_factor); - dump_printf (MSG_NOTE, "\n"); - } - - if (known_le (vectorization_factor, 1U)) - return opt_result::failure_at (vect_location, - "not vectorized: unsupported data-type\n"); - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - return opt_result::success (); -} - /* Function vect_is_simple_iv_evolution. @@ -381,8 +170,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) considered a polynomial evolution. 
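   As a minimal illustration (SSA names made up), for the induction

     i_1 = PHI <0(preheader), i_2(latch)>;
     i_2 = i_1 + 4;

   analyze_scalar_evolution yields the chrec {0, +, 4}_1; its initial
   condition 0 and step 4 are now recorded directly in the stmt's
   LOOP_PHI_EVOLUTION_BASE_UNCHANGED and LOOP_PHI_EVOLUTION_PART fields
   rather than being returned through INIT/STEP out-parameters.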
*/ static bool -vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, - tree * step) +vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, + stmt_vec_info stmt_info) { tree init_expr; tree step_expr; @@ -406,8 +195,8 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n", step_expr, init_expr); - *init = init_expr; - *step = step_expr; + STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = init_expr; + STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = step_expr; if (TREE_CODE (step_expr) != INTEGER_CST && (TREE_CODE (step_expr) != SSA_NAME @@ -438,7 +227,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, For neg induction, return a fake step as integer -1. */ static bool vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, - gphi* loop_phi_node, tree *init, tree *step) + gphi* loop_phi_node) { tree init_expr, ev_expr, result, op1, op2; gimple* def; @@ -453,7 +242,6 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr))) return false; - *init = init_expr; result = PHI_RESULT (loop_phi_node); if (TREE_CODE (ev_expr) != SSA_NAME @@ -462,12 +250,13 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, return false; enum tree_code t_code = gimple_assign_rhs_code (def); + tree step; switch (t_code) { case NEGATE_EXPR: if (gimple_assign_rhs1 (def) != result) return false; - *step = build_int_cst (TREE_TYPE (init_expr), -1); + step = build_int_cst (TREE_TYPE (init_expr), -1); STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg; break; @@ -479,7 +268,7 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, if (TREE_CODE (op2) != INTEGER_CST || op1 != result) return false; - *step = op2; + step = op2; if (t_code == LSHIFT_EXPR) STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl; else if (t_code == RSHIFT_EXPR) @@ -493,8 +282,8 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, return false; } - STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init; - STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step; + STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = init_expr; + STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = step; return true; } @@ -589,7 +378,6 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop, bool slp) { basic_block bb = loop->header; - tree init, step; auto_vec<stmt_vec_info, 64> worklist; gphi_iterator gsi; bool double_reduc, reduc_chain; @@ -619,28 +407,21 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop, /* Analyze the evolution function. 
*/ access_fn = analyze_scalar_evolution (loop, def); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Access function of PHI: %T\n", access_fn); if (access_fn) - { - STRIP_NOPS (access_fn); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Access function of PHI: %T\n", access_fn); - STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) - = initial_condition_in_loop_num (access_fn, loop->num); - STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) - = evolution_part_in_loop_num (access_fn, loop->num); - } + STRIP_NOPS (access_fn); if ((!access_fn || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) - || !vect_is_simple_iv_evolution (loop->num, access_fn, - &init, &step) + || !vect_is_simple_iv_evolution (loop->num, access_fn, stmt_vinfo) || (LOOP_VINFO_LOOP (loop_vinfo) != loop - && TREE_CODE (step) != INTEGER_CST)) + && (TREE_CODE (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)) + != INTEGER_CST))) /* Only handle nonlinear iv for same loop. */ && (LOOP_VINFO_LOOP (loop_vinfo) != loop - || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo, - phi, &init, &step))) + || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo, phi))) { worklist.safe_push (stmt_vinfo); continue; @@ -1057,6 +838,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) unaligned_dr (NULL), peeling_for_alignment (0), ptr_mask (0), + max_spec_read_amount (0), nonlinear_iv (false), ivexpr_map (NULL), scan_map (NULL), @@ -1165,6 +947,8 @@ _loop_vec_info::~_loop_vec_info () delete scan_map; delete scalar_costs; delete vector_costs; + for (auto reduc_info : reduc_infos) + delete reduc_info; /* When we release an epiloge vinfo that we do not intend to use avoid clearing AUX of the main loop which should continue to @@ -1939,6 +1723,17 @@ vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call, } } + if (!integer_onep (info->assumptions)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Loop to be versioned with niter assumption "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, info->assumptions); + dump_printf (MSG_NOTE, "\n"); + } + } + return opt_result::success (); } @@ -1967,7 +1762,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, for (gcond *cond : info->conds) { stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond); - STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; /* Mark the statement as a condition. */ STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def; } @@ -1984,9 +1778,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, if (info->inner_loop_cond) { - stmt_vec_info inner_loop_cond_info - = loop_vinfo->lookup_stmt (info->inner_loop_cond); - STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type; /* If we have an estimate on the number of iterations of the inner loop use that to limit the scale for costing, otherwise use --param vect-inner-loop-cost-factor literally. */ @@ -2001,234 +1792,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, -/* Scan the loop stmts and dependent on whether there are any (non-)SLP - statements update the vectorization factor. 
*/ - -static void -vect_update_vf_for_slp (loop_vec_info loop_vinfo) -{ - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - poly_uint64 vectorization_factor; - int i; - - DUMP_VECT_SCOPE ("vect_update_vf_for_slp"); - - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - gcc_assert (known_ne (vectorization_factor, 0U)); - - /* If all the stmts in the loop can be SLPed, we perform only SLP, and - vectorization factor of the loop is the unrolling factor required by - the SLP instances. If that unrolling factor is 1, we say, that we - perform pure SLP on loop - cross iteration parallelism is not - exploited. */ - bool only_slp_in_loop = true; - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); - if (!stmt_info) - continue; - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - if (is_gimple_debug (gsi_stmt (si))) - continue; - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - stmt_info = vect_stmt_to_vectorize (stmt_info); - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - } - - if (only_slp_in_loop) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Loop contains only SLP stmts\n"); - vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); - } - else - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Loop contains SLP and non-SLP stmts\n"); - /* Both the vectorization factor and unroll factor have the form - GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, - so they must have a common multiple. */ - vectorization_factor - = force_common_multiple (vectorization_factor, - LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); - } - - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "Updating vectorization factor to "); - dump_dec (MSG_NOTE, vectorization_factor); - dump_printf (MSG_NOTE, ".\n"); - } -} - -/* Return true if STMT_INFO describes a double reduction phi and if - the other phi in the reduction is also relevant for vectorization. - This rejects cases such as: - - outer1: - x_1 = PHI <x_3(outer2), ...>; - ... - - inner: - x_2 = ...; - ... - - outer2: - x_3 = PHI <x_2(inner)>; - - if nothing in x_2 or elsewhere makes x_1 relevant. */ - -static bool -vect_active_double_reduction_p (stmt_vec_info stmt_info) -{ - if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) - return false; - - return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info)); -} - -/* Function vect_analyze_loop_operations. - - Scan the loop stmts and make sure they are all vectorizable. 
*/ - -static opt_result -vect_analyze_loop_operations (loop_vec_info loop_vinfo) -{ - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - int i; - stmt_vec_info stmt_info; - - DUMP_VECT_SCOPE ("vect_analyze_loop_operations"); - - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gphi *phi = si.phi (); - - stmt_info = loop_vinfo->lookup_stmt (phi); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", - (gimple *) phi); - if (virtual_operand_p (gimple_phi_result (phi))) - continue; - - /* ??? All of the below unconditional FAILs should be in - done earlier after analyzing cycles, possibly when - determining stmt relevancy? */ - - /* Inner-loop loop-closed exit phi in outer-loop vectorization - (i.e., a phi in the tail of the outer-loop). */ - if (! is_loop_header_bb_p (bb)) - { - /* FORNOW: we currently don't support the case that these phis - are not used in the outerloop (unless it is double reduction, - i.e., this phi is vect_reduction_def), cause this case - requires to actually do something here. */ - if (STMT_VINFO_LIVE_P (stmt_info) - && !vect_active_double_reduction_p (stmt_info)) - return opt_result::failure_at (phi, - "Unsupported loop-closed phi" - " in outer-loop.\n"); - - /* If PHI is used in the outer loop, we check that its operand - is defined in the inner loop. */ - if (STMT_VINFO_RELEVANT_P (stmt_info)) - { - tree phi_op; - - if (gimple_phi_num_args (phi) != 1) - return opt_result::failure_at (phi, "unsupported phi"); - - phi_op = PHI_ARG_DEF (phi, 0); - stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); - if (!op_def_info) - return opt_result::failure_at (phi, "unsupported phi\n"); - - if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer - && (STMT_VINFO_RELEVANT (op_def_info) - != vect_used_in_outer_by_reduction)) - return opt_result::failure_at (phi, "unsupported phi\n"); - - if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def - || (STMT_VINFO_DEF_TYPE (stmt_info) - == vect_double_reduction_def)) - && ! PURE_SLP_STMT (stmt_info)) - return opt_result::failure_at (phi, "unsupported phi\n"); - } - - continue; - } - - gcc_assert (stmt_info); - - if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope - || STMT_VINFO_LIVE_P (stmt_info)) - && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def - && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence) - /* A scalar-dependence cycle that we don't support. */ - return opt_result::failure_at (phi, - "not vectorized:" - " scalar dependence cycle.\n"); - - if (STMT_VINFO_RELEVANT_P (stmt_info) - && ! PURE_SLP_STMT (stmt_info)) - return opt_result::failure_at (phi, - "not vectorized: relevant phi not " - "supported: %G", - static_cast <gimple *> (phi)); - } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gimple *stmt = gsi_stmt (si); - if (!gimple_clobber_p (stmt) - && !is_gimple_debug (stmt)) - { - bool need_to_vectorize = false; - opt_result res - = vect_analyze_stmt (loop_vinfo, - loop_vinfo->lookup_stmt (stmt), - &need_to_vectorize, - NULL, NULL, NULL); - if (!res) - return res; - } - } - } /* bbs */ - - return opt_result::success (); -} - /* Return true if we know that the iteration count is smaller than the vectorization factor. Return false if it isn't, or if we can't be sure either way. 
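   For example, with a known iteration count of at most 3 and a
   vectorization factor of 4 the vectorized loop body could never
   execute; callers can use this, e.g., to steer peeling and
   partial-vector decisions (an illustrative reading, not an
   exhaustive list of uses).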
*/ @@ -2529,78 +2092,6 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, return opt_result::success (); } -/* Look for SLP-only access groups and turn each individual access into its own - group. */ -static void -vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) -{ - unsigned int i; - struct data_reference *dr; - - DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups"); - - vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); - FOR_EACH_VEC_ELT (datarefs, i, dr) - { - gcc_assert (DR_REF (dr)); - stmt_vec_info stmt_info - = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (DR_STMT (dr))); - - /* Check if the load is a part of an interleaving chain. */ - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) - { - stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info); - dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element); - unsigned int group_size = DR_GROUP_SIZE (first_element); - - /* Check if SLP-only groups. */ - if (!STMT_SLP_TYPE (stmt_info) - && STMT_VINFO_SLP_VECT_ONLY (first_element)) - { - /* Dissolve the group. */ - STMT_VINFO_SLP_VECT_ONLY (first_element) = false; - - stmt_vec_info vinfo = first_element; - while (vinfo) - { - stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo); - DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; - DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; - DR_GROUP_SIZE (vinfo) = 1; - if (STMT_VINFO_STRIDED_P (first_element) - /* We cannot handle stores with gaps. */ - || DR_IS_WRITE (dr_info->dr)) - { - STMT_VINFO_STRIDED_P (vinfo) = true; - DR_GROUP_GAP (vinfo) = 0; - } - else - DR_GROUP_GAP (vinfo) = group_size - 1; - /* Duplicate and adjust alignment info, it needs to - be present on each group leader, see dr_misalignment. */ - if (vinfo != first_element) - { - dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo); - dr_info2->target_alignment = dr_info->target_alignment; - int misalignment = dr_info->misalignment; - if (misalignment != DR_MISALIGNMENT_UNKNOWN) - { - HOST_WIDE_INT diff - = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr)) - - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))); - unsigned HOST_WIDE_INT align_c - = dr_info->target_alignment.to_constant (); - misalignment = (misalignment + diff) % align_c; - } - dr_info2->misalignment = misalignment; - } - vinfo = next; - } - } - } - } -} - /* Determine if operating on full vectors for LOOP_VINFO might leave some scalar iterations still to do. If so, decide how we should handle those scalar iterations. The possibilities are: @@ -2710,18 +2201,17 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo) indicates if some analysis meets fatal error. If one non-NULL pointer SUGGESTED_UNROLL_FACTOR is provided, it's intent to be filled with one worked out suggested unroll factor, while one NULL pointer shows it's - going to apply the suggested unroll factor. SLP_DONE_FOR_SUGGESTED_UF - is to hold the slp decision when the suggested unroll factor is worked - out. */ + going to apply the suggested unroll factor. + SINGLE_LANE_SLP_DONE_FOR_SUGGESTED_UF is to hold whether single-lane + slp was forced when the suggested unroll factor was worked out. 
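   (Put differently: the first analysis pass records here whether it
   had to force single-lane SLP alongside the unroll factor it
   suggests; when the caller re-runs the analysis with
   loop_vinfo->suggested_unroll_factor set, the flag is fed back in so
   the replayed analysis repeats the same SLP decision.)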
*/ static opt_result vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *suggested_unroll_factor, - unsigned& slp_done_for_suggested_uf) + bool& single_lane_slp_done_for_suggested_uf) { opt_result ok = opt_result::success (); int res; unsigned int max_vf = MAX_VECTORIZATION_FACTOR; - poly_uint64 min_vf = 2; loop_vec_info orig_loop_vinfo = NULL; /* If we are dealing with an epilogue then orig_loop_vinfo points to the @@ -2768,7 +2258,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, /* Analyze the data references and also adjust the minimal vectorization factor according to the loads and stores. */ - ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); + ok = vect_analyze_data_refs (loop_vinfo, &fatal); if (!ok) { if (dump_enabled_p ()) @@ -2781,14 +2271,14 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1; gcc_assert (!applying_suggested_uf || !suggested_unroll_factor); - /* If the slp decision is false when suggested unroll factor is worked - out, and we are applying suggested unroll factor, we can simply skip - all slp related analyses this time. */ - unsigned slp = !applying_suggested_uf ? 2 : slp_done_for_suggested_uf; + /* When single-lane SLP was forced and we are applying suggested unroll + factor, keep that decision here. */ + bool force_single_lane = (applying_suggested_uf + && single_lane_slp_done_for_suggested_uf); /* Classify all cross-iteration scalar data-flow cycles. Cross-iteration cycles caused by virtual phis are analyzed separately. */ - vect_analyze_scalar_cycles (loop_vinfo, slp == 2); + vect_analyze_scalar_cycles (loop_vinfo, !force_single_lane); vect_pattern_recog (loop_vinfo); @@ -2833,24 +2323,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, "bad data dependence.\n"); return ok; } - if (max_vf != MAX_VECTORIZATION_FACTOR - && maybe_lt (max_vf, min_vf)) - return opt_result::failure_at (vect_location, "bad data dependence.\n"); LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; - ok = vect_determine_vectorization_factor (loop_vinfo); - if (!ok) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't determine vectorization factor.\n"); - return ok; - } - /* Compute the scalar iteration cost. */ vect_compute_single_scalar_iteration_cost (loop_vinfo); - poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); bool saved_can_use_partial_vectors_p = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); @@ -2861,26 +2338,33 @@ start_over: /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ ok = vect_analyze_slp (loop_vinfo, loop_vinfo->stmt_vec_infos.length (), - slp == 1); + force_single_lane); if (!ok) return ok; /* If there are any SLP instances mark them as pure_slp. */ - if (vect_make_slp_decision (loop_vinfo)) - { - /* Find stmts that need to be both vectorized and SLPed. */ - vect_detect_hybrid_slp (loop_vinfo); + if (!vect_make_slp_decision (loop_vinfo)) + return opt_result::failure_at (vect_location, "no stmts to vectorize.\n"); - /* Update the vectorization factor based on the SLP decision. */ - vect_update_vf_for_slp (loop_vinfo); - - /* Optimize the SLP graph with the vectorization factor fixed. */ - vect_optimize_slp (loop_vinfo); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "Loop contains only SLP stmts\n"); - /* Gather the loads reachable from the SLP graph entries. 
*/ - vect_gather_slp_loads (loop_vinfo); + /* Determine the vectorization factor from the SLP decision. */ + LOOP_VINFO_VECT_FACTOR (loop_vinfo) + = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); + dump_dec (MSG_NOTE, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + dump_printf (MSG_NOTE, "\n"); } + /* Optimize the SLP graph with the vectorization factor fixed. */ + vect_optimize_slp (loop_vinfo); + + /* Gather the loads reachable from the SLP graph entries. */ + vect_gather_slp_loads (loop_vinfo); + /* We don't expect to have to roll back to anything other than an empty set of rgroups. */ gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); @@ -2949,19 +2433,6 @@ start_over: goto again; } - /* Dissolve SLP-only groups. */ - vect_dissolve_slp_only_groups (loop_vinfo); - - /* Scan all the remaining operations in the loop that we did not catch - during SLP build and make sure we fail. */ - ok = vect_analyze_loop_operations (loop_vinfo); - if (!ok) - { - ok = opt_result::failure_at (vect_location, - "bad operation or unsupported loop bound\n"); - goto again; - } - /* For now, we don't expect to mix both masking and length approaches for one loop, disable it if both are recorded. */ if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) @@ -3202,7 +2673,7 @@ start_over: gcc_assert (known_eq (vectorization_factor, LOOP_VINFO_VECT_FACTOR (loop_vinfo))); - slp_done_for_suggested_uf = slp; + single_lane_slp_done_for_suggested_uf = force_single_lane; /* Ok to vectorize! */ LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; @@ -3213,7 +2684,7 @@ again: gcc_assert (!ok); /* Try again with single-lane SLP. */ - if (slp == 1) + if (force_single_lane) return ok; /* If we are applying suggested unroll factor, we don't need to @@ -3241,7 +2712,7 @@ again: continue; vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); unsigned int size = DR_GROUP_SIZE (vinfo); - tree vectype = STMT_VINFO_VECTYPE (vinfo); + tree vectype = SLP_TREE_VECTYPE (SLP_INSTANCE_TREE (instance)); if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) && ! vect_grouped_store_supported (vectype, size)) @@ -3255,7 +2726,7 @@ again: vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); size = DR_GROUP_SIZE (vinfo); - vectype = STMT_VINFO_VECTYPE (vinfo); + vectype = SLP_TREE_VECTYPE (node); if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST && ! vect_grouped_load_supported (vectype, single_element_p, size)) @@ -3266,13 +2737,13 @@ again: } /* Roll back state appropriately. Force single-lane SLP this time. */ - slp = 1; + force_single_lane = true; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "re-trying with single-lane SLP\n"); - /* Restore vectorization factor as it were without SLP. */ - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; + /* Reset the vectorization factor. */ + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = 0; /* Free the SLP instances. 
*/ FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) vect_free_slp_instance (instance); @@ -3285,7 +2756,7 @@ again: !gsi_end_p (si); gsi_next (&si)) { stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - STMT_SLP_TYPE (stmt_info) = loop_vect; + STMT_SLP_TYPE (stmt_info) = not_vect; if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) { @@ -3304,7 +2775,7 @@ again: if (is_gimple_debug (gsi_stmt (si))) continue; stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - STMT_SLP_TYPE (stmt_info) = loop_vect; + STMT_SLP_TYPE (stmt_info) = not_vect; if (STMT_VINFO_IN_PATTERN_P (stmt_info)) { stmt_vec_info pattern_stmt_info @@ -3313,11 +2784,11 @@ again: STMT_VINFO_IN_PATTERN_P (stmt_info) = false; gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; + STMT_SLP_TYPE (pattern_stmt_info) = not_vect; for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); !gsi_end_p (pi); gsi_next (&pi)) STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) - = loop_vect; + = not_vect; } } } @@ -3400,8 +2871,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, } /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is - not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance - MODE_I to the next mode useful to analyze. + not NULL. When MASKED_P is not -1 override the default + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it. + Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next + mode useful to analyze. Return the loop_vinfo on success and wrapped null on failure. */ static opt_loop_vec_info @@ -3409,6 +2882,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *loop_form_info, loop_vec_info orig_loop_vinfo, const vector_modes &vector_modes, unsigned &mode_i, + int masked_p, machine_mode &autodetected_vector_mode, bool &fatal) { @@ -3417,13 +2891,15 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, machine_mode vector_mode = vector_modes[mode_i]; loop_vinfo->vector_mode = vector_mode; + if (masked_p != -1) + loop_vinfo->can_use_partial_vectors_p = masked_p; unsigned int suggested_unroll_factor = 1; - unsigned slp_done_for_suggested_uf = 0; + bool single_lane_slp_done_for_suggested_uf = false; /* Run the main analysis. */ opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &suggested_unroll_factor, - slp_done_for_suggested_uf); + single_lane_slp_done_for_suggested_uf); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "***** Analysis %s with vector mode %s\n", @@ -3455,16 +2931,17 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "***** Re-trying analysis for unrolling" - " with unroll factor %d and slp %s.\n", + " with unroll factor %d and %s slp.\n", suggested_unroll_factor, - slp_done_for_suggested_uf ? "on" : "off"); + single_lane_slp_done_for_suggested_uf + ? 
"single-lane" : ""); loop_vec_info unroll_vinfo = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); unroll_vinfo->vector_mode = vector_mode; unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, - slp_done_for_suggested_uf); + single_lane_slp_done_for_suggested_uf); if (new_res) { delete loop_vinfo; @@ -3600,7 +3077,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, cached_vf_per_mode[last_mode_i] = -1; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, - NULL, vector_modes, mode_i, + NULL, vector_modes, mode_i, -1, autodetected_vector_mode, fatal); if (fatal) break; @@ -3685,18 +3162,21 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, array may contain length-agnostic and length-specific modes. Their ordering is not guaranteed, so we could end up picking a mode for the main loop that is after the epilogue's optimal mode. */ + int masked_p = -1; if (!unlimited_cost_model (loop) - && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode) + && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p) + != VOIDmode)) { vector_modes[0] - = first_loop_vinfo->vector_costs->suggested_epilogue_mode (); + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p); cached_vf_per_mode[0] = 0; } else vector_modes[0] = autodetected_vector_mode; mode_i = 0; - bool supports_partial_vectors = param_vect_partial_vector_usage != 0; + bool supports_partial_vectors = (param_vect_partial_vector_usage != 0 + || masked_p == 1); machine_mode mask_mode; if (supports_partial_vectors && !partial_vectors_supported_p () @@ -3710,6 +3190,10 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, loop_vec_info orig_loop_vinfo = first_loop_vinfo; do { + /* Let the user override what the target suggests. */ + if (OPTION_SET_P (param_vect_partial_vector_usage)) + masked_p = -1; + while (1) { /* If the target does not support partial vectors we can shorten the @@ -3750,7 +3234,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, orig_loop_vinfo, - vector_modes, mode_i, + vector_modes, mode_i, masked_p, autodetected_vector_mode, fatal); if (fatal) break; @@ -3781,6 +3265,9 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, break; } + /* Revert back to the default from the suggested prefered + epilogue vectorization mode. */ + masked_p = -1; if (mode_i == vector_modes.length ()) break; } @@ -3791,13 +3278,14 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, /* When we selected a first vectorized epilogue, see if the target suggests to have another one. */ + masked_p = -1; if (!unlimited_cost_model (loop) && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo) - && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode () + && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p) != VOIDmode)) { vector_modes[0] - = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (); + = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p); cached_vf_per_mode[0] = 0; mode_i = 0; } @@ -5303,8 +4791,9 @@ have_whole_vector_shift (machine_mode mode) See vect_emulate_mixed_dot_prod for the actual sequence used. 
*/ static bool -vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info) +vect_is_emulated_mixed_dot_prod (slp_tree slp_node) { + stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (slp_node); gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); if (!assign || gimple_assign_rhs_code (assign) != DOT_PROD_EXPR) return false; @@ -5314,10 +4803,10 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info) if (TYPE_SIGN (TREE_TYPE (rhs1)) == TYPE_SIGN (TREE_TYPE (rhs2))) return false; - gcc_assert (STMT_VINFO_REDUC_VECTYPE_IN (stmt_info)); return !directly_supported_p (DOT_PROD_EXPR, - STMT_VINFO_VECTYPE (stmt_info), - STMT_VINFO_REDUC_VECTYPE_IN (stmt_info), + SLP_TREE_VECTYPE (slp_node), + SLP_TREE_VECTYPE + (SLP_TREE_CHILDREN (slp_node)[0]), optab_vector_mixed_sign); } @@ -5332,7 +4821,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info) static void vect_model_reduction_cost (loop_vec_info loop_vinfo, - stmt_vec_info stmt_info, internal_fn reduc_fn, + slp_tree node, internal_fn reduc_fn, vect_reduction_type reduction_type, int ncopies, stmt_vector_for_cost *cost_vec) { @@ -5348,9 +4837,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, if (reduction_type == COND_REDUCTION) ncopies *= 2; - vectype = STMT_VINFO_VECTYPE (stmt_info); + vectype = SLP_TREE_VECTYPE (node); mode = TYPE_MODE (vectype); - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); + stmt_vec_info orig_stmt_info + = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node)); gimple_match_op op; if (!gimple_extract_op (orig_stmt_info->stmt, &op)) @@ -5368,16 +4858,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, if (reduc_fn != IFN_LAST) /* Count one reduction-like operation per vector. */ inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, - stmt_info, 0, vect_body); + node, 0, vect_body); else { /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); inside_cost = record_stmt_cost (cost_vec, nelements, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, node, 0, vect_body); inside_cost += record_stmt_cost (cost_vec, nelements, - scalar_stmt, stmt_info, 0, + scalar_stmt, node, 0, vect_body); } } @@ -5394,7 +4884,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, /* We need the initial reduction value. */ prologue_stmts = 1; prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, node, 0, vect_prologue); } @@ -5411,24 +4901,24 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, { /* An EQ stmt and an COND_EXPR stmt. */ epilogue_cost += record_stmt_cost (cost_vec, 2, - vector_stmt, stmt_info, 0, + vector_stmt, node, 0, vect_epilogue); /* Reduction of the max index and a reduction of the found values. */ epilogue_cost += record_stmt_cost (cost_vec, 2, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, node, 0, vect_epilogue); /* A broadcast of the max value. */ epilogue_cost += record_stmt_cost (cost_vec, 1, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, node, 0, vect_epilogue); } else { epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, - stmt_info, 0, vect_epilogue); + node, 0, vect_epilogue); epilogue_cost += record_stmt_cost (cost_vec, 1, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, node, 0, vect_epilogue); } } @@ -5438,12 +4928,12 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, /* Extraction of scalar elements. 
*/ epilogue_cost += record_stmt_cost (cost_vec, 2 * estimated_nunits, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, node, 0, vect_epilogue); /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ epilogue_cost += record_stmt_cost (cost_vec, 2 * estimated_nunits - 3, - scalar_stmt, stmt_info, 0, + scalar_stmt, node, 0, vect_epilogue); } else if (reduction_type == EXTRACT_LAST_REDUCTION @@ -5469,10 +4959,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, Also requires scalar extract. */ epilogue_cost += record_stmt_cost (cost_vec, exact_log2 (nelements) * 2, - vector_stmt, stmt_info, 0, + vector_stmt, node, 0, vect_epilogue); epilogue_cost += record_stmt_cost (cost_vec, 1, - vec_to_scalar, stmt_info, 0, + vec_to_scalar, node, 0, vect_epilogue); } else @@ -5480,7 +4970,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, elements, we have N extracts and N-1 reduction ops. */ epilogue_cost += record_stmt_cost (cost_vec, nelements + nelements - 1, - vector_stmt, stmt_info, 0, + vector_stmt, node, 0, vect_epilogue); } } @@ -5497,9 +4987,9 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, static void vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, gimple *seq) + vect_reduc_info reduc_info, gimple *seq) { - if (reduc_info->reused_accumulator) + if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)) { /* When reusing an accumulator from the main loop, we only need initialization instructions if the main loop can be skipped. @@ -5527,15 +5017,15 @@ vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, static void get_initial_defs_for_reduction (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, + vect_reduc_info reduc_info, + tree vector_type, vec<tree> *vec_oprnds, unsigned int number_of_vectors, unsigned int group_size, tree neutral_op) { - vec<tree> &initial_values = reduc_info->reduc_initial_values; + vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info); unsigned HOST_WIDE_INT nunits; unsigned j, number_of_places_left_in_vector; - tree vector_type = STMT_VINFO_VECTYPE (reduc_info); unsigned int i; gcc_assert (group_size == initial_values.length () || neutral_op); @@ -5549,7 +5039,7 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo, two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES will be 2). - If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors containing the operands. For example, NUNITS is four as before, and the group size is 8 @@ -5639,30 +5129,12 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo, vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq); } -/* For a statement STMT_INFO taking part in a reduction operation return - the stmt_vec_info the meta information is stored on. 
*/ - -stmt_vec_info -info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) +vect_reduc_info +info_for_reduction (loop_vec_info loop_vinfo, slp_tree node) { - stmt_info = vect_orig_stmt (stmt_info); - gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); - if (!is_a <gphi *> (stmt_info->stmt) - || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - gphi *phi = as_a <gphi *> (stmt_info->stmt); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) - { - if (gimple_phi_num_args (phi) == 1) - stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - } - else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) - { - stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); - if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) - stmt_info = info; - } - return stmt_info; + if (node->cycle_info.id == -1) + return NULL; + return loop_vinfo->reduc_infos[node->cycle_info.id]; } /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that @@ -5671,16 +5143,16 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) static bool vect_find_reusable_accumulator (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info) + vect_reduc_info reduc_info, tree vectype) { loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); if (!main_loop_vinfo) return false; - if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION) + if (VECT_REDUC_INFO_TYPE (reduc_info) != TREE_CODE_REDUCTION) return false; - unsigned int num_phis = reduc_info->reduc_initial_values.length (); + unsigned int num_phis = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).length (); auto_vec<tree, 16> main_loop_results (num_phis); auto_vec<tree, 16> initial_values (num_phis); if (edge main_loop_edge = loop_vinfo->main_loop_edge) @@ -5688,7 +5160,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, /* The epilogue loop can be entered either from the main loop or from an earlier guard block. */ edge skip_edge = loop_vinfo->skip_main_loop_edge; - for (tree incoming_value : reduc_info->reduc_initial_values) + for (tree incoming_value : VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)) { /* Look for: @@ -5708,19 +5180,18 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, } else /* The main loop dominates the epilogue loop. */ - main_loop_results.splice (reduc_info->reduc_initial_values); + main_loop_results.splice (VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)); /* See if the main loop has the kind of accumulator we need. */ vect_reusable_accumulator *accumulator = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]); if (!accumulator - || num_phis != accumulator->reduc_info->reduc_scalar_results.length () + || num_phis != VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).length () || !std::equal (main_loop_results.begin (), main_loop_results.end (), - accumulator->reduc_info->reduc_scalar_results.begin ())) + VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).begin ())) return false; /* Handle the case where we can reduce wider vectors to narrower ones. 
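     For example (assuming the target can extract the halves), a V8SI
     accumulator from the main loop can seed a V4SI epilogue roughly as

       lo_2 = BIT_FIELD_REF <acc_1, 128, 0>;
       hi_3 = BIT_FIELD_REF <acc_1, 128, 128>;
       start_4 = lo_2 + hi_3;

     provided the reduction code is supported on each intermediate
     vector type and can_vec_extract allows each extraction step.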
*/ - tree vectype = STMT_VINFO_VECTYPE (reduc_info); tree old_vectype = TREE_TYPE (accumulator->reduc_input); unsigned HOST_WIDE_INT m; if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), @@ -5735,7 +5206,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, tree intermediate_vectype = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits); if (!intermediate_vectype - || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), + || !directly_supported_p (VECT_REDUC_INFO_CODE (reduc_info), intermediate_vectype) || !can_vec_extract (TYPE_MODE (prev_vectype), TYPE_MODE (intermediate_vectype))) @@ -5754,7 +5225,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, to select the correct adjustment, but in practice that shouldn't be necessary.) */ tree main_adjustment - = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); + = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); if (loop_vinfo->main_loop_edge && main_adjustment) { gcc_assert (num_phis == 1); @@ -5763,14 +5234,14 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, initialize the accumulator with a neutral value instead. */ if (!operand_equal_p (initial_value, main_adjustment)) return false; - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); + code_helper code = VECT_REDUC_INFO_CODE (reduc_info); initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value), code, initial_value); } - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; - reduc_info->reduc_initial_values.truncate (0); - reduc_info->reduc_initial_values.splice (initial_values); - reduc_info->reused_accumulator = accumulator; + VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; + VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).truncate (0); + VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).splice (initial_values); + VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) = accumulator; return true; } @@ -5923,8 +5394,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, slp_instance slp_node_instance, edge loop_exit) { - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); /* For double reductions we need to get at the inner loop reduction stmt which has the meta info attached. 
Our stmt_info is that of the loop-closed PHI of the inner loop which we remember as @@ -5937,8 +5407,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, (stmt_info->stmt, 0)); stmt_info = vect_stmt_to_vectorize (stmt_info); } - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); - internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + code_helper code = VECT_REDUC_INFO_CODE (reduc_info); + internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info); tree vectype; machine_mode mode; class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; @@ -5958,16 +5428,15 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gimple *use_stmt; auto_vec<tree> reduc_inputs; int j, i; - vec<tree> &scalar_results = reduc_info->reduc_scalar_results; + vec<tree> &scalar_results = VECT_REDUC_INFO_SCALAR_RESULTS (reduc_info); unsigned int k; /* SLP reduction without reduction chain, e.g., # a1 = phi <a2, a0> # b1 = phi <b2, b0> a2 = operation (a1) b2 = operation (b1) */ - bool slp_reduc - = !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)); - bool direct_slp_reduc; + const bool slp_reduc + = SLP_INSTANCE_KIND (slp_node_instance) != slp_inst_kind_reduc_chain; tree induction_index = NULL_TREE; unsigned int group_size = SLP_TREE_LANES (slp_node); @@ -5979,7 +5448,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (double_reduc); } - vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); + vectype = VECT_REDUC_INFO_VECTYPE (reduc_info); gcc_assert (vectype); mode = TYPE_MODE (vectype); @@ -5987,12 +5456,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree adjustment_def = NULL; /* Optimize: for induction condition reduction, if we can't use zero for induc_val, use initial_def. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) - induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); + if (VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info); else if (double_reduc) ; else - adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); + adjustment_def = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info); stmt_vec_info single_live_out_stmt[] = { stmt_info }; array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; @@ -6013,14 +5482,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, PR92772: This algorithm is broken for architectures that support masked vectors, but do not provide fold_extract_last. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION) { auto_vec<std::pair<tree, bool>, 2> ccompares; slp_tree cond_node = slp_node_instance->root; while (cond_node != slp_node_instance->reduc_phis) { stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node); - int slp_reduc_idx; if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) { gimple *vec_stmt @@ -6028,16 +5496,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); ccompares.safe_push (std::make_pair (gimple_assign_rhs1 (vec_stmt), - STMT_VINFO_REDUC_IDX (cond_info) == 2)); - /* ??? We probably want to have REDUC_IDX on the SLP node? - We have both three and four children COND_EXPR nodes - dependent on whether the comparison is still embedded - as GENERIC. So work backwards. 
*/ - slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3 - + STMT_VINFO_REDUC_IDX (cond_info)); + SLP_TREE_REDUC_IDX (cond_node) == 2)); } - else - slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info); + int slp_reduc_idx = SLP_TREE_REDUC_IDX (cond_node); cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx]; } gcc_assert (ccompares.length () != 0); @@ -6193,9 +5654,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* True if we should implement SLP_REDUC using native reduction operations instead of scalar operations. */ - direct_slp_reduc = (reduc_fn != IFN_LAST - && slp_reduc - && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); + const bool direct_slp_reduc + = (reduc_fn != IFN_LAST + && slp_reduc + && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); /* In case of reduction chain, e.g., # a1 = phi <a3, a0> @@ -6216,7 +5678,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, a multiple of the SLP group size. The same is true if we couldn't use a single defuse cycle. */ - if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)) + if (!slp_reduc || direct_slp_reduc || (slp_reduc && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size))) @@ -6243,7 +5705,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, the minor(?) benefit of making the epilogue loop's scalar result independent of the main loop's scalar result. */ bool unify_with_main_loop_p = false; - if (reduc_info->reused_accumulator + if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) && loop_vinfo->skip_this_loop_edge && single_succ_p (exit_bb) && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest) @@ -6255,7 +5717,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block); add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb), UNKNOWN_LOCATION); - add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input, + add_phi_arg (new_phi, + VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)->reduc_input, loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION); exit_gsi = gsi_after_labels (reduc_block); } @@ -6263,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Shouldn't be used beyond this point. */ exit_bb = nullptr; - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION && reduc_fn != IFN_LAST) { /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing @@ -6369,7 +5832,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); scalar_results.safe_push (new_temp); } - else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + else if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION && reduc_fn == IFN_LAST) { /* Condition reduction without supported IFN_REDUC_MAX. 
Generate @@ -6462,7 +5925,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, new_temp = gimple_convert (&stmts, scalar_type, new_temp); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) && induc_val) { /* Earlier we set the initial value to be a vector if induc_val @@ -6473,7 +5936,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, new_temp, induc_val); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - tree initial_def = reduc_info->reduc_initial_values[0]; + tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0]; tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, initial_def, new_temp); @@ -6485,7 +5948,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } else if (direct_slp_reduc) { - /* Here we create one vector for each of the REDUC_GROUP_SIZE results, + /* Here we create one vector for each of the GROUP_SIZE results, with the elements for other SLP statements replaced with the neutral value. We can then do a normal reduction on each vector. */ @@ -6503,7 +5966,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree mask_type = truth_type_for (index_type); /* Create a vector that, for each element, identifies which of - the REDUC_GROUP_SIZE results should use it. */ + the results should use it. */ tree index_mask = build_int_cst (index_elt_type, group_size - 1); index = gimple_build (&seq, BIT_AND_EXPR, index_type, index, build_vector_from_val (index_type, index_mask)); @@ -6512,15 +5975,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, scalar value if we have one, otherwise the initial scalar value is itself a neutral value. */ tree vector_identity = NULL_TREE; - tree neutral_op = NULL_TREE; - if (1) - { - tree initial_value = NULL_TREE; - if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))) - initial_value = reduc_info->reduc_initial_values[0]; - neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, - initial_value, false); - } + tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, + NULL_TREE, false); if (neutral_op) vector_identity = gimple_build_vector_from_val (&seq, vectype, neutral_op); @@ -6531,7 +5987,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, for MIN and MAX reduction, for example. */ if (!neutral_op) { - tree scalar_value = reduc_info->reduc_initial_values[i]; + tree scalar_value + = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[i]; scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), scalar_value); vector_identity = gimple_build_vector_from_val (&seq, vectype, @@ -6723,10 +6180,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } } - /* The only case where we need to reduce scalar results in SLP, is - unrolling. If the size of SCALAR_RESULTS is greater than - REDUC_GROUP_SIZE, we reduce them combining elements modulo - REDUC_GROUP_SIZE. */ + /* The only case where we need to reduce scalar results in a SLP + reduction, is unrolling. If the size of SCALAR_RESULTS is + greater than GROUP_SIZE, we reduce them combining elements modulo + GROUP_SIZE. */ if (slp_reduc) { tree res, first_res, new_res; @@ -6747,7 +6204,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } else { - /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. 
*/ + /* Reduction chain - we have one scalar to keep in + SCALAR_RESULTS. */ new_temp = gimple_convert (&stmts, scalar_type, new_temp); scalar_results.safe_push (new_temp); } @@ -6755,7 +6213,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); } - if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) + if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) && induc_val) { /* Earlier we set the initial value to be a vector if induc_val @@ -6766,7 +6224,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, scalar_results[0], induc_val); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - tree initial_def = reduc_info->reduc_initial_values[0]; + tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0]; tree tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, initial_def, scalar_results[0]); @@ -6809,7 +6267,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } /* Record this operation if it could be reused by the epilogue loop. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION + if (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION && reduc_inputs.length () == 1) loop_vinfo->reusable_accumulators.put (scalar_results[0], { orig_reduc_input, reduc_info }); @@ -7023,14 +6481,13 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, slp_tree slp_node, - gimple *reduc_def_stmt, code_helper code, internal_fn reduc_fn, int num_ops, tree vectype_in, int reduc_index, vec_loop_masks *masks, vec_loop_lens *lens) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + tree vectype_out = SLP_TREE_VECTYPE (slp_node); internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); @@ -7048,6 +6505,13 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), TYPE_VECTOR_SUBPARTS (vectype_in))); + /* ??? We should, when transforming the cycle PHI, record the existing + scalar def as vector def so looking up the vector def works. This + would also allow generalizing this for reduction paths of length > 1 + and/or SLP reductions. */ + slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index]; + tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0); + /* The operands either come from a binary operation or an IFN_COND operation. The former is a gimple assign with binary rhs and the latter is a gimple call with four arguments. 
*/
@@ -7068,7 +6532,6 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
 gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
 tree scalar_dest = gimple_get_lhs (sdef);
 tree scalar_type = TREE_TYPE (scalar_dest);
- tree reduc_var = gimple_phi_result (reduc_def_stmt);
 int vec_num = vec_oprnds0.length ();
 tree vec_elem_type = TREE_TYPE (vectype_out);
@@ -7303,13 +6766,13 @@ build_vect_cond_expr (code_helper code, tree vop[3], tree mask,
 static void
 vect_reduction_update_partial_vector_usage (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info,
+ vect_reduc_info reduc_info,
 slp_tree slp_node,
 code_helper code, tree type,
 tree vectype_in)
 {
- enum vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (reduc_info);
- internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
+ enum vect_reduction_type reduc_type = VECT_REDUC_INFO_TYPE (reduc_info);
+ internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info);
 internal_fn cond_fn = get_conditional_internal_fn (code, type);
 if (reduc_type != FOLD_LEFT_REDUCTION
@@ -7403,28 +6866,27 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 if (!type_has_mode_precision_p (type))
 return false;
- stmt_vec_info reduc_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
 /* TODO: Support lane-reducing operation that does not directly participate
 in loop reduction. */
- if (!reduc_info || STMT_VINFO_REDUC_IDX (stmt_info) < 0)
+ if (!reduc_info)
 return false;
 /* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not
 recognized. */
- gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_reduction_def);
- gcc_assert (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION);
+ gcc_assert (!nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
+ gcc_assert (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION);
 for (int i = 0; i < (int) gimple_num_ops (stmt) - 1; i++)
 {
- stmt_vec_info def_stmt_info;
 slp_tree slp_op;
 tree op;
 tree vectype;
 enum vect_def_type dt;
- if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_node, i, &op,
- &slp_op, &dt, &vectype, &def_stmt_info))
+ if (!vect_is_simple_use (loop_vinfo, slp_node, i, &op,
+ &slp_op, &dt, &vectype))
 {
 if (dump_enabled_p ())
 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7456,8 +6918,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
 return false;
 }
- tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (stmt_info);
-
+ tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]);
 gcc_assert (vectype_in);
 /* Compute number of effective vector statements for costing. */
 unsigned ncopies_for_cost = vect_get_num_copies (loop_vinfo, slp_node,
 vectype_in);
 gcc_assert (ncopies_for_cost >= 1);
- if (vect_is_emulated_mixed_dot_prod (stmt_info))
+ if (vect_is_emulated_mixed_dot_prod (slp_node))
 {
 /* We need extra two invariants: one that contains the minimum signed
 value and one that contains half of its negative. 
*/ int prologue_stmts = 2; unsigned cost = record_stmt_cost (cost_vec, prologue_stmts, - scalar_to_vec, stmt_info, 0, + scalar_to_vec, slp_node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf (MSG_NOTE, "vectorizable_lane_reducing: " @@ -7481,7 +6942,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, ncopies_for_cost *= 4; } - record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, stmt_info, + record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, slp_node, 0, vect_body); if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) @@ -7493,7 +6954,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, } /* Transform via vect_transform_reduction. */ - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } @@ -7564,6 +7025,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, bool double_reduc = false; tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; tree cond_reduc_val = NULL_TREE; + const bool reduc_chain + = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain; /* Make sure it was already recognized as a reduction computation. */ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def @@ -7571,9 +7034,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) return false; - /* The stmt we store reduction analysis meta on. */ - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - reduc_info->is_reduc_info = true; + /* The reduction meta. */ + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) { @@ -7595,18 +7057,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } /* Analysis for double-reduction is done on the outer loop PHI, nested cycles have no further restrictions. */ - STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; + SLP_TREE_TYPE (slp_node) = cycle_phi_info_type; } else - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } - stmt_vec_info orig_stmt_of_analysis = stmt_info; stmt_vec_info phi_info = stmt_info; if (!is_a <gphi *> (stmt_info->stmt)) { - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + SLP_TREE_TYPE (slp_node) = reduc_vec_info_type; return true; } if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) @@ -7646,18 +7107,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo, unsigned reduc_chain_length = 0; bool only_slp_reduc_chain = true; stmt_info = NULL; - slp_tree slp_for_stmt_info = slp_node_instance->root; + slp_tree slp_for_stmt_info = NULL; + slp_tree vdef_slp = slp_node_instance->root; /* For double-reductions we start SLP analysis at the inner loop LC PHI which is the def of the outer loop live stmt. 
*/ - if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def) - slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; + if (VECT_REDUC_INFO_DEF_TYPE (reduc_info) == vect_double_reduction_def) + vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0]; while (reduc_def != PHI_RESULT (reduc_def_phi)) { stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); stmt_vec_info vdef = vect_stmt_to_vectorize (def); int reduc_idx = STMT_VINFO_REDUC_IDX (vdef); - - if (reduc_idx == -1) + if (STMT_VINFO_REDUC_IDX (vdef) == -1 + || SLP_TREE_REDUC_IDX (vdef_slp) == -1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7666,21 +7128,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) only_slp_reduc_chain = false; - /* For epilogue generation live members of the chain need - to point back to the PHI via their original stmt for - info_for_reduction to work. For SLP we need to look at - all lanes here - even though we only will vectorize from - the SLP node with live lane zero the other live lanes also - need to be identified as part of a reduction to be able - to skip code generation for them. */ - if (slp_for_stmt_info) - { - for (auto s : SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)) - if (STMT_VINFO_LIVE_P (s)) - STMT_VINFO_REDUC_DEF (vect_orig_stmt (s)) = phi_info; - } - else if (STMT_VINFO_LIVE_P (vdef)) - STMT_VINFO_REDUC_DEF (def) = phi_info; gimple_match_op op; if (!gimple_extract_op (vdef->stmt, &op)) { @@ -7699,44 +7146,36 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "conversion in the reduction chain.\n"); return false; } + vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0]; } else { /* First non-conversion stmt. */ if (!stmt_info) - stmt_info = vdef; + { + stmt_info = vdef; + slp_for_stmt_info = vdef_slp; + } if (lane_reducing_op_p (op.code)) { - enum vect_def_type dt; - tree vectype_op; - /* The last operand of lane-reducing operation is for reduction. */ gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1); - if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op)) - return false; - + slp_tree op_node = SLP_TREE_CHILDREN (vdef_slp)[0]; + tree vectype_op = SLP_TREE_VECTYPE (op_node); tree type_op = TREE_TYPE (op.ops[0]); - if (!vectype_op) { vectype_op = get_vectype_for_scalar_type (loop_vinfo, type_op); - if (!vectype_op) + if (!vectype_op + || !vect_maybe_update_slp_op_vectype (op_node, + vectype_op)) return false; } - /* For lane-reducing operation vectorizable analysis needs the - reduction PHI information. */ - STMT_VINFO_REDUC_DEF (def) = phi_info; - - /* Each lane-reducing operation has its own input vectype, while - reduction PHI will record the input vectype with the least - lanes. 
*/ - STMT_VINFO_REDUC_VECTYPE_IN (vdef) = vectype_op; - /* To accommodate lane-reducing operations of mixed input vectypes, choose input vectype with the least lanes for the reduction PHI statement, which would result in the most @@ -7746,14 +7185,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo, < GET_MODE_SIZE (SCALAR_TYPE_MODE (type_op)))) vectype_in = vectype_op; } - else - vectype_in = STMT_VINFO_VECTYPE (phi_info); + else if (!vectype_in) + vectype_in = SLP_TREE_VECTYPE (slp_node); + if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) + { + gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp)); + vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx]; + } } reduc_def = op.ops[reduc_idx]; reduc_chain_length++; - if (!stmt_info) - slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; } /* PHIs should not participate in patterns. */ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); @@ -7778,7 +7220,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* Not supportable if the reduction variable is used in the loop, unless it's a reduction chain. */ if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + && !reduc_chain) return false; /* Reductions that are not used even in an enclosing outer-loop, @@ -7805,9 +7247,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, inside the loop body. The last operand is the reduction variable, which is defined by the loop-header-phi. */ - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); - STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; - STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; + tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info); + VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out; gimple_match_op op; if (!gimple_extract_op (stmt_info->stmt, &op)) @@ -7827,7 +7268,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, OK to use them in a reduction chain or when the reduction group has just one element. */ if (lane_reducing - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && !reduc_chain && SLP_TREE_LANES (slp_node) > 1) { if (dump_enabled_p ()) @@ -7855,7 +7296,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, stmt_vec_info def_stmt_info; enum vect_def_type dt; - if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, + if (!vect_is_simple_use (loop_vinfo, slp_for_stmt_info, i + opno_adjust, &op.ops[i], &slp_op[i], &dt, &vectype_op[i], &def_stmt_info)) { @@ -7899,8 +7340,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } } - enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info); - STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type; + enum vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); /* If we have a condition reduction, see if we can simplify it further. */ if (reduction_type == COND_REDUCTION) { @@ -7908,7 +7348,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; /* When the condition uses the reduction value in the condition, fail. 
*/ - if (STMT_VINFO_REDUC_IDX (stmt_info) == 0) + if (SLP_TREE_REDUC_IDX (slp_node) == 0) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7927,7 +7367,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "optimizing condition reduction with" " FOLD_EXTRACT_LAST.\n"); - STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; } else if (cond_reduc_dt == vect_induction_def) { @@ -7971,10 +7411,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "condition expression based on " "integer induction.\n"); - STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code; - STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) + VECT_REDUC_INFO_CODE (reduc_info) = cond_reduc_op_code; + VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info) = cond_reduc_val; - STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; } } else if (cond_reduc_dt == vect_constant_def) @@ -7995,9 +7435,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "condition expression based on " "compile time constant.\n"); /* Record reduction code at analysis stage. */ - STMT_VINFO_REDUC_CODE (reduc_info) + VECT_REDUC_INFO_CODE (reduc_info) = integer_onep (e) ? MAX_EXPR : MIN_EXPR; - STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION; + VECT_REDUC_INFO_TYPE (reduc_info) = CONST_COND_REDUCTION; } } } @@ -8014,7 +7454,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (nested_cycle) { - gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) + gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); double_reduc = true; } @@ -8054,7 +7494,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, (and also the same tree-code) when generating the epilog code and when generating the code inside the loop. */ - code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info); + code_helper orig_code = VECT_REDUC_INFO_CODE (reduc_info); /* If conversion might have created a conditional operation like IFN_COND_ADD already. Use the internal code for the following checks. */ @@ -8064,9 +7504,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, orig_code = new_code != ERROR_MARK ? new_code : orig_code; } - STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; + VECT_REDUC_INFO_CODE (reduc_info) = orig_code; - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); if (reduction_type == TREE_CODE_REDUCTION) { /* Check whether it's ok to change the order of the computation. @@ -8078,7 +7518,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, outer-loop vectorization is safe. Likewise when we are vectorizing a series of reductions using SLP and the VF is one the reductions are performed in scalar order. 
*/ - if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info) + if (!reduc_chain && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u)) ; else if (needs_fold_left_reduction_p (op.type, orig_code)) @@ -8106,7 +7546,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "supported.\n"); return false; } - STMT_VINFO_REDUC_TYPE (reduc_info) + VECT_REDUC_INFO_TYPE (reduc_info) = reduction_type = FOLD_LEFT_REDUCTION; } else if (!commutative_binary_op_p (orig_code, op.type) @@ -8177,7 +7617,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, OPTIMIZE_FOR_SPEED)) reduc_fn = IFN_REDUC_MAX; } - STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn; + VECT_REDUC_INFO_FN (reduc_info) = reduc_fn; if (reduction_type != EXTRACT_LAST_REDUCTION && (!nested_cycle || double_reduc) @@ -8194,7 +7634,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* For SLP reductions, see if there is a neutral value we can use. */ tree neutral_op = NULL_TREE; tree initial_value = NULL_TREE; - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) + if (reduc_chain) initial_value = vect_phi_initial_value (reduc_def_phi); neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), orig_code, initial_value); @@ -8224,7 +7664,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (reduction_type == FOLD_LEFT_REDUCTION && SLP_TREE_LANES (slp_node) > 1 - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + && !reduc_chain) { /* We cannot use in-order reductions in this case because there is an implicit reassociation of the operations involved. */ @@ -8252,7 +7692,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } /* Check extra constraints for variable-length unchained SLP reductions. */ - if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info) + if (!reduc_chain && !nunits_out.is_constant ()) { /* We checked above that we could build the initial vector when @@ -8346,7 +7786,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, own reduction accumulator since one of the main goals of unrolling a reduction is to reduce the aggregate loop-carried latency. */ if (ncopies > 1 - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && !reduc_chain && SLP_TREE_LANES (slp_node) == 1 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) && reduc_chain_length == 1 @@ -8390,7 +7830,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "using single def-use cycle for reduction by reducing " "multiple vectors to one in the loop body\n"); - STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; + VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; /* For lane-reducing operation, the below processing related to single defuse-cycle will be done in its own vectorizable function. One more @@ -8408,19 +7848,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } - vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn, + vect_model_reduction_cost (loop_vinfo, slp_for_stmt_info, reduc_fn, reduction_type, ncopies, cost_vec); /* Cost the reduction op inside the loop if transformed via vect_transform_reduction for non-lane-reducing operation. Otherwise this is costed by the separate vectorizable_* routines. 
*/ if (single_defuse_cycle) - record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body); + record_stmt_cost (cost_vec, ncopies, vector_stmt, + slp_for_stmt_info, 0, vect_body); if (dump_enabled_p () && reduction_type == FOLD_LEFT_REDUCTION) dump_printf_loc (MSG_NOTE, vect_location, "using an in-order (fold-left) reduction.\n"); - STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type; + SLP_TREE_TYPE (slp_node) = cycle_phi_info_type; /* All but single defuse-cycle optimized and fold-left reductions go through their own vectorizable_* routines. */ @@ -8526,17 +7967,17 @@ vect_transform_reduction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, slp_tree slp_node) { - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + tree vectype_out = SLP_TREE_VECTYPE (slp_node); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); unsigned vec_num; - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); if (nested_in_vect_loop_p (loop, stmt_info)) { loop = loop->inner; - gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); + gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info) + == vect_double_reduction_def); } gimple_match_op op; @@ -8547,13 +7988,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, The last use is the reduction variable. In case of nested cycle this assumption is not true: we use reduc_index to record the index of the reduction variable. */ - stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); - int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); - tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (stmt_info); - - if (!vectype_in) - vectype_in = STMT_VINFO_VECTYPE (stmt_info); + int reduc_index = SLP_TREE_REDUC_IDX (slp_node); + tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]); vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in); @@ -8588,18 +8024,18 @@ vect_transform_reduction (loop_vec_info loop_vinfo, bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); - vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); if (reduction_type == FOLD_LEFT_REDUCTION) { - internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info); gcc_assert (code.is_tree_code () || cond_fn_p); return vectorize_fold_left_reduction - (loop_vinfo, stmt_info, gsi, slp_node, reduc_def_phi, + (loop_vinfo, stmt_info, gsi, slp_node, code, reduc_fn, op.num_ops, vectype_in, reduc_index, masks, lens); } - bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); + bool single_defuse_cycle = VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info); bool lane_reducing = lane_reducing_op_p (code); gcc_assert (single_defuse_cycle || lane_reducing); @@ -8618,7 +8054,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (!cond_fn_p) { gcc_assert (reduc_index >= 0 && reduc_index <= 2); - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, + vect_get_vec_defs (loop_vinfo, slp_node, single_defuse_cycle && reduc_index == 0 ? NULL_TREE : op.ops[0], &vec_oprnds[0], single_defuse_cycle && reduc_index == 1 @@ -8633,19 +8069,19 @@ vect_transform_reduction (loop_vec_info loop_vinfo, vectype. 
*/ gcc_assert (single_defuse_cycle && (reduc_index == 1 || reduc_index == 2)); - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, op.ops[0], - truth_type_for (vectype_in), &vec_oprnds[0], + vect_get_vec_defs (loop_vinfo, slp_node, op.ops[0], + &vec_oprnds[0], reduc_index == 1 ? NULL_TREE : op.ops[1], - NULL_TREE, &vec_oprnds[1], + &vec_oprnds[1], reduc_index == 2 ? NULL_TREE : op.ops[2], - NULL_TREE, &vec_oprnds[2]); + &vec_oprnds[2]); } /* For single def-use cycles get one copy of the vectorized reduction definition. */ if (single_defuse_cycle) { - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, + vect_get_vec_defs (loop_vinfo, slp_node, reduc_index == 0 ? op.ops[0] : NULL_TREE, &vec_oprnds[0], reduc_index == 1 ? op.ops[1] : NULL_TREE, @@ -8730,7 +8166,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, } } - tree reduc_vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info); + tree reduc_vectype_in = vectype_in; gcc_assert (reduc_vectype_in); unsigned effec_reduc_ncopies @@ -8742,11 +8178,11 @@ vect_transform_reduction (loop_vec_info loop_vinfo, { /* Find suitable def-use cycles to generate vectorized statements into, and reorder operands based on the selection. */ - unsigned curr_pos = reduc_info->reduc_result_pos; + unsigned curr_pos = VECT_REDUC_INFO_RESULT_POS (reduc_info); unsigned next_pos = (curr_pos + effec_ncopies) % effec_reduc_ncopies; gcc_assert (curr_pos < effec_reduc_ncopies); - reduc_info->reduc_result_pos = next_pos; + VECT_REDUC_INFO_RESULT_POS (reduc_info) = next_pos; if (curr_pos) { @@ -8772,7 +8208,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, } } - bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (stmt_info); + bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (slp_node); unsigned num = vec_oprnds[reduc_index == 0 ? 1 : 0].length (); unsigned mask_index = 0; @@ -8873,11 +8309,13 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, slp_tree slp_node, slp_instance slp_node_instance) { - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + tree vectype_out = SLP_TREE_VECTYPE (slp_node); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); int i; bool nested_cycle = false; int vec_num; + const bool reduc_chain + = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain; if (nested_in_vect_loop_p (loop, stmt_info)) { @@ -8885,13 +8323,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, nested_cycle = true; } - stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info); - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); - - if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION - || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION) + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); + if (reduc_info + && (VECT_REDUC_INFO_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION + || VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)) /* Leave the scalar phi in place. */ return true; @@ -8899,7 +8334,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Check whether we should use a single PHI node and accumulate vectors to one before the backedge. 
*/
- if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
+ if (reduc_info && VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info))
 vec_num = 1;
 /* Create the destination vector */
@@ -8914,23 +8349,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
 /* Optimize: if initial_def is for REDUC_MAX smaller than the base
 and we can't use zero for induc_val, use initial_def. Similarly
 for REDUC_MIN and initial_def larger than the base. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+ if (reduc_info
+ && VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
 {
 gcc_assert (SLP_TREE_LANES (slp_node) == 1);
 tree initial_def = vect_phi_initial_value (phi);
- reduc_info->reduc_initial_values.safe_push (initial_def);
- tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
+ VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).safe_push (initial_def);
+ tree induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info);
 if (TREE_CODE (initial_def) == INTEGER_CST
 && !integer_zerop (induc_val)
 && ((VECT_REDUC_INFO_CODE (reduc_info) == MAX_EXPR
 && tree_int_cst_lt (initial_def, induc_val))
 || (VECT_REDUC_INFO_CODE (reduc_info) == MIN_EXPR
 && tree_int_cst_lt (induc_val, initial_def))))
 {
 induc_val = initial_def;
 /* Communicate we used the initial_def to epilogue
 generation. */
- STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
+ VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
 }
 vec_initial_defs.quick_push
 (build_vector_from_val (vectype_out, induc_val));
@@ -8944,11 +8380,11 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
 else
 {
 gcc_assert (slp_node == slp_node_instance->reduc_phis);
- vec<tree> &initial_values = reduc_info->reduc_initial_values;
+ vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info);
 vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
 unsigned int num_phis = stmts.length ();
- if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
+ if (reduc_chain)
 num_phis = 1;
 initial_values.reserve (num_phis);
 for (unsigned int i = 0; i < num_phis; ++i)
 {
 gphi *this_phi = as_a <gphi *> (stmts[i]->stmt);
 initial_values.quick_push (vect_phi_initial_value (this_phi));
 }
 if (vec_num == 1)
- vect_find_reusable_accumulator (loop_vinfo, reduc_info);
+ vect_find_reusable_accumulator (loop_vinfo, reduc_info, vectype_out);
 if (!initial_values.is_empty ())
 {
 tree initial_value
 = (num_phis == 1 ? initial_values[0] : NULL_TREE);
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
+ code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
 tree neutral_op
 = neutral_op_for_reduction (TREE_TYPE (vectype_out),
 code, initial_value);
@@ -8972,15 +8408,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
 requires to keep the initial value live across the loop. 
*/ if (neutral_op && initial_values.length () == 1 - && !reduc_info->reused_accumulator + && !VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def && !operand_equal_p (neutral_op, initial_values[0])) { - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) + VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) = initial_values[0]; initial_values[0] = neutral_op; } - get_initial_defs_for_reduction (loop_vinfo, reduc_info, + get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out, &vec_initial_defs, vec_num, stmts.length (), neutral_op); } @@ -8992,7 +8428,8 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, vec_initial_defs.quick_push (vec_initial_def); } - if (auto *accumulator = reduc_info->reused_accumulator) + if (reduc_info) + if (auto *accumulator = VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)) { tree def = accumulator->reduc_input; if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) @@ -9015,7 +8452,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, TYPE_VECTOR_SUBPARTS (vectype_out)); def = vect_create_partial_epilog (def, rvectype, - STMT_VINFO_REDUC_CODE + VECT_REDUC_INFO_CODE (reduc_info), &stmts); } @@ -9104,7 +8541,19 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, "incompatible vector types for invariants\n"); return false; } - STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; + + /* ??? This can happen with data vs. mask uses of boolean. */ + if (!useless_type_conversion_p (SLP_TREE_VECTYPE (slp_node), + SLP_TREE_VECTYPE + (SLP_TREE_CHILDREN (slp_node)[0]))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "missed mask promotion\n"); + return false; + } + + SLP_TREE_TYPE (slp_node) = lc_phi_info_type; return true; } @@ -9114,13 +8563,13 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo, slp_tree slp_node) { - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (slp_node); tree scalar_dest = gimple_phi_result (stmt_info->stmt); basic_block bb = gimple_bb (stmt_info->stmt); edge e = single_pred_edge (bb); tree vec_dest = vect_create_destination_var (scalar_dest, vectype); auto_vec<tree> vec_oprnds; - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, + vect_get_vec_defs (loop_vinfo, slp_node, gimple_phi_arg_def (stmt_info->stmt, 0), &vec_oprnds); for (unsigned i = 0; i < vec_oprnds.length (); i++) { @@ -9136,8 +8585,8 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo, /* Vectorizes PHIs. */ bool -vectorizable_phi (vec_info *, - stmt_vec_info stmt_info, gimple **vec_stmt, +vectorizable_phi (bb_vec_info, + stmt_vec_info stmt_info, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { if (!is_a <gphi *> (stmt_info->stmt) || !slp_node) @@ -9148,7 +8597,7 @@ vectorizable_phi (vec_info *, tree vectype = SLP_TREE_VECTYPE (slp_node); - if (!vec_stmt) /* transformation not required. */ + if (cost_vec) /* transformation not required. */ { slp_tree child; unsigned i; @@ -9188,8 +8637,8 @@ vectorizable_phi (vec_info *, favoring the vector path (but may pessimize it in some cases). 
*/ if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1) record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, vectype, 0, vect_body); - STMT_VINFO_TYPE (stmt_info) = phi_info_type; + vector_stmt, slp_node, vectype, 0, vect_body); + SLP_TREE_TYPE (slp_node) = phi_info_type; return true; } @@ -9279,8 +8728,7 @@ vectorizable_phi (vec_info *, bool vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, - gimple **vec_stmt, slp_tree slp_node, - stmt_vector_for_cost *cost_vec) + slp_tree slp_node, stmt_vector_for_cost *cost_vec) { if (!loop_vinfo || !is_a<gphi *> (stmt_info->stmt)) return false; @@ -9291,14 +8739,10 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence) return false; - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - unsigned ncopies; - if (slp_node) - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + tree vectype = SLP_TREE_VECTYPE (slp_node); + unsigned ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1; + unsigned dist = SLP_TREE_LANES (slp_node); /* We need to be able to make progress with a single vector. */ if (maybe_gt (dist * 2, nunits)) { @@ -9309,6 +8753,33 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; } + /* We need to be able to build a { ..., a, b } init vector with + dist number of distinct trailing values. Always possible + when dist == 1 or when nunits is constant or when the initializations + are uniform. */ + tree uniform_initval = NULL_TREE; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + if (! uniform_initval) + uniform_initval = PHI_ARG_DEF_FROM_EDGE (phi, pe); + else if (! operand_equal_p (uniform_initval, + PHI_ARG_DEF_FROM_EDGE (phi, pe))) + { + uniform_initval = NULL_TREE; + break; + } + } + if (!uniform_initval && !nunits.is_constant ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "cannot build initialization vector for " + "first order recurrence\n"); + return false; + } + /* First-order recurrence autovectorization needs to handle permutation with indices = [nunits-1, nunits, nunits+1, ...]. */ vec_perm_builder sel (nunits, 1, 3); @@ -9316,48 +8787,30 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, sel.quick_push (nunits - dist + i); vec_perm_indices indices (sel, 2, nunits); - if (!vec_stmt) /* transformation not required. */ + if (cost_vec) /* transformation not required. */ { if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)) return false; - if (slp_node) - { - /* We eventually need to set a vector type on invariant - arguments. */ - unsigned j; - slp_tree child; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) - if (!vect_maybe_update_slp_op_vectype - (child, SLP_TREE_VECTYPE (slp_node))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "incompatible vector types for " - "invariants\n"); - return false; - } - } + /* We eventually need to set a vector type on invariant + arguments. 
*/ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype (child, vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } /* Verify we have set up compatible types. */ edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo)); - tree latch_vectype = NULL_TREE; - if (slp_node) - { - slp_tree latch_def = SLP_TREE_CHILDREN (slp_node)[le->dest_idx]; - latch_vectype = SLP_TREE_VECTYPE (latch_def); - } - else - { - tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, le); - if (TREE_CODE (latch_def) == SSA_NAME) - { - stmt_vec_info latch_def_info = loop_vinfo->lookup_def (latch_def); - latch_def_info = vect_stmt_to_vectorize (latch_def_info); - latch_vectype = STMT_VINFO_VECTYPE (latch_def_info); - } - } + slp_tree latch_def = SLP_TREE_CHILDREN (slp_node)[le->dest_idx]; + tree latch_vectype = SLP_TREE_VECTYPE (latch_def); if (!types_compatible_p (latch_vectype, vectype)) return false; @@ -9365,36 +8818,50 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, for each copy. With SLP the prologue value is explicitly represented and costed separately. */ unsigned prologue_cost = 0; - if (!slp_node) - prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, 0, vect_prologue); unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizable_recurr: inside_cost = %d, " "prologue_cost = %d .\n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = recurr_info_type; + SLP_TREE_TYPE (slp_node) = recurr_info_type; return true; } - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - basic_block bb = gimple_bb (phi); - tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); - if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) + tree vec_init; + if (! uniform_initval) { - gimple_seq stmts = NULL; - preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader); - gsi_insert_seq_on_edge_immediate (pe, stmts); + vec<constructor_elt, va_gc> *v = NULL; + vec_alloc (v, nunits.to_constant ()); + for (unsigned i = 0; i < nunits.to_constant () - dist; ++i) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (TREE_TYPE (vectype))); + for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node)) + { + gphi *phi = as_a <gphi *> (s->stmt); + tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe); + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (preheader))) + { + gimple_seq stmts = NULL; + preheader = gimple_convert (&stmts, + TREE_TYPE (vectype), preheader); + gsi_insert_seq_on_edge_immediate (pe, stmts); + } + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader); + } + vec_init = build_constructor (vectype, v); } - tree vec_init = build_vector_from_val (vectype, preheader); + else + vec_init = uniform_initval; vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); /* Create the vectorized first-order PHI node. 
*/ tree vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_recur_"); + basic_block bb = gimple_bb (phi); gphi *new_phi = create_phi_node (vec_dest, bb); add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); @@ -9419,14 +8886,9 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, NULL, perm); vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2); - if (slp_node) - slp_node->push_vec_def (vperm); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (vperm); + slp_node->push_vec_def (vperm); } - if (!slp_node) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; return true; } @@ -9738,7 +9200,7 @@ vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype, static bool vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, - gimple **vec_stmt, slp_tree slp_node, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -9894,13 +9356,13 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, gcc_unreachable (); } - if (!vec_stmt) /* transformation not required. */ + if (cost_vec) /* transformation not required. */ { unsigned inside_cost = 0, prologue_cost = 0; /* loop cost for vec_loop. Neg induction doesn't have any inside_cost. */ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); /* loop cost for vec_loop. Neg induction doesn't have any inside_cost. */ @@ -9909,7 +9371,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, /* prologue cost for vec_init and vec_step. */ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -9917,7 +9379,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, "prologue_cost = %d. \n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; + SLP_TREE_TYPE (slp_node) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction"); return true; } @@ -10049,8 +9511,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, bool vectorizable_induction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, - gimple **vec_stmt, slp_tree slp_node, - stmt_vector_for_cost *cost_vec) + slp_tree slp_node, stmt_vector_for_cost *cost_vec) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); bool nested_in_vect_loop = false; @@ -10084,7 +9545,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, /* Handle nonlinear induction in a separate place. */ if (induction_type != vect_step_op_add) return vectorizable_nonlinear_induction (loop_vinfo, stmt_info, - vec_stmt, slp_node, cost_vec); + slp_node, cost_vec); tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); @@ -10165,6 +9626,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, } tree stept = TREE_TYPE (step_expr); tree step_vectype = get_same_sized_vectype (stept, vectype); + stept = TREE_TYPE (step_vectype); /* Check for target support of the vectorized arithmetic used here. */ if (!target_supports_op_p (step_vectype, PLUS_EXPR, optab_default) @@ -10187,7 +9649,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, } } - if (!vec_stmt) /* transformation not required. */ + if (cost_vec) /* transformation not required. 
*/ { unsigned inside_cost = 0, prologue_cost = 0; /* We eventually need to set a vector type on invariant @@ -10207,18 +9669,18 @@ vectorizable_induction (loop_vec_info loop_vinfo, /* loop cost for vec_loop. */ inside_cost = record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, 0, vect_body); + vector_stmt, slp_node, 0, vect_body); /* prologue cost for vec_init (if not nested) and step. */ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_induction_cost: inside_cost = %d, " "prologue_cost = %d .\n", inside_cost, prologue_cost); - STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; + SLP_TREE_TYPE (slp_node) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_induction"); return true; } @@ -10468,7 +9930,12 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (peel_mul) { if (!step_mul) - step_mul = peel_mul; + { + gcc_assert (!nunits.is_constant ()); + step_mul = gimple_build (&init_stmts, + MINUS_EXPR, step_vectype, + build_zero_cst (step_vectype), peel_mul); + } else step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype, @@ -10619,9 +10086,8 @@ vectorizable_induction (loop_vec_info loop_vinfo, helper function for vectorizable_live_operation. */ static tree -vectorizable_live_operation_1 (loop_vec_info loop_vinfo, - stmt_vec_info stmt_info, basic_block exit_bb, - tree vectype, int ncopies, slp_tree slp_node, +vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb, + tree vectype, slp_tree slp_node, tree bitsize, tree bitstart, tree vec_lhs, tree lhs_type, gimple_stmt_iterator *exit_gsi) { @@ -10652,8 +10118,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, where VEC_LHS is the vectorized live-out result and MASK is the loop mask for the final iteration. */ - gcc_assert (ncopies == 1 - && (!slp_node || SLP_TREE_LANES (slp_node) == 1)); + gcc_assert (SLP_TREE_LANES (slp_node) == 1); gimple_seq tem = NULL; gimple_stmt_iterator gsi = gsi_last (tem); tree len = vect_get_loop_len (loop_vinfo, &gsi, @@ -10688,8 +10153,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, where VEC_LHS is the vectorized live-out result and MASK is the loop mask for the final iteration. */ - gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1); - tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); + gcc_assert (SLP_TREE_LANES (slp_node) == 1); + tree scalar_type = TREE_TYPE (vectype); gimple_seq tem = NULL; gimple_stmt_iterator gsi = gsi_last (tem); tree mask = vect_get_loop_mask (loop_vinfo, &gsi, @@ -10735,11 +10200,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); imm_use_iterator imm_iter; tree lhs, lhs_type, bitsize; - tree vectype = (slp_node - ? SLP_TREE_VECTYPE (slp_node) - : STMT_VINFO_VECTYPE (stmt_info)); + tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - int ncopies; gimple *use_stmt; use_operand_p use_p; auto_vec<tree> vec_oprnds; @@ -10752,18 +10214,18 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, /* If a stmt of a reduction is live, vectorize it via vect_create_epilog_for_reduction. vectorizable_reduction assessed validity so just trigger the transform here. 
*/ - if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) + if (vect_is_reduction (slp_node)) { if (!vec_stmt_p) return true; /* For SLP reductions we vectorize the epilogue for all involved stmts - together. */ - if (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && slp_index != 0) + together. For SLP reduction chains we only get here once. */ + if (SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_group + && slp_index != 0) return true; - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - gcc_assert (reduc_info->is_reduc_info); - if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION - || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) + vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node); + if (VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION + || VECT_REDUC_INFO_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) return true; if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) @@ -10776,17 +10238,18 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, block, but we have to find an alternate exit first. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) { - slp_tree phis_node = slp_node ? slp_node_instance->reduc_phis : NULL; + slp_tree phis_node = slp_node_instance->reduc_phis; + stmt_info = SLP_TREE_REPRESENTATIVE (phis_node); for (auto exit : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo))) if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo)) { - vect_create_epilog_for_reduction (loop_vinfo, reduc_info, + vect_create_epilog_for_reduction (loop_vinfo, stmt_info, phis_node, slp_node_instance, exit); break; } if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) - vect_create_epilog_for_reduction (loop_vinfo, reduc_info, + vect_create_epilog_for_reduction (loop_vinfo, stmt_info, phis_node, slp_node_instance, LOOP_VINFO_IV_EXIT (loop_vinfo)); } @@ -10807,32 +10270,24 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, return true; } - if (slp_node) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); - - if (slp_node) - { - gcc_assert (slp_index >= 0); + gcc_assert (slp_index >= 0); - /* Get the last occurrence of the scalar index from the concatenation of - all the slp vectors. Calculate which slp vector it is and the index - within. */ - int num_scalar = SLP_TREE_LANES (slp_node); - int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index; + /* Get the last occurrence of the scalar index from the concatenation of + all the slp vectors. Calculate which slp vector it is and the index + within. */ + int num_scalar = SLP_TREE_LANES (slp_node); + int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index; - /* Calculate which vector contains the result, and which lane of - that vector we need. */ - if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Cannot determine which vector holds the" - " final result.\n"); - return false; - } + /* Calculate which vector contains the result, and which lane of + that vector we need. 
*/ + if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Cannot determine which vector holds the" + " final result.\n"); + return false; } if (!vec_stmt_p) @@ -10840,7 +10295,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, /* No transformation required. */ if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) { - if (slp_node && SLP_TREE_LANES (slp_node) != 1) + if (SLP_TREE_LANES (slp_node) != 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -10849,8 +10304,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, "the loop.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } - else if (ncopies > 1 - || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)) + else if (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -10860,8 +10314,6 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, } else { - gcc_assert (ncopies == 1 - && (!slp_node || SLP_TREE_LANES (slp_node) == 1)); if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, OPTIMIZE_FOR_SPEED)) vect_record_loop_mask (loop_vinfo, @@ -10886,7 +10338,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, } /* ??? Enable for loop costing as well. */ if (!loop_vinfo) - record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE, + record_stmt_cost (cost_vec, 1, vec_to_scalar, slp_node, 0, vect_epilogue); return true; } @@ -10903,40 +10355,21 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, bitsize = vector_element_bits_tree (vectype); /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ - tree vec_lhs, vec_lhs0, bitstart; - gimple *vec_stmt, *vec_stmt0; - if (slp_node) - { - gcc_assert (!loop_vinfo - || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) - && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) - || SLP_TREE_LANES (slp_node) == 1)); - - /* Get the correct slp vectorized stmt. */ - vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry]; - vec_stmt = SSA_NAME_DEF_STMT (vec_lhs); + gcc_assert (!loop_vinfo + || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) + || SLP_TREE_LANES (slp_node) == 1)); - /* In case we need to early break vectorize also get the first stmt. */ - vec_lhs0 = SLP_TREE_VEC_DEFS (slp_node)[0]; - vec_stmt0 = SSA_NAME_DEF_STMT (vec_lhs0); + /* Get the correct slp vectorized stmt. */ + tree vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry]; + gimple *vec_stmt = SSA_NAME_DEF_STMT (vec_lhs); - /* Get entry to use. */ - bitstart = bitsize_int (vec_index); - bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); - } - else - { - /* For multiple copies, get the last copy. */ - vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last (); - vec_lhs = gimple_get_lhs (vec_stmt); + /* In case we need to early break vectorize also get the first stmt. */ + tree vec_lhs0 = SLP_TREE_VEC_DEFS (slp_node)[0]; - /* In case we need to early break vectorize also get the first stmt. */ - vec_stmt0 = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - vec_lhs0 = gimple_get_lhs (vec_stmt0); - - /* Get the last lane in the vector. */ - bitstart = int_const_binop (MULT_EXPR, bitsize, bitsize_int (nunits - 1)); - } + /* Get entry to use. 
*/ + tree bitstart = bitsize_int (vec_index); + bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); if (loop_vinfo) { @@ -10985,8 +10418,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator exit_gsi; tree new_tree - = vectorizable_live_operation_1 (loop_vinfo, stmt_info, - e->dest, vectype, ncopies, + = vectorizable_live_operation_1 (loop_vinfo, + e->dest, vectype, slp_node, bitsize, tmp_bitstart, tmp_vec_lhs, lhs_type, &exit_gsi); @@ -11412,7 +10845,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, factor = exact_div (nunits1, nunits2).to_constant (); tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); gimple_seq seq = NULL; - loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len, + loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len, build_int_cst (iv_type, factor)); if (seq) gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); @@ -11472,7 +10905,7 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl profile_count entry_count = loop_preheader_edge (loop)->count (); /* If we have unreliable loop profile avoid dropping entry - count bellow header count. This can happen since loops + count below header count. This can happen since loops has unrealistically low trip counts. */ while (vf > 1 && loop->header->count > entry_count @@ -11499,42 +10932,26 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl get_likely_max_loop_iterations_int (loop)); } -/* Helper function to pass to simplify_replace_tree to enable replacing tree's - in the hash_map with its corresponding values. */ - -static tree -find_in_mapping (tree t, void *context) -{ - hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context; - - tree *value = mapping->get (t); - return value ? *value : t; -} - /* Update EPILOGUE's loop_vec_info. EPILOGUE was constructed as a copy of the original loop that has now been vectorized. The inits of the data_references need to be advanced with the number of iterations of the main loop. This has been computed in vect_do_peeling and - is stored in parameter ADVANCE. We first restore the data_references - initial offset with the values recored in ORIG_DRS_INIT. + is stored in parameter ADVANCE. Since the loop_vec_info of this EPILOGUE was constructed for the original loop, its stmt_vec_infos all point to the original statements. These need - to be updated to point to their corresponding copies as well as the SSA_NAMES - in their PATTERN_DEF_SEQs and RELATED_STMTs. + to be updated to point to their corresponding copies. The data_reference's connections also need to be updated. Their corresponding dr_vec_info need to be reconnected to the EPILOGUE's - stmt_vec_infos, their statements need to point to their corresponding copy, - if they are gather loads or scatter stores then their reference needs to be - updated to point to its corresponding copy. */ + stmt_vec_infos, their statements need to point to their corresponding + copy. */ static void update_epilogue_loop_vinfo (class loop *epilogue, tree advance) { loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue); - auto_vec<gimple *> stmt_worklist; hash_map<tree,tree> mapping; gimple *orig_stmt, *new_stmt; gimple_stmt_iterator epilogue_gsi; @@ -11549,9 +10966,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) /* The EPILOGUE loop is a copy of the original loop so they share the same gimple UIDs. 
-/* Helper function to pass to simplify_replace_tree to enable replacing tree's
-   in the hash_map with its corresponding values.  */
-
-static tree
-find_in_mapping (tree t, void *context)
-{
-  hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context;
-
-  tree *value = mapping->get (t);
-  return value ? *value : t;
-}
-
 /* Update EPILOGUE's loop_vec_info.  EPILOGUE was constructed as a copy of the
    original loop that has now been vectorized.
 
    The inits of the data_references need to be advanced with the number of
    iterations of the main loop.  This has been computed in vect_do_peeling and
-   is stored in parameter ADVANCE.  We first restore the data_references
-   initial offset with the values recored in ORIG_DRS_INIT.
+   is stored in parameter ADVANCE.
 
    Since the loop_vec_info of this EPILOGUE was constructed for the original
    loop, its stmt_vec_infos all point to the original statements.  These need
-   to be updated to point to their corresponding copies as well as the SSA_NAMES
-   in their PATTERN_DEF_SEQs and RELATED_STMTs.
+   to be updated to point to their corresponding copies.
 
    The data_reference's connections also need to be updated.  Their
    corresponding dr_vec_info need to be reconnected to the EPILOGUE's
-   stmt_vec_infos, their statements need to point to their corresponding copy,
-   if they are gather loads or scatter stores then their reference needs to be
-   updated to point to its corresponding copy.  */
+   stmt_vec_infos, their statements need to point to their corresponding
+   copy.  */
 
 static void
 update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 {
   loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
-  auto_vec<gimple *> stmt_worklist;
   hash_map<tree,tree> mapping;
   gimple *orig_stmt, *new_stmt;
   gimple_stmt_iterator epilogue_gsi;
@@ -11549,9 +10966,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
   /* The EPILOGUE loop is a copy of the original loop so they share the same
      gimple UIDs.  In this loop we update the loop_vec_info of the EPILOGUE to
-     point to the copied statements.  We also create a mapping of all LHS' in
-     the original loop and all the LHS' in the EPILOGUE and create worklists to
-     update teh STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs.  */
+     point to the copied statements.  */
   for (unsigned i = 0; i < epilogue->num_nodes; ++i)
     {
       for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
@@ -11563,14 +10978,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	  stmt_vinfo
 	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
-	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
 	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
-
-	  mapping.put (gimple_phi_result (orig_stmt),
-		       gimple_phi_result (new_stmt));
-	  /* PHI nodes can not have patterns or related statements.  */
-	  gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL
-		      && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL);
 	}
 
       for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]);
@@ -11584,25 +10992,12 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	  stmt_vinfo
 	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
-	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
 	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
-	  if (tree old_lhs = gimple_get_lhs (orig_stmt))
-	    mapping.put (old_lhs, gimple_get_lhs (new_stmt));
-
-	  if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
-	    {
-	      gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
-	      for (gimple_stmt_iterator gsi = gsi_start (seq);
-		   !gsi_end_p (gsi); gsi_next (&gsi))
-		stmt_worklist.safe_push (gsi_stmt (gsi));
-	    }
-
 	  related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
 	  if (related_vinfo != NULL && related_vinfo != stmt_vinfo)
 	    {
 	      gimple *stmt = STMT_VINFO_STMT (related_vinfo);
-	      stmt_worklist.safe_push (stmt);
 	      /* Set BB such that the assert in
 		 'get_initial_defs_for_reduction' is able to determine that
 		 the BB of the related stmt is inside this loop.  */
@@ -11615,33 +11010,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 	}
     }
 
-  /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed
-     using the original main loop and thus need to be updated to refer to the
-     cloned variables used in the epilogue.  */
-  for (unsigned i = 0; i < stmt_worklist.length (); ++i)
-    {
-      gimple *stmt = stmt_worklist[i];
-      tree *new_op;
-
-      for (unsigned j = 1; j < gimple_num_ops (stmt); ++j)
-	{
-	  tree op = gimple_op (stmt, j);
-	  if ((new_op = mapping.get(op)))
-	    gimple_set_op (stmt, j, *new_op);
-	  else
-	    {
-	      /* PR92429: The last argument of simplify_replace_tree disables
-		 folding when replacing arguments.  This is required as
-		 otherwise you might end up with different statements than the
-		 ones analyzed in vect_loop_analyze, leading to different
-		 vectorization.  */
-	      op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
-					  &find_in_mapping, &mapping, false);
-	      gimple_set_op (stmt, j, op);
-	    }
-	}
-    }
-
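The deleted worklist machinery above existed only to rewrite SSA operands inside PATTERN_DEF_SEQs and RELATED_STMTs; once those no longer need fixing, the whole update reduces to the shared-UID reconnection kept in the loop before it: a copied statement carries the UID of its original, so its stmt_vec_info is found by index and simply re-pointed. A toy model of that invariant; the types are stand-ins, not GCC's:

    #include <cstdio>
    #include <vector>

    struct stmt { unsigned uid; const char *text; };  /* stand-in for gimple */
    struct stmt_info { stmt *s; };            /* stand-in for stmt_vec_info */

    /* Re-point each info record at the copy carrying the same UID --
       the core of the simplified update_epilogue_loop_vinfo.  */
    static void
    reconnect (std::vector<stmt_info> &infos, std::vector<stmt> &copies)
    {
      for (stmt &copy : copies)
        infos[copy.uid - 1].s = &copy;        /* UIDs are 1-based */
    }

    int
    main ()
    {
      std::vector<stmt> orig = { { 1, "a = b + c" }, { 2, "d = a * 2" } };
      std::vector<stmt_info> infos = { { &orig[0] }, { &orig[1] } };
      std::vector<stmt> copies = orig;        /* the epilogue keeps the UIDs */
      reconnect (infos, copies);
      printf ("%s\n", infos[1].s->text);      /* now reads from the copy */
      return 0;
    }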
   struct data_reference *dr;
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo);
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       orig_stmt = DR_STMT (dr);
       gcc_assert (gimple_uid (orig_stmt) > 0);
       stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (orig_stmt) - 1];
-      /* Data references for gather loads and scatter stores do not use the
-	 updated offset we set using ADVANCE.  Instead we have to make sure the
-	 reference in the data references point to the corresponding copy of
-	 the original in the epilogue.  Make sure to update both
-	 gather/scatters recognized by dataref analysis and also other
-	 refs that get_load_store_type classified as VMAT_GATHER_SCATTER.  */
-      auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
-      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER
-	  || STMT_VINFO_STRIDED_P (vstmt_vinfo)
-	  || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo))
-	{
-	  /* ??? As we copy epilogues from the main loop incremental
-	     replacement from an already replaced DR_REF from vectorizing
-	     the first epilogue will fail.  */
-	  DR_REF (dr)
-	    = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
-				     &find_in_mapping, &mapping);
-	  DR_BASE_ADDRESS (dr)
-	    = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE,
-				     NULL_TREE, &find_in_mapping, &mapping);
-	}
       DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
     }
 
@@ -11679,9 +11026,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
 
   /* Remember the advancement made.  */
   LOOP_VINFO_DRS_ADVANCED_BY (epilogue_vinfo) = advance;
-
-  epilogue_vinfo->shared->datarefs_copy.release ();
-  epilogue_vinfo->shared->save_datarefs ();
 }
 
 /* When vectorizing early break statements instructions that happen before
@@ -11787,7 +11131,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 
   DUMP_VECT_SCOPE ("vec_transform_loop");
 
-  loop_vinfo->shared->check_datarefs ();
+  if (! LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+    loop_vinfo->shared->check_datarefs ();
 
   /* Use the more conservative vectorization threshold.  If the number of
      iterations is constant assume the cost check has been performed