Diffstat (limited to 'gcc/tree-vect-loop.cc')
 -rw-r--r--  gcc/tree-vect-loop.cc | 1635
 1 file changed, 490 insertions(+), 1145 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2782d61..d623672 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "case-cfn-macros.h"
#include "langhooks.h"
+#include "opts.h"
/* Loop Vectorization Pass.
@@ -162,218 +163,6 @@ static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
bool *, bool *, bool);
-/* Subroutine of vect_determine_vf_for_stmt that handles only one
- statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
- may already be set for general statements (not just data refs). */
-
-static opt_result
-vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
- bool vectype_maybe_set_p,
- poly_uint64 *vf)
-{
- gimple *stmt = stmt_info->stmt;
-
- if ((!STMT_VINFO_RELEVANT_P (stmt_info)
- && !STMT_VINFO_LIVE_P (stmt_info))
- || gimple_clobber_p (stmt))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
- return opt_result::success ();
- }
-
- tree stmt_vectype, nunits_vectype;
- opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info,
- &stmt_vectype,
- &nunits_vectype);
- if (!res)
- return res;
-
- if (stmt_vectype)
- {
- if (STMT_VINFO_VECTYPE (stmt_info))
- /* The only case when a vectype had been already set is for stmts
- that contain a data ref, or for "pattern-stmts" (stmts generated
- by the vectorizer to represent/replace a certain idiom). */
- gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
- || vectype_maybe_set_p)
- && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
- else
- STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
- }
-
- if (nunits_vectype)
- vect_update_max_nunits (vf, nunits_vectype);
-
- return opt_result::success ();
-}
-
-/* Subroutine of vect_determine_vectorization_factor. Set the vector
- types of STMT_INFO and all attached pattern statements and update
- the vectorization factor VF accordingly. Return true on success
- or false if something prevented vectorization. */
-
-static opt_result
-vect_determine_vf_for_stmt (vec_info *vinfo,
- stmt_vec_info stmt_info, poly_uint64 *vf)
-{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
- stmt_info->stmt);
- opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
- if (!res)
- return res;
-
- if (STMT_VINFO_IN_PATTERN_P (stmt_info)
- && STMT_VINFO_RELATED_STMT (stmt_info))
- {
- gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
- stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
-
- /* If a pattern statement has def stmts, analyze them too. */
- for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
- !gsi_end_p (si); gsi_next (&si))
- {
- stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "==> examining pattern def stmt: %G",
- def_stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
- if (!res)
- return res;
- }
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "==> examining pattern statement: %G",
- stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
- if (!res)
- return res;
- }
-
- return opt_result::success ();
-}
-
-/* Function vect_determine_vectorization_factor
-
- Determine the vectorization factor (VF). VF is the number of data elements
- that are operated upon in parallel in a single iteration of the vectorized
- loop. For example, when vectorizing a loop that operates on 4byte elements,
- on a target with vector size (VS) 16byte, the VF is set to 4, since 4
- elements can fit in a single vector register.
-
- We currently support vectorization of loops in which all types operated upon
- are of the same size. Therefore this function currently sets VF according to
- the size of the types operated upon, and fails if there are multiple sizes
- in the loop.
-
- VF is also the factor by which the loop iterations are strip-mined, e.g.:
- original loop:
- for (i=0; i<N; i++){
- a[i] = b[i] + c[i];
- }
-
- vectorized loop:
- for (i=0; i<N; i+=VF){
- a[i:VF] = b[i:VF] + c[i:VF];
- }
-*/
-
-static opt_result
-vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- unsigned nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor = 1;
- tree scalar_type = NULL_TREE;
- gphi *phi;
- tree vectype;
- stmt_vec_info stmt_info;
- unsigned i;
-
- DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
-
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
-
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- phi = si.phi ();
- stmt_info = loop_vinfo->lookup_stmt (phi);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
- (gimple *) phi);
-
- gcc_assert (stmt_info);
-
- if (STMT_VINFO_RELEVANT_P (stmt_info)
- || STMT_VINFO_LIVE_P (stmt_info))
- {
- gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
- scalar_type = TREE_TYPE (PHI_RESULT (phi));
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "get vectype for scalar type: %T\n",
- scalar_type);
-
- vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
- if (!vectype)
- return opt_result::failure_at (phi,
- "not vectorized: unsupported "
- "data-type %T\n",
- scalar_type);
- STMT_VINFO_VECTYPE (stmt_info) = vectype;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
- vectype);
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
- dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
- dump_printf (MSG_NOTE, "\n");
- }
-
- vect_update_max_nunits (&vectorization_factor, vectype);
- }
- }
-
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- if (is_gimple_debug (gsi_stmt (si)))
- continue;
- stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- opt_result res
- = vect_determine_vf_for_stmt (loop_vinfo,
- stmt_info, &vectorization_factor);
- if (!res)
- return res;
- }
- }
-
- /* TODO: Analyze cost. Decide if worth while to vectorize. */
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, "\n");
- }
-
- if (known_le (vectorization_factor, 1U))
- return opt_result::failure_at (vect_location,
- "not vectorized: unsupported data-type\n");
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- return opt_result::success ();
-}
-
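For reference, the strip-mining transformation described in the removed
comment, written out as plain C (a sketch only, not part of the patch; VF is
illustrative, and n is assumed to be a multiple of VF so that no scalar
epilogue loop is needed):

    #define VF 4  /* e.g. 4-byte elements in 16-byte vectors */

    void
    add_strip_mined (int *restrict a, int *restrict b,
                     int *restrict c, int n)
    {
      /* Each outer iteration handles VF consecutive elements,
         i.e. a[i:VF] = b[i:VF] + c[i:VF] from the comment above.  */
      for (int i = 0; i < n; i += VF)
        for (int j = 0; j < VF; j++)
          a[i + j] = b[i + j] + c[i + j];
    }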
/* Function vect_is_simple_iv_evolution.
@@ -381,8 +170,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
considered a polynomial evolution. */
static bool
-vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
- tree * step)
+vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn,
+ stmt_vec_info stmt_info)
{
tree init_expr;
tree step_expr;
@@ -406,8 +195,8 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n",
step_expr, init_expr);
- *init = init_expr;
- *step = step_expr;
+ STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = init_expr;
+ STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = step_expr;
if (TREE_CODE (step_expr) != INTEGER_CST
&& (TREE_CODE (step_expr) != SSA_NAME
@@ -438,7 +227,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
For neg induction, return a fake step as integer -1. */
static bool
vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
- gphi* loop_phi_node, tree *init, tree *step)
+ gphi* loop_phi_node)
{
tree init_expr, ev_expr, result, op1, op2;
gimple* def;
@@ -453,7 +242,6 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr)))
return false;
- *init = init_expr;
result = PHI_RESULT (loop_phi_node);
if (TREE_CODE (ev_expr) != SSA_NAME
@@ -462,12 +250,13 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
return false;
enum tree_code t_code = gimple_assign_rhs_code (def);
+ tree step;
switch (t_code)
{
case NEGATE_EXPR:
if (gimple_assign_rhs1 (def) != result)
return false;
- *step = build_int_cst (TREE_TYPE (init_expr), -1);
+ step = build_int_cst (TREE_TYPE (init_expr), -1);
STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg;
break;
@@ -479,7 +268,7 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
if (TREE_CODE (op2) != INTEGER_CST
|| op1 != result)
return false;
- *step = op2;
+ step = op2;
if (t_code == LSHIFT_EXPR)
STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl;
else if (t_code == RSHIFT_EXPR)
@@ -493,8 +282,8 @@ vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
return false;
}
- STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init;
- STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step;
+ STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = init_expr;
+ STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = step;
return true;
}
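The loop shapes classified above, as a plain-C sketch (illustrative only,
not part of the patch): each induction's next value is derived from the
current value by a fixed operation rather than by adding a loop-invariant
step, hence "nonlinear":

    void
    nonlinear_ivs (int *out, int n, int init)
    {
      int neg = init, shl = init, shr = init;
      for (int i = 0; i < n; i++)
        {
          out[i] = neg + shl + shr;
          neg = -neg;       /* NEGATE_EXPR -> vect_step_op_neg, fake step -1 */
          shl = shl << 1;   /* LSHIFT_EXPR by a constant -> vect_step_op_shl */
          shr = shr >> 1;   /* RSHIFT_EXPR by a constant -> vect_step_op_shr */
        }
    }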
@@ -589,7 +378,6 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
bool slp)
{
basic_block bb = loop->header;
- tree init, step;
auto_vec<stmt_vec_info, 64> worklist;
gphi_iterator gsi;
bool double_reduc, reduc_chain;
@@ -619,28 +407,21 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
/* Analyze the evolution function. */
access_fn = analyze_scalar_evolution (loop, def);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Access function of PHI: %T\n", access_fn);
if (access_fn)
- {
- STRIP_NOPS (access_fn);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Access function of PHI: %T\n", access_fn);
- STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
- = initial_condition_in_loop_num (access_fn, loop->num);
- STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
- = evolution_part_in_loop_num (access_fn, loop->num);
- }
+ STRIP_NOPS (access_fn);
if ((!access_fn
|| vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
- || !vect_is_simple_iv_evolution (loop->num, access_fn,
- &init, &step)
+ || !vect_is_simple_iv_evolution (loop->num, access_fn, stmt_vinfo)
|| (LOOP_VINFO_LOOP (loop_vinfo) != loop
- && TREE_CODE (step) != INTEGER_CST))
+ && (TREE_CODE (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo))
+ != INTEGER_CST)))
/* Only handle nonlinear iv for same loop. */
&& (LOOP_VINFO_LOOP (loop_vinfo) != loop
- || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo,
- phi, &init, &step)))
+ || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo, phi)))
{
worklist.safe_push (stmt_vinfo);
continue;
@@ -1057,6 +838,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
unaligned_dr (NULL),
peeling_for_alignment (0),
ptr_mask (0),
+ max_spec_read_amount (0),
nonlinear_iv (false),
ivexpr_map (NULL),
scan_map (NULL),
@@ -1165,6 +947,8 @@ _loop_vec_info::~_loop_vec_info ()
delete scan_map;
delete scalar_costs;
delete vector_costs;
+ for (auto reduc_info : reduc_infos)
+ delete reduc_info;
/* When we release an epilogue vinfo that we do not intend to use
avoid clearing AUX of the main loop which should continue to
@@ -1939,6 +1723,17 @@ vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call,
}
}
+ if (!integer_onep (info->assumptions))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Loop to be versioned with niter assumption ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, info->assumptions);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+
return opt_result::success ();
}
@@ -1967,7 +1762,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
for (gcond *cond : info->conds)
{
stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond);
- STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
/* Mark the statement as a condition. */
STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def;
}
@@ -1984,9 +1778,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
if (info->inner_loop_cond)
{
- stmt_vec_info inner_loop_cond_info
- = loop_vinfo->lookup_stmt (info->inner_loop_cond);
- STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
/* If we have an estimate on the number of iterations of the inner
loop use that to limit the scale for costing, otherwise use
--param vect-inner-loop-cost-factor literally. */
@@ -2001,234 +1792,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
-/* Scan the loop stmts and dependent on whether there are any (non-)SLP
- statements update the vectorization factor. */
-
-static void
-vect_update_vf_for_slp (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor;
- int i;
-
- DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
-
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- gcc_assert (known_ne (vectorization_factor, 0U));
-
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
- the SLP instances. If that unrolling factor is 1, we say, that we
- perform pure SLP on loop - cross iteration parallelism is not
- exploited. */
- bool only_slp_in_loop = true;
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
- if (!stmt_info)
- continue;
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- if (is_gimple_debug (gsi_stmt (si)))
- continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- stmt_info = vect_stmt_to_vectorize (stmt_info);
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- }
-
- if (only_slp_in_loop)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains only SLP stmts\n");
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
- /* Both the vectorization factor and unroll factor have the form
- GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
- so they must have a common multiple. */
- vectorization_factor
- = force_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
- }
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, ".\n");
- }
-}
-
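A worked example of the common-multiple computation in the function dissolved
above (illustrative numbers, not part of the patch): a loop vectorization
factor of 4 and an SLP unrolling factor of 6 combine to lcm (4, 6) == 12:

    #include <stdio.h>

    static unsigned
    gcd (unsigned a, unsigned b)
    {
      return b ? gcd (b, a % b) : a;
    }

    int
    main (void)
    {
      unsigned vf = 4, slp_uf = 6;
      /* Least common multiple; force_common_multiple computes the
         analogous result for poly_uint64 values.  */
      printf ("%u\n", vf / gcd (vf, slp_uf) * slp_uf);  /* prints 12 */
      return 0;
    }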
-/* Return true if STMT_INFO describes a double reduction phi and if
- the other phi in the reduction is also relevant for vectorization.
- This rejects cases such as:
-
- outer1:
- x_1 = PHI <x_3(outer2), ...>;
- ...
-
- inner:
- x_2 = ...;
- ...
-
- outer2:
- x_3 = PHI <x_2(inner)>;
-
- if nothing in x_2 or elsewhere makes x_1 relevant. */
-
-static bool
-vect_active_double_reduction_p (stmt_vec_info stmt_info)
-{
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
- return false;
-
- return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
-}
-
-/* Function vect_analyze_loop_operations.
-
- Scan the loop stmts and make sure they are all vectorizable. */
-
-static opt_result
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- int i;
- stmt_vec_info stmt_info;
-
- DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
-
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
-
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gphi *phi = si.phi ();
-
- stmt_info = loop_vinfo->lookup_stmt (phi);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G",
- (gimple *) phi);
- if (virtual_operand_p (gimple_phi_result (phi)))
- continue;
-
- /* ??? All of the below unconditional FAILs should be in
- done earlier after analyzing cycles, possibly when
- determining stmt relevancy? */
-
- /* Inner-loop loop-closed exit phi in outer-loop vectorization
- (i.e., a phi in the tail of the outer-loop). */
- if (! is_loop_header_bb_p (bb))
- {
- /* FORNOW: we currently don't support the case that these phis
- are not used in the outerloop (unless it is double reduction,
- i.e., this phi is vect_reduction_def), cause this case
- requires to actually do something here. */
- if (STMT_VINFO_LIVE_P (stmt_info)
- && !vect_active_double_reduction_p (stmt_info))
- return opt_result::failure_at (phi,
- "Unsupported loop-closed phi"
- " in outer-loop.\n");
-
- /* If PHI is used in the outer loop, we check that its operand
- is defined in the inner loop. */
- if (STMT_VINFO_RELEVANT_P (stmt_info))
- {
- tree phi_op;
-
- if (gimple_phi_num_args (phi) != 1)
- return opt_result::failure_at (phi, "unsupported phi");
-
- phi_op = PHI_ARG_DEF (phi, 0);
- stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
- if (!op_def_info)
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
- && (STMT_VINFO_RELEVANT (op_def_info)
- != vect_used_in_outer_by_reduction))
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
- || (STMT_VINFO_DEF_TYPE (stmt_info)
- == vect_double_reduction_def))
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi, "unsupported phi\n");
- }
-
- continue;
- }
-
- gcc_assert (stmt_info);
-
- if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
- || STMT_VINFO_LIVE_P (stmt_info))
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
- /* A scalar-dependence cycle that we don't support. */
- return opt_result::failure_at (phi,
- "not vectorized:"
- " scalar dependence cycle.\n");
-
- if (STMT_VINFO_RELEVANT_P (stmt_info)
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi,
- "not vectorized: relevant phi not "
- "supported: %G",
- static_cast <gimple *> (phi));
- }
-
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gimple *stmt = gsi_stmt (si);
- if (!gimple_clobber_p (stmt)
- && !is_gimple_debug (stmt))
- {
- bool need_to_vectorize = false;
- opt_result res
- = vect_analyze_stmt (loop_vinfo,
- loop_vinfo->lookup_stmt (stmt),
- &need_to_vectorize,
- NULL, NULL, NULL);
- if (!res)
- return res;
- }
- }
- } /* bbs */
-
- return opt_result::success ();
-}
-
/* Return true if we know that the iteration count is smaller than the
vectorization factor. Return false if it isn't, or if we can't be sure
either way. */
@@ -2529,78 +2092,6 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
return opt_result::success ();
}
-/* Look for SLP-only access groups and turn each individual access into its own
- group. */
-static void
-vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
-{
- unsigned int i;
- struct data_reference *dr;
-
- DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
-
- vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- gcc_assert (DR_REF (dr));
- stmt_vec_info stmt_info
- = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (DR_STMT (dr)));
-
- /* Check if the load is a part of an interleaving chain. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
- stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
- dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element);
- unsigned int group_size = DR_GROUP_SIZE (first_element);
-
- /* Check if SLP-only groups. */
- if (!STMT_SLP_TYPE (stmt_info)
- && STMT_VINFO_SLP_VECT_ONLY (first_element))
- {
- /* Dissolve the group. */
- STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
-
- stmt_vec_info vinfo = first_element;
- while (vinfo)
- {
- stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
- DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
- DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- DR_GROUP_SIZE (vinfo) = 1;
- if (STMT_VINFO_STRIDED_P (first_element)
- /* We cannot handle stores with gaps. */
- || DR_IS_WRITE (dr_info->dr))
- {
- STMT_VINFO_STRIDED_P (vinfo) = true;
- DR_GROUP_GAP (vinfo) = 0;
- }
- else
- DR_GROUP_GAP (vinfo) = group_size - 1;
- /* Duplicate and adjust alignment info, it needs to
- be present on each group leader, see dr_misalignment. */
- if (vinfo != first_element)
- {
- dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo);
- dr_info2->target_alignment = dr_info->target_alignment;
- int misalignment = dr_info->misalignment;
- if (misalignment != DR_MISALIGNMENT_UNKNOWN)
- {
- HOST_WIDE_INT diff
- = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))
- - TREE_INT_CST_LOW (DR_INIT (dr_info->dr)));
- unsigned HOST_WIDE_INT align_c
- = dr_info->target_alignment.to_constant ();
- misalignment = (misalignment + diff) % align_c;
- }
- dr_info2->misalignment = misalignment;
- }
- vinfo = next;
- }
- }
- }
- }
-}
-
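A worked example of the misalignment update in the dissolved code above
(illustrative numbers, not part of the patch): with a 16-byte target
alignment, a group leader that is 4 bytes misaligned, and a member whose
DR_INIT is 8 bytes past the leader's, the member's misalignment is
(4 + 8) % 16 == 12:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned int misalignment = 4;  /* leader's known misalignment */
      unsigned int diff = 8;          /* DR_INIT (member) - DR_INIT (leader) */
      unsigned int align_c = 16;      /* target_alignment */
      printf ("%u\n", (misalignment + diff) % align_c);  /* prints 12 */
      return 0;
    }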
/* Determine if operating on full vectors for LOOP_VINFO might leave
some scalar iterations still to do. If so, decide how we should
handle those scalar iterations. The possibilities are:
@@ -2710,18 +2201,17 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
indicates if some analysis hits a fatal error. If a non-NULL pointer
SUGGESTED_UNROLL_FACTOR is provided, it is intended to be filled with the
worked out suggested unroll factor, while a NULL pointer shows we are
- going to apply the suggested unroll factor. SLP_DONE_FOR_SUGGESTED_UF
- is to hold the slp decision when the suggested unroll factor is worked
- out. */
+ going to apply the suggested unroll factor.
+ SINGLE_LANE_SLP_DONE_FOR_SUGGESTED_UF holds whether single-lane
+ SLP was forced when the suggested unroll factor was worked out. */
static opt_result
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
unsigned *suggested_unroll_factor,
- unsigned& slp_done_for_suggested_uf)
+ bool& single_lane_slp_done_for_suggested_uf)
{
opt_result ok = opt_result::success ();
int res;
unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
- poly_uint64 min_vf = 2;
loop_vec_info orig_loop_vinfo = NULL;
/* If we are dealing with an epilogue then orig_loop_vinfo points to the
@@ -2768,7 +2258,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
/* Analyze the data references and also adjust the minimal
vectorization factor according to the loads and stores. */
- ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
+ ok = vect_analyze_data_refs (loop_vinfo, &fatal);
if (!ok)
{
if (dump_enabled_p ())
@@ -2781,14 +2271,14 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1;
gcc_assert (!applying_suggested_uf || !suggested_unroll_factor);
- /* If the slp decision is false when suggested unroll factor is worked
- out, and we are applying suggested unroll factor, we can simply skip
- all slp related analyses this time. */
- unsigned slp = !applying_suggested_uf ? 2 : slp_done_for_suggested_uf;
+ /* When single-lane SLP was forced and we are applying suggested unroll
+ factor, keep that decision here. */
+ bool force_single_lane = (applying_suggested_uf
+ && single_lane_slp_done_for_suggested_uf);
/* Classify all cross-iteration scalar data-flow cycles.
Cross-iteration cycles caused by virtual phis are analyzed separately. */
- vect_analyze_scalar_cycles (loop_vinfo, slp == 2);
+ vect_analyze_scalar_cycles (loop_vinfo, !force_single_lane);
vect_pattern_recog (loop_vinfo);
@@ -2833,24 +2323,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
"bad data dependence.\n");
return ok;
}
- if (max_vf != MAX_VECTORIZATION_FACTOR
- && maybe_lt (max_vf, min_vf))
- return opt_result::failure_at (vect_location, "bad data dependence.\n");
LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;
- ok = vect_determine_vectorization_factor (loop_vinfo);
- if (!ok)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine vectorization factor.\n");
- return ok;
- }
-
/* Compute the scalar iteration cost. */
vect_compute_single_scalar_iteration_cost (loop_vinfo);
- poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool saved_can_use_partial_vectors_p
= LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
@@ -2861,26 +2338,33 @@ start_over:
/* Check the SLP opportunities in the loop, analyze and build
SLP trees. */
ok = vect_analyze_slp (loop_vinfo, loop_vinfo->stmt_vec_infos.length (),
- slp == 1);
+ force_single_lane);
if (!ok)
return ok;
/* If there are any SLP instances mark them as pure_slp. */
- if (vect_make_slp_decision (loop_vinfo))
- {
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
+ if (!vect_make_slp_decision (loop_vinfo))
+ return opt_result::failure_at (vect_location, "no stmts to vectorize.\n");
- /* Update the vectorization factor based on the SLP decision. */
- vect_update_vf_for_slp (loop_vinfo);
-
- /* Optimize the SLP graph with the vectorization factor fixed. */
- vect_optimize_slp (loop_vinfo);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "Loop contains only SLP stmts\n");
- /* Gather the loads reachable from the SLP graph entries. */
- vect_gather_slp_loads (loop_vinfo);
+ /* Determine the vectorization factor from the SLP decision. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
+ dump_dec (MSG_NOTE, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ dump_printf (MSG_NOTE, "\n");
}
+ /* Optimize the SLP graph with the vectorization factor fixed. */
+ vect_optimize_slp (loop_vinfo);
+
+ /* Gather the loads reachable from the SLP graph entries. */
+ vect_gather_slp_loads (loop_vinfo);
+
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
@@ -2949,19 +2433,6 @@ start_over:
goto again;
}
- /* Dissolve SLP-only groups. */
- vect_dissolve_slp_only_groups (loop_vinfo);
-
- /* Scan all the remaining operations in the loop that we did not catch
- during SLP build and make sure we fail. */
- ok = vect_analyze_loop_operations (loop_vinfo);
- if (!ok)
- {
- ok = opt_result::failure_at (vect_location,
- "bad operation or unsupported loop bound\n");
- goto again;
- }
-
/* For now, we don't expect to mix both masking and length approaches for one
loop, disable it if both are recorded. */
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
@@ -3202,7 +2673,7 @@ start_over:
gcc_assert (known_eq (vectorization_factor,
LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
- slp_done_for_suggested_uf = slp;
+ single_lane_slp_done_for_suggested_uf = force_single_lane;
/* Ok to vectorize! */
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
@@ -3213,7 +2684,7 @@ again:
gcc_assert (!ok);
/* Try again with single-lane SLP. */
- if (slp == 1)
+ if (force_single_lane)
return ok;
/* If we are applying suggested unroll factor, we don't need to
@@ -3241,7 +2712,7 @@ again:
continue;
vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
unsigned int size = DR_GROUP_SIZE (vinfo);
- tree vectype = STMT_VINFO_VECTYPE (vinfo);
+ tree vectype = SLP_TREE_VECTYPE (SLP_INSTANCE_TREE (instance));
if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
&& ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
&& ! vect_grouped_store_supported (vectype, size))
@@ -3255,7 +2726,7 @@ again:
vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
size = DR_GROUP_SIZE (vinfo);
- vectype = STMT_VINFO_VECTYPE (vinfo);
+ vectype = SLP_TREE_VECTYPE (node);
if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
&& ! vect_grouped_load_supported (vectype, single_element_p,
size))
@@ -3266,13 +2737,13 @@ again:
}
/* Roll back state appropriately. Force single-lane SLP this time. */
- slp = 1;
+ force_single_lane = true;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"re-trying with single-lane SLP\n");
- /* Restore vectorization factor as it were without SLP. */
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
+ /* Reset the vectorization factor. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = 0;
/* Free the SLP instances. */
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
vect_free_slp_instance (instance);
@@ -3285,7 +2756,7 @@ again:
!gsi_end_p (si); gsi_next (&si))
{
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- STMT_SLP_TYPE (stmt_info) = loop_vect;
+ STMT_SLP_TYPE (stmt_info) = not_vect;
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
{
@@ -3304,7 +2775,7 @@ again:
if (is_gimple_debug (gsi_stmt (si)))
continue;
stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- STMT_SLP_TYPE (stmt_info) = loop_vect;
+ STMT_SLP_TYPE (stmt_info) = not_vect;
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
{
stmt_vec_info pattern_stmt_info
@@ -3313,11 +2784,11 @@ again:
STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
- STMT_SLP_TYPE (pattern_stmt_info) = loop_vect;
+ STMT_SLP_TYPE (pattern_stmt_info) = not_vect;
for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
!gsi_end_p (pi); gsi_next (&pi))
STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))
- = loop_vect;
+ = not_vect;
}
}
}
@@ -3400,8 +2871,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
}
/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is
- not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
- MODE_I to the next mode useful to analyze.
+ not NULL. When MASKED_P is not -1, override the default
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
+ Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next
+ mode useful to analyze.
Return the loop_vinfo on success and wrapped null on failure. */
static opt_loop_vec_info
@@ -3409,6 +2882,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
const vect_loop_form_info *loop_form_info,
loop_vec_info orig_loop_vinfo,
const vector_modes &vector_modes, unsigned &mode_i,
+ int masked_p,
machine_mode &autodetected_vector_mode,
bool &fatal)
{
@@ -3417,13 +2891,15 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
machine_mode vector_mode = vector_modes[mode_i];
loop_vinfo->vector_mode = vector_mode;
+ if (masked_p != -1)
+ loop_vinfo->can_use_partial_vectors_p = masked_p;
unsigned int suggested_unroll_factor = 1;
- unsigned slp_done_for_suggested_uf = 0;
+ bool single_lane_slp_done_for_suggested_uf = false;
/* Run the main analysis. */
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
&suggested_unroll_factor,
- slp_done_for_suggested_uf);
+ single_lane_slp_done_for_suggested_uf);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"***** Analysis %s with vector mode %s\n",
@@ -3455,16 +2931,17 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"***** Re-trying analysis for unrolling"
- " with unroll factor %d and slp %s.\n",
+ " with unroll factor %d and %s slp.\n",
suggested_unroll_factor,
- slp_done_for_suggested_uf ? "on" : "off");
+ single_lane_slp_done_for_suggested_uf
+ ? "single-lane" : "");
loop_vec_info unroll_vinfo
= vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
unroll_vinfo->vector_mode = vector_mode;
unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
opt_result new_res
= vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
- slp_done_for_suggested_uf);
+ single_lane_slp_done_for_suggested_uf);
if (new_res)
{
delete loop_vinfo;
@@ -3600,7 +3077,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
cached_vf_per_mode[last_mode_i] = -1;
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
- NULL, vector_modes, mode_i,
+ NULL, vector_modes, mode_i, -1,
autodetected_vector_mode, fatal);
if (fatal)
break;
@@ -3685,18 +3162,21 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
array may contain length-agnostic and length-specific modes. Their
ordering is not guaranteed, so we could end up picking a mode for the main
loop that is after the epilogue's optimal mode. */
+ int masked_p = -1;
if (!unlimited_cost_model (loop)
- && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode)
+ && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
+ != VOIDmode))
{
vector_modes[0]
- = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+ = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
cached_vf_per_mode[0] = 0;
}
else
vector_modes[0] = autodetected_vector_mode;
mode_i = 0;
- bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
+ bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
+ || masked_p == 1);
machine_mode mask_mode;
if (supports_partial_vectors
&& !partial_vectors_supported_p ()
@@ -3710,6 +3190,10 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
loop_vec_info orig_loop_vinfo = first_loop_vinfo;
do
{
+ /* Let the user override what the target suggests. */
+ if (OPTION_SET_P (param_vect_partial_vector_usage))
+ masked_p = -1;
+
while (1)
{
/* If the target does not support partial vectors we can shorten the
@@ -3750,7 +3234,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
orig_loop_vinfo,
- vector_modes, mode_i,
+ vector_modes, mode_i, masked_p,
autodetected_vector_mode, fatal);
if (fatal)
break;
@@ -3781,6 +3265,9 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
break;
}
+ /* Revert to the default from the suggested preferred
+ epilogue vectorization mode. */
+ masked_p = -1;
if (mode_i == vector_modes.length ())
break;
}
@@ -3791,13 +3278,14 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
/* When we selected a first vectorized epilogue, see if the target
suggests having another one. */
+ masked_p = -1;
if (!unlimited_cost_model (loop)
&& !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo)
- && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
+ && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
!= VOIDmode))
{
vector_modes[0]
- = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+ = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
cached_vf_per_mode[0] = 0;
mode_i = 0;
}
@@ -5303,8 +4791,9 @@ have_whole_vector_shift (machine_mode mode)
See vect_emulate_mixed_dot_prod for the actual sequence used. */
static bool
-vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
+vect_is_emulated_mixed_dot_prod (slp_tree slp_node)
{
+ stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (slp_node);
gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
if (!assign || gimple_assign_rhs_code (assign) != DOT_PROD_EXPR)
return false;
@@ -5314,10 +4803,10 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
if (TYPE_SIGN (TREE_TYPE (rhs1)) == TYPE_SIGN (TREE_TYPE (rhs2)))
return false;
- gcc_assert (STMT_VINFO_REDUC_VECTYPE_IN (stmt_info));
return !directly_supported_p (DOT_PROD_EXPR,
- STMT_VINFO_VECTYPE (stmt_info),
- STMT_VINFO_REDUC_VECTYPE_IN (stmt_info),
+ SLP_TREE_VECTYPE (slp_node),
+ SLP_TREE_VECTYPE
+ (SLP_TREE_CHILDREN (slp_node)[0]),
optab_vector_mixed_sign);
}
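The source pattern this covers, as a plain-C sketch (illustrative, not part
of the patch): a dot product whose two multiplied operands differ in
signedness, which a target may not support directly via
optab_vector_mixed_sign and must then emulate:

    int
    mixed_sign_dot (signed char *a, unsigned char *b, int n)
    {
      int sum = 0;
      for (int i = 0; i < n; i++)
        sum += a[i] * b[i];  /* signed x unsigned -> DOT_PROD_EXPR candidate */
      return sum;
    }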
@@ -5332,7 +4821,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
static void
vect_model_reduction_cost (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, internal_fn reduc_fn,
+ slp_tree node, internal_fn reduc_fn,
vect_reduction_type reduction_type,
int ncopies, stmt_vector_for_cost *cost_vec)
{
@@ -5348,9 +4837,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduction_type == COND_REDUCTION)
ncopies *= 2;
- vectype = STMT_VINFO_VECTYPE (stmt_info);
+ vectype = SLP_TREE_VECTYPE (node);
mode = TYPE_MODE (vectype);
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+ stmt_vec_info orig_stmt_info
+ = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
gimple_match_op op;
if (!gimple_extract_op (orig_stmt_info->stmt, &op))
@@ -5368,16 +4858,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduc_fn != IFN_LAST)
/* Count one reduction-like operation per vector. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
- stmt_info, 0, vect_body);
+ node, 0, vect_body);
else
{
/* Use NELEMENTS extracts and NELEMENTS scalar ops. */
unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
inside_cost = record_stmt_cost (cost_vec, nelements,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_body);
inside_cost += record_stmt_cost (cost_vec, nelements,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_body);
}
}
@@ -5394,7 +4884,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* We need the initial reduction value. */
prologue_stmts = 1;
prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_prologue);
}
@@ -5411,24 +4901,24 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
{
/* An EQ stmt and an COND_EXPR stmt. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
/* Reduction of the max index and a reduction of the found
values. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* A broadcast of the max value. */
epilogue_cost += record_stmt_cost (cost_vec, 1,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_epilogue);
}
else
{
epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
- stmt_info, 0, vect_epilogue);
+ node, 0, vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
}
@@ -5438,12 +4928,12 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* Extraction of scalar elements. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits - 3,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_epilogue);
}
else if (reduction_type == EXTRACT_LAST_REDUCTION
@@ -5469,10 +4959,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
Also requires scalar extract. */
epilogue_cost += record_stmt_cost (cost_vec,
exact_log2 (nelements) * 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
else
@@ -5480,7 +4970,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
elements, we have N extracts and N-1 reduction ops. */
epilogue_cost += record_stmt_cost (cost_vec,
nelements + nelements - 1,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
}
}
@@ -5497,9 +4987,9 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
static void
vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info, gimple *seq)
+ vect_reduc_info reduc_info, gimple *seq)
{
- if (reduc_info->reused_accumulator)
+ if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info))
{
/* When reusing an accumulator from the main loop, we only need
initialization instructions if the main loop can be skipped.
@@ -5527,15 +5017,15 @@ vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
static void
get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info,
+ vect_reduc_info reduc_info,
+ tree vector_type,
vec<tree> *vec_oprnds,
unsigned int number_of_vectors,
unsigned int group_size, tree neutral_op)
{
- vec<tree> &initial_values = reduc_info->reduc_initial_values;
+ vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info);
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
- tree vector_type = STMT_VINFO_VECTYPE (reduc_info);
unsigned int i;
gcc_assert (group_size == initial_values.length () || neutral_op);
@@ -5549,7 +5039,7 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
will be 2).
- If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
+ If GROUP_SIZE > NUNITS, the scalars will be split into several
vectors containing the operands.
For example, NUNITS is four as before, and the group size is 8
@@ -5639,30 +5129,12 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq);
}
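A sketch of the lane layout get_initial_defs_for_reduction produces
(illustrative scalar stand-in, not part of the patch): with NUNITS == 4 and
GROUP_SIZE == 2 each vector holds two copies of the initial values,
{s1, s2, s1, s2}; with GROUP_SIZE == 8 the values are instead split across
two vectors:

    #define NUNITS 4

    /* Fill one NUNITS-lane "vector" from GROUP_SIZE initial scalar values,
       starting at lane index START and wrapping when GROUP_SIZE < NUNITS.  */
    static void
    fill_vector (int *lanes, const int *scalars, int group_size, int start)
    {
      for (int j = 0; j < NUNITS; j++)
        lanes[j] = scalars[(start + j) % group_size];
    }

    int
    main (void)
    {
      int s[] = { 1, 2 };
      int v[NUNITS];
      fill_vector (v, s, 2, 0);  /* v == {1, 2, 1, 2} */
      return v[3] - 2;           /* exit status 0 */
    }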
-/* For a statement STMT_INFO taking part in a reduction operation return
- the stmt_vec_info the meta information is stored on. */
-
-stmt_vec_info
-info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
+vect_reduc_info
+info_for_reduction (loop_vec_info loop_vinfo, slp_tree node)
{
- stmt_info = vect_orig_stmt (stmt_info);
- gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
- if (!is_a <gphi *> (stmt_info->stmt)
- || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
- gphi *phi = as_a <gphi *> (stmt_info->stmt);
- if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
- {
- if (gimple_phi_num_args (phi) == 1)
- stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
- }
- else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
- {
- stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi));
- if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
- stmt_info = info;
- }
- return stmt_info;
+ if (node->cycle_info.id == -1)
+ return NULL;
+ return loop_vinfo->reduc_infos[node->cycle_info.id];
}
/* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that
@@ -5671,16 +5143,16 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
static bool
vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info)
+ vect_reduc_info reduc_info, tree vectype)
{
loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
if (!main_loop_vinfo)
return false;
- if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION)
+ if (VECT_REDUC_INFO_TYPE (reduc_info) != TREE_CODE_REDUCTION)
return false;
- unsigned int num_phis = reduc_info->reduc_initial_values.length ();
+ unsigned int num_phis = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).length ();
auto_vec<tree, 16> main_loop_results (num_phis);
auto_vec<tree, 16> initial_values (num_phis);
if (edge main_loop_edge = loop_vinfo->main_loop_edge)
@@ -5688,7 +5160,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
/* The epilogue loop can be entered either from the main loop or
from an earlier guard block. */
edge skip_edge = loop_vinfo->skip_main_loop_edge;
- for (tree incoming_value : reduc_info->reduc_initial_values)
+ for (tree incoming_value : VECT_REDUC_INFO_INITIAL_VALUES (reduc_info))
{
/* Look for:
@@ -5708,19 +5180,18 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
}
else
/* The main loop dominates the epilogue loop. */
- main_loop_results.splice (reduc_info->reduc_initial_values);
+ main_loop_results.splice (VECT_REDUC_INFO_INITIAL_VALUES (reduc_info));
/* See if the main loop has the kind of accumulator we need. */
vect_reusable_accumulator *accumulator
= main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]);
if (!accumulator
- || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
+ || num_phis != VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).length ()
|| !std::equal (main_loop_results.begin (), main_loop_results.end (),
- accumulator->reduc_info->reduc_scalar_results.begin ()))
+ VECT_REDUC_INFO_SCALAR_RESULTS (accumulator->reduc_info).begin ()))
return false;
/* Handle the case where we can reduce wider vectors to narrower ones. */
- tree vectype = STMT_VINFO_VECTYPE (reduc_info);
tree old_vectype = TREE_TYPE (accumulator->reduc_input);
unsigned HOST_WIDE_INT m;
if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
@@ -5735,7 +5206,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
tree intermediate_vectype = get_related_vectype_for_scalar_type
(TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits);
if (!intermediate_vectype
- || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
+ || !directly_supported_p (VECT_REDUC_INFO_CODE (reduc_info),
intermediate_vectype)
|| !can_vec_extract (TYPE_MODE (prev_vectype),
TYPE_MODE (intermediate_vectype)))
@@ -5754,7 +5225,7 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
to select the correct adjustment, but in practice that shouldn't be
necessary.) */
tree main_adjustment
- = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info);
+ = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (accumulator->reduc_info);
if (loop_vinfo->main_loop_edge && main_adjustment)
{
gcc_assert (num_phis == 1);
@@ -5763,14 +5234,14 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
initialize the accumulator with a neutral value instead. */
if (!operand_equal_p (initial_value, main_adjustment))
return false;
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
+ code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value),
code, initial_value);
}
- STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment;
- reduc_info->reduc_initial_values.truncate (0);
- reduc_info->reduc_initial_values.splice (initial_values);
- reduc_info->reused_accumulator = accumulator;
+ VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment;
+ VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).truncate (0);
+ VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).splice (initial_values);
+ VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info) = accumulator;
return true;
}
@@ -5923,8 +5394,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
slp_instance slp_node_instance,
edge loop_exit)
{
- stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
- gcc_assert (reduc_info->is_reduc_info);
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
/* For double reductions we need to get at the inner loop reduction
stmt which has the meta info attached. Our stmt_info is that of the
loop-closed PHI of the inner loop which we remember as
@@ -5937,8 +5407,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
(stmt_info->stmt, 0));
stmt_info = vect_stmt_to_vectorize (stmt_info);
}
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
- internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
+ code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
+ internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info);
tree vectype;
machine_mode mode;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
@@ -5958,16 +5428,15 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gimple *use_stmt;
auto_vec<tree> reduc_inputs;
int j, i;
- vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
+ vec<tree> &scalar_results = VECT_REDUC_INFO_SCALAR_RESULTS (reduc_info);
unsigned int k;
/* SLP reduction without reduction chain, e.g.,
# a1 = phi <a2, a0>
# b1 = phi <b2, b0>
a2 = operation (a1)
b2 = operation (b1) */
- bool slp_reduc
- = !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info));
- bool direct_slp_reduc;
+ const bool slp_reduc
+ = SLP_INSTANCE_KIND (slp_node_instance) != slp_inst_kind_reduc_chain;
tree induction_index = NULL_TREE;
unsigned int group_size = SLP_TREE_LANES (slp_node);
@@ -5979,7 +5448,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gcc_assert (double_reduc);
}
- vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
+ vectype = VECT_REDUC_INFO_VECTYPE (reduc_info);
gcc_assert (vectype);
mode = TYPE_MODE (vectype);
@@ -5987,12 +5456,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
tree adjustment_def = NULL;
/* Optimize: for induction condition reduction, if we can't use zero
for induc_val, use initial_def. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
- induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
+ if (VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+ induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info);
else if (double_reduc)
;
else
- adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
+ adjustment_def = VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info);
stmt_vec_info single_live_out_stmt[] = { stmt_info };
array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
@@ -6013,14 +5482,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
PR92772: This algorithm is broken for architectures that support
masked vectors, but do not provide fold_extract_last. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
+ if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION)
{
auto_vec<std::pair<tree, bool>, 2> ccompares;
slp_tree cond_node = slp_node_instance->root;
while (cond_node != slp_node_instance->reduc_phis)
{
stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
- int slp_reduc_idx;
if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
{
gimple *vec_stmt
@@ -6028,16 +5496,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
ccompares.safe_push
(std::make_pair (gimple_assign_rhs1 (vec_stmt),
- STMT_VINFO_REDUC_IDX (cond_info) == 2));
- /* ??? We probably want to have REDUC_IDX on the SLP node?
- We have both three and four children COND_EXPR nodes
- dependent on whether the comparison is still embedded
- as GENERIC. So work backwards. */
- slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3
- + STMT_VINFO_REDUC_IDX (cond_info));
+ SLP_TREE_REDUC_IDX (cond_node) == 2));
}
- else
- slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info);
+ int slp_reduc_idx = SLP_TREE_REDUC_IDX (cond_node);
cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx];
}
gcc_assert (ccompares.length () != 0);
@@ -6193,9 +5654,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* True if we should implement SLP_REDUC using native reduction operations
instead of scalar operations. */
- direct_slp_reduc = (reduc_fn != IFN_LAST
- && slp_reduc
- && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
+ const bool direct_slp_reduc
+ = (reduc_fn != IFN_LAST
+ && slp_reduc
+ && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
/* In case of reduction chain, e.g.,
# a1 = phi <a3, a0>
@@ -6216,7 +5678,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
a multiple of the SLP group size.
The same is true if we couldn't use a single defuse cycle. */
- if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))
+ if (!slp_reduc
|| direct_slp_reduc
|| (slp_reduc
&& constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)))
@@ -6243,7 +5705,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
the minor(?) benefit of making the epilogue loop's scalar result
independent of the main loop's scalar result. */
bool unify_with_main_loop_p = false;
- if (reduc_info->reused_accumulator
+ if (VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
&& loop_vinfo->skip_this_loop_edge
&& single_succ_p (exit_bb)
&& single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest)
@@ -6255,7 +5717,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block);
add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb),
UNKNOWN_LOCATION);
- add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input,
+ add_phi_arg (new_phi,
+ VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)->reduc_input,
loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION);
exit_gsi = gsi_after_labels (reduc_block);
}
@@ -6263,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* Shouldn't be used beyond this point. */
exit_bb = nullptr;
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
+ if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn != IFN_LAST)
{
/* For condition reductions, we have a vector (REDUC_INPUTS 0) containing
@@ -6369,7 +5832,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
scalar_results.safe_push (new_temp);
}
- else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
+ else if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn == IFN_LAST)
{
/* Condition reduction without supported IFN_REDUC_MAX. Generate
@@ -6462,7 +5925,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
new_temp = gimple_convert (&stmts, scalar_type, new_temp);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
- if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+ if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
&& induc_val)
{
/* Earlier we set the initial value to be a vector if induc_val
@@ -6473,7 +5936,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR,
new_temp, induc_val);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- tree initial_def = reduc_info->reduc_initial_values[0];
+ tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0];
tmp = make_ssa_name (new_scalar_dest);
epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
initial_def, new_temp);
@@ -6485,7 +5948,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
}
else if (direct_slp_reduc)
{
- /* Here we create one vector for each of the REDUC_GROUP_SIZE results,
+ /* Here we create one vector for each of the GROUP_SIZE results,
with the elements for other SLP statements replaced with the
neutral value. We can then do a normal reduction on each vector. */
@@ -6503,7 +5966,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
tree mask_type = truth_type_for (index_type);
/* Create a vector that, for each element, identifies which of
- the REDUC_GROUP_SIZE results should use it. */
+ the results should use it. */
tree index_mask = build_int_cst (index_elt_type, group_size - 1);
index = gimple_build (&seq, BIT_AND_EXPR, index_type, index,
build_vector_from_val (index_type, index_mask));
@@ -6512,15 +5975,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
scalar value if we have one, otherwise the initial scalar value
is itself a neutral value. */
tree vector_identity = NULL_TREE;
- tree neutral_op = NULL_TREE;
- if (1)
- {
- tree initial_value = NULL_TREE;
- if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)))
- initial_value = reduc_info->reduc_initial_values[0];
- neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code,
- initial_value, false);
- }
+ tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code,
+ NULL_TREE, false);
if (neutral_op)
vector_identity = gimple_build_vector_from_val (&seq, vectype,
neutral_op);
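For reference, the neutral elements involved here, as a sketch mirroring
what neutral_op_for_reduction computes (illustrative, not part of the patch;
MIN/MAX have no neutral element over a full integer range, which is why the
!neutral_op path below falls back to the scalar initial value):

    /* The neutral element e of an operation satisfies op (e, x) == x,
       so lanes padded with e do not perturb the reduction result.  */
    static int
    neutral_int (char op)
    {
      switch (op)
        {
        case '+': return 0;   /* x + 0 == x  */
        case '*': return 1;   /* x * 1 == x  */
        case '|': return 0;   /* x | 0 == x  */
        case '^': return 0;   /* x ^ 0 == x  */
        case '&': return ~0;  /* x & ~0 == x */
        default:  return 0;   /* MIN/MAX: no neutral element */
        }
    }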
@@ -6531,7 +5987,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
for MIN and MAX reduction, for example. */
if (!neutral_op)
{
- tree scalar_value = reduc_info->reduc_initial_values[i];
+ tree scalar_value
+ = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[i];
scalar_value = gimple_convert (&seq, TREE_TYPE (vectype),
scalar_value);
vector_identity = gimple_build_vector_from_val (&seq, vectype,
@@ -6723,10 +6180,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
}
}
- /* The only case where we need to reduce scalar results in SLP, is
- unrolling. If the size of SCALAR_RESULTS is greater than
- REDUC_GROUP_SIZE, we reduce them combining elements modulo
- REDUC_GROUP_SIZE. */
+  /* The only case where we need to reduce scalar results in an SLP
+     reduction is unrolling.  If the size of SCALAR_RESULTS is
+ greater than GROUP_SIZE, we reduce them combining elements modulo
+ GROUP_SIZE. */
if (slp_reduc)
{
tree res, first_res, new_res;
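A minimal sketch of the modulo-GROUP_SIZE combination described in the comment above, assuming a PLUS reduction and illustrative names:

   #include <vector>

   /* With group_size == 2 and SCALAR_RESULTS {r0,r1,r2,r3} this yields
      {r0 + r2, r1 + r3}, one combined scalar per SLP result.  */
   std::vector<int>
   reduce_mod_group (const std::vector<int> &res, unsigned group_size)
   {
     std::vector<int> out (res.begin (), res.begin () + group_size);
     for (unsigned i = group_size; i < res.size (); ++i)
       out[i % group_size] += res[i];
     return out;
   }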
@@ -6747,7 +6204,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
}
else
{
- /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
+ /* Reduction chain - we have one scalar to keep in
+ SCALAR_RESULTS. */
new_temp = gimple_convert (&stmts, scalar_type, new_temp);
scalar_results.safe_push (new_temp);
}
@@ -6755,7 +6213,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
}
- if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+ if ((VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
&& induc_val)
{
/* Earlier we set the initial value to be a vector if induc_val
@@ -6766,7 +6224,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR,
scalar_results[0], induc_val);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- tree initial_def = reduc_info->reduc_initial_values[0];
+ tree initial_def = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info)[0];
tree tmp = make_ssa_name (new_scalar_dest);
epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
initial_def, scalar_results[0]);
@@ -6809,7 +6267,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
}
/* Record this operation if it could be reused by the epilogue loop. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION
+ if (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION
&& reduc_inputs.length () == 1)
loop_vinfo->reusable_accumulators.put (scalar_results[0],
{ orig_reduc_input, reduc_info });
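Recording the pair here is what later allows a vectorized epilogue to resume from the main loop's vector accumulator; conceptually (a sketch of the intent, not the emitted IL):

   /* main loop:      vec_acc = ...;  s = reduce (vec_acc);
      epilogue loop:  seeds its accumulator from vec_acc (possibly
                      narrowed), not from the scalar s, avoiding a
                      redundant reduce-and-rebroadcast.  */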
@@ -7023,14 +6481,13 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
slp_tree slp_node,
- gimple *reduc_def_stmt,
code_helper code, internal_fn reduc_fn,
int num_ops, tree vectype_in,
int reduc_index, vec_loop_masks *masks,
vec_loop_lens *lens)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = SLP_TREE_VECTYPE (slp_node);
internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in);
gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
@@ -7048,6 +6505,13 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
TYPE_VECTOR_SUBPARTS (vectype_in)));
+ /* ??? We should, when transforming the cycle PHI, record the existing
+ scalar def as vector def so looking up the vector def works. This
+ would also allow generalizing this for reduction paths of length > 1
+ and/or SLP reductions. */
+ slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index];
+ tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0);
+
/* The operands either come from a binary operation or an IFN_COND operation.
The former is a gimple assign with binary rhs and the latter is a
gimple call with four arguments. */
@@ -7068,7 +6532,6 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
tree scalar_dest = gimple_get_lhs (sdef);
tree scalar_type = TREE_TYPE (scalar_dest);
- tree reduc_var = gimple_phi_result (reduc_def_stmt);
int vec_num = vec_oprnds0.length ();
tree vec_elem_type = TREE_TYPE (vectype_out);
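For reference, a scalar model of what a fold-left reduction computes; the strict left-to-right order is the point, since it preserves FP semantics without reassociation. Illustrative code, not the generated sequence:

   double
   fold_left_plus (double acc, const double *v, int n)
   {
     /* Accumulate strictly in order, starting from the scalar PHI
        value (reduc_var above); no lane-wise reassociation.  */
     for (int i = 0; i < n; ++i)
       acc = acc + v[i];
     return acc;
   }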
@@ -7303,13 +6766,13 @@ build_vect_cond_expr (code_helper code, tree vop[3], tree mask,
static void
vect_reduction_update_partial_vector_usage (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info,
+ vect_reduc_info reduc_info,
slp_tree slp_node,
code_helper code, tree type,
tree vectype_in)
{
- enum vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (reduc_info);
- internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
+ enum vect_reduction_type reduc_type = VECT_REDUC_INFO_TYPE (reduc_info);
+ internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info);
internal_fn cond_fn = get_conditional_internal_fn (code, type);
if (reduc_type != FOLD_LEFT_REDUCTION
@@ -7403,28 +6866,27 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
if (!type_has_mode_precision_p (type))
return false;
- stmt_vec_info reduc_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
/* TODO: Support lane-reducing operation that does not directly participate
in loop reduction. */
- if (!reduc_info || STMT_VINFO_REDUC_IDX (stmt_info) < 0)
+ if (!reduc_info)
return false;
/* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not
      recognized.  */
- gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_reduction_def);
- gcc_assert (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION);
+ gcc_assert (!nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
+ gcc_assert (VECT_REDUC_INFO_TYPE (reduc_info) == TREE_CODE_REDUCTION);
for (int i = 0; i < (int) gimple_num_ops (stmt) - 1; i++)
{
- stmt_vec_info def_stmt_info;
slp_tree slp_op;
tree op;
tree vectype;
enum vect_def_type dt;
- if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_node, i, &op,
- &slp_op, &dt, &vectype, &def_stmt_info))
+ if (!vect_is_simple_use (loop_vinfo, slp_node, i, &op,
+ &slp_op, &dt, &vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7456,8 +6918,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
return false;
}
- tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (stmt_info);
-
+ tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]);
gcc_assert (vectype_in);
/* Compute number of effective vector statements for costing. */
@@ -7465,13 +6926,13 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
vectype_in);
gcc_assert (ncopies_for_cost >= 1);
- if (vect_is_emulated_mixed_dot_prod (stmt_info))
+ if (vect_is_emulated_mixed_dot_prod (slp_node))
{
   /* We need two extra invariants: one that contains the minimum signed
value and one that contains half of its negative. */
int prologue_stmts = 2;
unsigned cost = record_stmt_cost (cost_vec, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, slp_node, 0,
vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "vectorizable_lane_reducing: "
@@ -7481,7 +6942,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
ncopies_for_cost *= 4;
}
- record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, stmt_info,
+ record_stmt_cost (cost_vec, (int) ncopies_for_cost, vector_stmt, slp_node,
0, vect_body);
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
@@ -7493,7 +6954,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
}
/* Transform via vect_transform_reduction. */
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = reduc_vec_info_type;
return true;
}
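As background for the costing above, a scalar model of a lane-reducing operation (a dot-product here): each output lane consumes several input lanes, which is why vectype_in is taken from the operand node. Illustrative only:

   int
   dot_prod_lane (const signed char *a, const signed char *b)
   {
     int acc = 0;
     /* Four input lanes feed one output lane, so the input vector
        type has four times the lanes of the accumulator type.  */
     for (int k = 0; k < 4; ++k)
       acc += a[k] * b[k];
     return acc;
   }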
@@ -7564,6 +7025,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
bool double_reduc = false;
tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
tree cond_reduc_val = NULL_TREE;
+ const bool reduc_chain
+ = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
/* Make sure it was already recognized as a reduction computation. */
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
@@ -7571,9 +7034,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
return false;
- /* The stmt we store reduction analysis meta on. */
- stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
- reduc_info->is_reduc_info = true;
+ /* The reduction meta. */
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
{
@@ -7595,18 +7057,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
/* Analysis for double-reduction is done on the outer
loop PHI, nested cycles have no further restrictions. */
- STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
+ SLP_TREE_TYPE (slp_node) = cycle_phi_info_type;
}
else
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = reduc_vec_info_type;
return true;
}
- stmt_vec_info orig_stmt_of_analysis = stmt_info;
stmt_vec_info phi_info = stmt_info;
if (!is_a <gphi *> (stmt_info->stmt))
{
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = reduc_vec_info_type;
return true;
}
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
@@ -7646,18 +7107,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
unsigned reduc_chain_length = 0;
bool only_slp_reduc_chain = true;
stmt_info = NULL;
- slp_tree slp_for_stmt_info = slp_node_instance->root;
+ slp_tree slp_for_stmt_info = NULL;
+ slp_tree vdef_slp = slp_node_instance->root;
/* For double-reductions we start SLP analysis at the inner loop LC PHI
which is the def of the outer loop live stmt. */
- if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def)
- slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
+ if (VECT_REDUC_INFO_DEF_TYPE (reduc_info) == vect_double_reduction_def)
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0];
while (reduc_def != PHI_RESULT (reduc_def_phi))
{
stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
stmt_vec_info vdef = vect_stmt_to_vectorize (def);
int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
-
- if (reduc_idx == -1)
+ if (STMT_VINFO_REDUC_IDX (vdef) == -1
+ || SLP_TREE_REDUC_IDX (vdef_slp) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7666,21 +7128,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
only_slp_reduc_chain = false;
- /* For epilogue generation live members of the chain need
- to point back to the PHI via their original stmt for
- info_for_reduction to work. For SLP we need to look at
- all lanes here - even though we only will vectorize from
- the SLP node with live lane zero the other live lanes also
- need to be identified as part of a reduction to be able
- to skip code generation for them. */
- if (slp_for_stmt_info)
- {
- for (auto s : SLP_TREE_SCALAR_STMTS (slp_for_stmt_info))
- if (STMT_VINFO_LIVE_P (s))
- STMT_VINFO_REDUC_DEF (vect_orig_stmt (s)) = phi_info;
- }
- else if (STMT_VINFO_LIVE_P (vdef))
- STMT_VINFO_REDUC_DEF (def) = phi_info;
gimple_match_op op;
if (!gimple_extract_op (vdef->stmt, &op))
{
@@ -7699,44 +7146,36 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"conversion in the reduction chain.\n");
return false;
}
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0];
}
else
{
/* First non-conversion stmt. */
if (!stmt_info)
- stmt_info = vdef;
+ {
+ stmt_info = vdef;
+ slp_for_stmt_info = vdef_slp;
+ }
if (lane_reducing_op_p (op.code))
{
- enum vect_def_type dt;
- tree vectype_op;
-
/* The last operand of lane-reducing operation is for
reduction. */
gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1);
- if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op))
- return false;
-
+ slp_tree op_node = SLP_TREE_CHILDREN (vdef_slp)[0];
+ tree vectype_op = SLP_TREE_VECTYPE (op_node);
tree type_op = TREE_TYPE (op.ops[0]);
-
if (!vectype_op)
{
vectype_op = get_vectype_for_scalar_type (loop_vinfo,
type_op);
- if (!vectype_op)
+ if (!vectype_op
+ || !vect_maybe_update_slp_op_vectype (op_node,
+ vectype_op))
return false;
}
- /* For lane-reducing operation vectorizable analysis needs the
- reduction PHI information. */
- STMT_VINFO_REDUC_DEF (def) = phi_info;
-
- /* Each lane-reducing operation has its own input vectype, while
- reduction PHI will record the input vectype with the least
- lanes. */
- STMT_VINFO_REDUC_VECTYPE_IN (vdef) = vectype_op;
-
/* To accommodate lane-reducing operations of mixed input
vectypes, choose input vectype with the least lanes for the
reduction PHI statement, which would result in the most
@@ -7746,14 +7185,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
< GET_MODE_SIZE (SCALAR_TYPE_MODE (type_op))))
vectype_in = vectype_op;
}
- else
- vectype_in = STMT_VINFO_VECTYPE (phi_info);
+ else if (!vectype_in)
+ vectype_in = SLP_TREE_VECTYPE (slp_node);
+ if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
+ {
+ gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp));
+ vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+ }
}
reduc_def = op.ops[reduc_idx];
reduc_chain_length++;
- if (!stmt_info)
- slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
}
/* PHIs should not participate in patterns. */
gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
@@ -7778,7 +7220,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* Not supportable if the reduction variable is used in the loop, unless
it's a reduction chain. */
if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ && !reduc_chain)
return false;
/* Reductions that are not used even in an enclosing outer-loop,
@@ -7805,9 +7247,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
inside the loop body. The last operand is the reduction variable,
which is defined by the loop-header-phi. */
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
- STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
- STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
+ tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
+ VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out;
gimple_match_op op;
if (!gimple_extract_op (stmt_info->stmt, &op))
@@ -7827,7 +7268,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
OK to use them in a reduction chain or when the reduction group
has just one element. */
if (lane_reducing
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && !reduc_chain
&& SLP_TREE_LANES (slp_node) > 1)
{
if (dump_enabled_p ())
@@ -7855,7 +7296,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
stmt_vec_info def_stmt_info;
enum vect_def_type dt;
- if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info,
+ if (!vect_is_simple_use (loop_vinfo, slp_for_stmt_info,
i + opno_adjust, &op.ops[i], &slp_op[i], &dt,
&vectype_op[i], &def_stmt_info))
{
@@ -7899,8 +7340,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
}
- enum vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (phi_info);
- STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type;
+ enum vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
/* If we have a condition reduction, see if we can simplify it further. */
if (reduction_type == COND_REDUCTION)
{
@@ -7908,7 +7348,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
/* When the condition uses the reduction value in the condition, fail. */
- if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
+ if (SLP_TREE_REDUC_IDX (slp_node) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7927,7 +7367,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"optimizing condition reduction with"
" FOLD_EXTRACT_LAST.\n");
- STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION;
+ VECT_REDUC_INFO_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION;
}
else if (cond_reduc_dt == vect_induction_def)
{
@@ -7971,10 +7411,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"condition expression based on "
"integer induction.\n");
- STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code;
- STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)
+ VECT_REDUC_INFO_CODE (reduc_info) = cond_reduc_op_code;
+ VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info)
= cond_reduc_val;
- STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION;
+ VECT_REDUC_INFO_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION;
}
}
else if (cond_reduc_dt == vect_constant_def)
@@ -7995,9 +7435,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"condition expression based on "
"compile time constant.\n");
/* Record reduction code at analysis stage. */
- STMT_VINFO_REDUC_CODE (reduc_info)
+ VECT_REDUC_INFO_CODE (reduc_info)
= integer_onep (e) ? MAX_EXPR : MIN_EXPR;
- STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION;
+ VECT_REDUC_INFO_TYPE (reduc_info) = CONST_COND_REDUCTION;
}
}
}
@@ -8014,7 +7454,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (nested_cycle)
{
- gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info)
+ gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info)
== vect_double_reduction_def);
double_reduc = true;
}
@@ -8054,7 +7494,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
(and also the same tree-code) when generating the epilog code and
when generating the code inside the loop. */
- code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info);
+ code_helper orig_code = VECT_REDUC_INFO_CODE (reduc_info);
/* If conversion might have created a conditional operation like
IFN_COND_ADD already. Use the internal code for the following checks. */
@@ -8064,9 +7504,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
orig_code = new_code != ERROR_MARK ? new_code : orig_code;
}
- STMT_VINFO_REDUC_CODE (reduc_info) = orig_code;
+ VECT_REDUC_INFO_CODE (reduc_info) = orig_code;
- reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
+ reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
if (reduction_type == TREE_CODE_REDUCTION)
{
/* Check whether it's ok to change the order of the computation.
@@ -8078,7 +7518,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
outer-loop vectorization is safe. Likewise when we are vectorizing
a series of reductions using SLP and the VF is one the reductions
are performed in scalar order. */
- if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ if (!reduc_chain
&& known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u))
;
else if (needs_fold_left_reduction_p (op.type, orig_code))
@@ -8106,7 +7546,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"supported.\n");
return false;
}
- STMT_VINFO_REDUC_TYPE (reduc_info)
+ VECT_REDUC_INFO_TYPE (reduc_info)
= reduction_type = FOLD_LEFT_REDUCTION;
}
else if (!commutative_binary_op_p (orig_code, op.type)
@@ -8177,7 +7617,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
OPTIMIZE_FOR_SPEED))
reduc_fn = IFN_REDUC_MAX;
}
- STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn;
+ VECT_REDUC_INFO_FN (reduc_info) = reduc_fn;
if (reduction_type != EXTRACT_LAST_REDUCTION
&& (!nested_cycle || double_reduc)
@@ -8194,7 +7634,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* For SLP reductions, see if there is a neutral value we can use. */
tree neutral_op = NULL_TREE;
tree initial_value = NULL_TREE;
- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL)
+ if (reduc_chain)
initial_value = vect_phi_initial_value (reduc_def_phi);
neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
orig_code, initial_value);
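For reference, the neutral values this relies on follow the usual algebra; MIN and MAX have no general neutral element, which is why the initial value is passed in. A sketch of the assumed mapping:

   /* PLUS_EXPR, BIT_IOR_EXPR, BIT_XOR_EXPR -> 0
      MULT_EXPR                             -> 1
      BIT_AND_EXPR                          -> ~0 (all-ones)
      MIN_EXPR, MAX_EXPR                    -> the initial value itself  */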
@@ -8224,7 +7664,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (reduction_type == FOLD_LEFT_REDUCTION
&& SLP_TREE_LANES (slp_node) > 1
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ && !reduc_chain)
{
/* We cannot use in-order reductions in this case because there is
an implicit reassociation of the operations involved. */
@@ -8252,7 +7692,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
/* Check extra constraints for variable-length unchained SLP reductions. */
- if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ if (!reduc_chain
&& !nunits_out.is_constant ())
{
/* We checked above that we could build the initial vector when
@@ -8346,7 +7786,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
own reduction accumulator since one of the main goals of unrolling a
reduction is to reduce the aggregate loop-carried latency. */
if (ncopies > 1
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && !reduc_chain
&& SLP_TREE_LANES (slp_node) == 1
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
&& reduc_chain_length == 1
@@ -8390,7 +7830,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"using single def-use cycle for reduction by reducing "
"multiple vectors to one in the loop body\n");
- STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle;
+ VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle;
/* For lane-reducing operation, the below processing related to single
defuse-cycle will be done in its own vectorizable function. One more
@@ -8408,19 +7848,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
- vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn,
+ vect_model_reduction_cost (loop_vinfo, slp_for_stmt_info, reduc_fn,
reduction_type, ncopies, cost_vec);
/* Cost the reduction op inside the loop if transformed via
vect_transform_reduction for non-lane-reducing operation. Otherwise
this is costed by the separate vectorizable_* routines. */
if (single_defuse_cycle)
- record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body);
+ record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ slp_for_stmt_info, 0, vect_body);
if (dump_enabled_p ()
&& reduction_type == FOLD_LEFT_REDUCTION)
dump_printf_loc (MSG_NOTE, vect_location,
"using an in-order (fold-left) reduction.\n");
- STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type;
+ SLP_TREE_TYPE (slp_node) = cycle_phi_info_type;
/* All but single defuse-cycle optimized and fold-left reductions go
through their own vectorizable_* routines. */
@@ -8526,17 +7967,17 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
slp_tree slp_node)
{
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = SLP_TREE_VECTYPE (slp_node);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
unsigned vec_num;
- stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
- gcc_assert (reduc_info->is_reduc_info);
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
if (nested_in_vect_loop_p (loop, stmt_info))
{
loop = loop->inner;
- gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def);
+ gcc_assert (VECT_REDUC_INFO_DEF_TYPE (reduc_info)
+ == vect_double_reduction_def);
}
gimple_match_op op;
@@ -8547,13 +7988,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the
reduction variable. */
- stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
- gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
- int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
- tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (stmt_info);
-
- if (!vectype_in)
- vectype_in = STMT_VINFO_VECTYPE (stmt_info);
+ int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
+ tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]);
vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in);
@@ -8588,18 +8024,18 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
- vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
+ vect_reduction_type reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
if (reduction_type == FOLD_LEFT_REDUCTION)
{
- internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
+ internal_fn reduc_fn = VECT_REDUC_INFO_FN (reduc_info);
gcc_assert (code.is_tree_code () || cond_fn_p);
return vectorize_fold_left_reduction
- (loop_vinfo, stmt_info, gsi, slp_node, reduc_def_phi,
+ (loop_vinfo, stmt_info, gsi, slp_node,
code, reduc_fn, op.num_ops, vectype_in,
reduc_index, masks, lens);
}
- bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
+ bool single_defuse_cycle = VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info);
bool lane_reducing = lane_reducing_op_p (code);
gcc_assert (single_defuse_cycle || lane_reducing);
@@ -8618,7 +8054,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
if (!cond_fn_p)
{
gcc_assert (reduc_index >= 0 && reduc_index <= 2);
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1,
+ vect_get_vec_defs (loop_vinfo, slp_node,
single_defuse_cycle && reduc_index == 0
? NULL_TREE : op.ops[0], &vec_oprnds[0],
single_defuse_cycle && reduc_index == 1
@@ -8633,19 +8069,19 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
vectype. */
gcc_assert (single_defuse_cycle
&& (reduc_index == 1 || reduc_index == 2));
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, op.ops[0],
- truth_type_for (vectype_in), &vec_oprnds[0],
+ vect_get_vec_defs (loop_vinfo, slp_node, op.ops[0],
+ &vec_oprnds[0],
reduc_index == 1 ? NULL_TREE : op.ops[1],
- NULL_TREE, &vec_oprnds[1],
+ &vec_oprnds[1],
reduc_index == 2 ? NULL_TREE : op.ops[2],
- NULL_TREE, &vec_oprnds[2]);
+ &vec_oprnds[2]);
}
/* For single def-use cycles get one copy of the vectorized reduction
definition. */
if (single_defuse_cycle)
{
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1,
+ vect_get_vec_defs (loop_vinfo, slp_node,
reduc_index == 0 ? op.ops[0] : NULL_TREE,
&vec_oprnds[0],
reduc_index == 1 ? op.ops[1] : NULL_TREE,
@@ -8730,7 +8166,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
}
}
- tree reduc_vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
+ tree reduc_vectype_in = vectype_in;
gcc_assert (reduc_vectype_in);
unsigned effec_reduc_ncopies
@@ -8742,11 +8178,11 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
{
/* Find suitable def-use cycles to generate vectorized statements
into, and reorder operands based on the selection. */
- unsigned curr_pos = reduc_info->reduc_result_pos;
+ unsigned curr_pos = VECT_REDUC_INFO_RESULT_POS (reduc_info);
unsigned next_pos = (curr_pos + effec_ncopies) % effec_reduc_ncopies;
gcc_assert (curr_pos < effec_reduc_ncopies);
- reduc_info->reduc_result_pos = next_pos;
+ VECT_REDUC_INFO_RESULT_POS (reduc_info) = next_pos;
if (curr_pos)
{
@@ -8772,7 +8208,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
}
}
- bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (stmt_info);
+ bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (slp_node);
unsigned num = vec_oprnds[reduc_index == 0 ? 1 : 0].length ();
unsigned mask_index = 0;
@@ -8873,11 +8309,13 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
slp_tree slp_node, slp_instance slp_node_instance)
{
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = SLP_TREE_VECTYPE (slp_node);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i;
bool nested_cycle = false;
int vec_num;
+ const bool reduc_chain
+ = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
if (nested_in_vect_loop_p (loop, stmt_info))
{
@@ -8885,13 +8323,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
nested_cycle = true;
}
- stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
- reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
- stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
- gcc_assert (reduc_info->is_reduc_info);
-
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
- || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
+ if (reduc_info
+ && (VECT_REDUC_INFO_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
+ || VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION))
/* Leave the scalar phi in place. */
return true;
@@ -8899,7 +8334,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Check whether we should use a single PHI node and accumulate
vectors to one before the backedge. */
- if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
+ if (reduc_info && VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info))
vec_num = 1;
/* Create the destination vector */
@@ -8914,23 +8349,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Optimize: if initial_def is for REDUC_MAX smaller than the base
and we can't use zero for induc_val, use initial_def. Similarly
for REDUC_MIN and initial_def larger than the base. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
+ if (reduc_info
+ && VECT_REDUC_INFO_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
{
gcc_assert (SLP_TREE_LANES (slp_node) == 1);
tree initial_def = vect_phi_initial_value (phi);
- reduc_info->reduc_initial_values.safe_push (initial_def);
- tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
+ VECT_REDUC_INFO_INITIAL_VALUES (reduc_info).safe_push (initial_def);
+ tree induc_val = VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info);
if (TREE_CODE (initial_def) == INTEGER_CST
&& !integer_zerop (induc_val)
- && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
+ && ((VECT_REDUC_INFO_CODE (reduc_info) == MAX_EXPR
&& tree_int_cst_lt (initial_def, induc_val))
- || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
+ || (VECT_REDUC_INFO_CODE (reduc_info) == MIN_EXPR
&& tree_int_cst_lt (induc_val, initial_def))))
{
induc_val = initial_def;
	  /* Communicate we used the initial_def to epilogue
generation. */
- STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
+ VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
}
vec_initial_defs.quick_push
(build_vector_from_val (vectype_out, induc_val));
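A compact model of the choice made just above, with illustrative values: for a MAX_EXPR reduction with induc_val 42 and constant initial_def 7, initial_def can fill the vector directly (7 < 42), letting the epilogue skip the compare-and-select against induc_val.

   int
   pick_induc_val (bool code_is_max, int initial_def, int induc_val)
   {
     /* Mirrors the condition above: prefer initial_def whenever it
        cannot collide with values the induction can produce.  */
     if (code_is_max ? initial_def < induc_val : induc_val < initial_def)
       return initial_def;
     return induc_val;
   }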
@@ -8944,11 +8380,11 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
else
{
gcc_assert (slp_node == slp_node_instance->reduc_phis);
- vec<tree> &initial_values = reduc_info->reduc_initial_values;
+ vec<tree> &initial_values = VECT_REDUC_INFO_INITIAL_VALUES (reduc_info);
vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
unsigned int num_phis = stmts.length ();
- if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
+ if (reduc_chain)
num_phis = 1;
initial_values.reserve (num_phis);
for (unsigned int i = 0; i < num_phis; ++i)
@@ -8957,12 +8393,12 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
initial_values.quick_push (vect_phi_initial_value (this_phi));
}
if (vec_num == 1)
- vect_find_reusable_accumulator (loop_vinfo, reduc_info);
+ vect_find_reusable_accumulator (loop_vinfo, reduc_info, vectype_out);
if (!initial_values.is_empty ())
{
tree initial_value
= (num_phis == 1 ? initial_values[0] : NULL_TREE);
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
+ code_helper code = VECT_REDUC_INFO_CODE (reduc_info);
tree neutral_op
= neutral_op_for_reduction (TREE_TYPE (vectype_out),
code, initial_value);
@@ -8972,15 +8408,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
requires to keep the initial value live across the loop. */
if (neutral_op
&& initial_values.length () == 1
- && !reduc_info->reused_accumulator
+ && !VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info)
&& STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
&& !operand_equal_p (neutral_op, initial_values[0]))
{
- STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
+ VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT (reduc_info)
= initial_values[0];
initial_values[0] = neutral_op;
}
- get_initial_defs_for_reduction (loop_vinfo, reduc_info,
+ get_initial_defs_for_reduction (loop_vinfo, reduc_info, vectype_out,
&vec_initial_defs, vec_num,
stmts.length (), neutral_op);
}
@@ -8992,7 +8428,8 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
vec_initial_defs.quick_push (vec_initial_def);
}
- if (auto *accumulator = reduc_info->reused_accumulator)
+ if (reduc_info)
+ if (auto *accumulator = VECT_REDUC_INFO_REUSED_ACCUMULATOR (reduc_info))
{
tree def = accumulator->reduc_input;
if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
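When the reused accumulator is wider than the epilogue's vector type, vect_create_partial_epilog folds it down with the reduction code. A scalar sketch for a PLUS reduction narrowing an 8-lane accumulator to 4 lanes (illustrative):

   void
   halve_accumulator (const int *acc8, int *acc4)
   {
     /* Low half combined with high half, one reduction step.  */
     for (int i = 0; i < 4; ++i)
       acc4[i] = acc8[i] + acc8[i + 4];
   }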
@@ -9015,7 +8452,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
TYPE_VECTOR_SUBPARTS
(vectype_out));
def = vect_create_partial_epilog (def, rvectype,
- STMT_VINFO_REDUC_CODE
+ VECT_REDUC_INFO_CODE
(reduc_info),
&stmts);
}
@@ -9104,7 +8541,19 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo,
"incompatible vector types for invariants\n");
return false;
}
- STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
+
+ /* ??? This can happen with data vs. mask uses of boolean. */
+ if (!useless_type_conversion_p (SLP_TREE_VECTYPE (slp_node),
+ SLP_TREE_VECTYPE
+ (SLP_TREE_CHILDREN (slp_node)[0])))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "missed mask promotion\n");
+ return false;
+ }
+
+ SLP_TREE_TYPE (slp_node) = lc_phi_info_type;
return true;
}
@@ -9114,13 +8563,13 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo,
slp_tree slp_node)
{
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
tree scalar_dest = gimple_phi_result (stmt_info->stmt);
basic_block bb = gimple_bb (stmt_info->stmt);
edge e = single_pred_edge (bb);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
auto_vec<tree> vec_oprnds;
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1,
+ vect_get_vec_defs (loop_vinfo, slp_node,
gimple_phi_arg_def (stmt_info->stmt, 0), &vec_oprnds);
for (unsigned i = 0; i < vec_oprnds.length (); i++)
{
@@ -9136,8 +8585,8 @@ vect_transform_lc_phi (loop_vec_info loop_vinfo,
/* Vectorizes PHIs. */
bool
-vectorizable_phi (vec_info *,
- stmt_vec_info stmt_info, gimple **vec_stmt,
+vectorizable_phi (bb_vec_info,
+ stmt_vec_info stmt_info,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
if (!is_a <gphi *> (stmt_info->stmt) || !slp_node)
@@ -9148,7 +8597,7 @@ vectorizable_phi (vec_info *,
tree vectype = SLP_TREE_VECTYPE (slp_node);
- if (!vec_stmt) /* transformation not required. */
+ if (cost_vec) /* transformation not required. */
{
slp_tree child;
unsigned i;
@@ -9188,8 +8637,8 @@ vectorizable_phi (vec_info *,
favoring the vector path (but may pessimize it in some cases). */
if (gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) > 1)
record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
- vector_stmt, stmt_info, vectype, 0, vect_body);
- STMT_VINFO_TYPE (stmt_info) = phi_info_type;
+ vector_stmt, slp_node, vectype, 0, vect_body);
+ SLP_TREE_TYPE (slp_node) = phi_info_type;
return true;
}
@@ -9279,8 +8728,7 @@ vectorizable_phi (vec_info *,
bool
vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
- gimple **vec_stmt, slp_tree slp_node,
- stmt_vector_for_cost *cost_vec)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
if (!loop_vinfo || !is_a<gphi *> (stmt_info->stmt))
return false;
@@ -9291,14 +8739,10 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
return false;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- unsigned ncopies;
- if (slp_node)
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
+ unsigned ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1;
+ unsigned dist = SLP_TREE_LANES (slp_node);
/* We need to be able to make progress with a single vector. */
if (maybe_gt (dist * 2, nunits))
{
@@ -9309,6 +8753,33 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
return false;
}
+ /* We need to be able to build a { ..., a, b } init vector with
+ dist number of distinct trailing values. Always possible
+ when dist == 1 or when nunits is constant or when the initializations
+ are uniform. */
+ tree uniform_initval = NULL_TREE;
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ gphi *phi = as_a <gphi *> (s->stmt);
+ if (! uniform_initval)
+ uniform_initval = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+ else if (! operand_equal_p (uniform_initval,
+ PHI_ARG_DEF_FROM_EDGE (phi, pe)))
+ {
+ uniform_initval = NULL_TREE;
+ break;
+ }
+ }
+ if (!uniform_initval && !nunits.is_constant ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot build initialization vector for "
+ "first order recurrence\n");
+ return false;
+ }
+
/* First-order recurrence autovectorization needs to handle permutation
with indices = [nunits-1, nunits, nunits+1, ...]. */
vec_perm_builder sel (nunits, 1, 3);
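A worked example of the selector built here: with nunits == 4 and dist == 1 the indices are {3, 4, 5, 6}, i.e. the last lane of the previous vector followed by the first three lanes of the current one, which realizes the one-iteration lag of the recurrence.

   #include <cstdio>

   int
   main ()
   {
     const unsigned nunits = 4, dist = 1;
     for (unsigned i = 0; i < nunits; ++i)
       printf ("%u ", nunits - dist + i);   /* Prints: 3 4 5 6  */
     return 0;
   }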
@@ -9316,48 +8787,30 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
sel.quick_push (nunits - dist + i);
vec_perm_indices indices (sel, 2, nunits);
- if (!vec_stmt) /* transformation not required. */
+ if (cost_vec) /* transformation not required. */
{
if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
indices))
return false;
- if (slp_node)
- {
- /* We eventually need to set a vector type on invariant
- arguments. */
- unsigned j;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
- if (!vect_maybe_update_slp_op_vectype
- (child, SLP_TREE_VECTYPE (slp_node)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "incompatible vector types for "
- "invariants\n");
- return false;
- }
- }
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype (child, vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
/* Verify we have set up compatible types. */
edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
- tree latch_vectype = NULL_TREE;
- if (slp_node)
- {
- slp_tree latch_def = SLP_TREE_CHILDREN (slp_node)[le->dest_idx];
- latch_vectype = SLP_TREE_VECTYPE (latch_def);
- }
- else
- {
- tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, le);
- if (TREE_CODE (latch_def) == SSA_NAME)
- {
- stmt_vec_info latch_def_info = loop_vinfo->lookup_def (latch_def);
- latch_def_info = vect_stmt_to_vectorize (latch_def_info);
- latch_vectype = STMT_VINFO_VECTYPE (latch_def_info);
- }
- }
+ slp_tree latch_def = SLP_TREE_CHILDREN (slp_node)[le->dest_idx];
+ tree latch_vectype = SLP_TREE_VECTYPE (latch_def);
if (!types_compatible_p (latch_vectype, vectype))
return false;
@@ -9365,36 +8818,50 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
for each copy. With SLP the prologue value is explicitly
represented and costed separately. */
unsigned prologue_cost = 0;
- if (!slp_node)
- prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec,
- stmt_info, 0, vect_prologue);
unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
+ slp_node, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizable_recurr: inside_cost = %d, "
"prologue_cost = %d .\n", inside_cost,
prologue_cost);
- STMT_VINFO_TYPE (stmt_info) = recurr_info_type;
+ SLP_TREE_TYPE (slp_node) = recurr_info_type;
return true;
}
- edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
- basic_block bb = gimple_bb (phi);
- tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
- if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader)))
+ tree vec_init;
+ if (! uniform_initval)
{
- gimple_seq stmts = NULL;
- preheader = gimple_convert (&stmts, TREE_TYPE (vectype), preheader);
- gsi_insert_seq_on_edge_immediate (pe, stmts);
+ vec<constructor_elt, va_gc> *v = NULL;
+ vec_alloc (v, nunits.to_constant ());
+ for (unsigned i = 0; i < nunits.to_constant () - dist; ++i)
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (TREE_TYPE (vectype)));
+ for (stmt_vec_info s : SLP_TREE_SCALAR_STMTS (slp_node))
+ {
+ gphi *phi = as_a <gphi *> (s->stmt);
+ tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (preheader)))
+ {
+ gimple_seq stmts = NULL;
+ preheader = gimple_convert (&stmts,
+ TREE_TYPE (vectype), preheader);
+ gsi_insert_seq_on_edge_immediate (pe, stmts);
+ }
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, preheader);
+ }
+ vec_init = build_constructor (vectype, v);
}
- tree vec_init = build_vector_from_val (vectype, preheader);
+ else
+ vec_init = uniform_initval;
vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL);
/* Create the vectorized first-order PHI node. */
tree vec_dest = vect_get_new_vect_var (vectype,
vect_simple_var, "vec_recur_");
+ basic_block bb = gimple_bb (phi);
gphi *new_phi = create_phi_node (vec_dest, bb);
add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION);
@@ -9419,14 +8886,9 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
NULL, perm);
vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2);
- if (slp_node)
- slp_node->push_vec_def (vperm);
- else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (vperm);
+ slp_node->push_vec_def (vperm);
}
- if (!slp_node)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
@@ -9738,7 +9200,7 @@ vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype,
static bool
vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
- gimple **vec_stmt, slp_tree slp_node,
+ slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -9894,13 +9356,13 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
gcc_unreachable ();
}
- if (!vec_stmt) /* transformation not required. */
+ if (cost_vec) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
/* loop cost for vec_loop. Neg induction doesn't have any
inside_cost. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
+ slp_node, 0, vect_body);
/* loop cost for vec_loop. Neg induction doesn't have any
inside_cost. */
@@ -9909,7 +9371,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
/* prologue cost for vec_init and vec_step. */
prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ slp_node, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -9917,7 +9379,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
"prologue_cost = %d. \n", inside_cost,
prologue_cost);
- STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction");
return true;
}
@@ -10049,8 +9511,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
bool
vectorizable_induction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
- gimple **vec_stmt, slp_tree slp_node,
- stmt_vector_for_cost *cost_vec)
+ slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
bool nested_in_vect_loop = false;
@@ -10084,7 +9545,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Handle nonlinear induction in a separate place. */
if (induction_type != vect_step_op_add)
return vectorizable_nonlinear_induction (loop_vinfo, stmt_info,
- vec_stmt, slp_node, cost_vec);
+ slp_node, cost_vec);
tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -10165,6 +9626,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
}
tree stept = TREE_TYPE (step_expr);
tree step_vectype = get_same_sized_vectype (stept, vectype);
+ stept = TREE_TYPE (step_vectype);
/* Check for target support of the vectorized arithmetic used here. */
if (!target_supports_op_p (step_vectype, PLUS_EXPR, optab_default)
@@ -10187,7 +9649,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
}
}
- if (!vec_stmt) /* transformation not required. */
+ if (cost_vec) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
/* We eventually need to set a vector type on invariant
@@ -10207,18 +9669,18 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* loop cost for vec_loop. */
inside_cost = record_stmt_cost (cost_vec,
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
- vector_stmt, stmt_info, 0, vect_body);
+ vector_stmt, slp_node, 0, vect_body);
/* prologue cost for vec_init (if not nested) and step. */
prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
scalar_to_vec,
- stmt_info, 0, vect_prologue);
+ slp_node, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
"prologue_cost = %d .\n", inside_cost,
prologue_cost);
- STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+ SLP_TREE_TYPE (slp_node) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_induction");
return true;
}
@@ -10468,7 +9930,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (peel_mul)
{
if (!step_mul)
- step_mul = peel_mul;
+ {
+ gcc_assert (!nunits.is_constant ());
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ build_zero_cst (step_vectype), peel_mul);
+ }
else
step_mul = gimple_build (&init_stmts,
MINUS_EXPR, step_vectype,
@@ -10619,9 +10086,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
helper function for vectorizable_live_operation. */
static tree
-vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, basic_block exit_bb,
- tree vectype, int ncopies, slp_tree slp_node,
+vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
+ tree vectype, slp_tree slp_node,
tree bitsize, tree bitstart, tree vec_lhs,
tree lhs_type, gimple_stmt_iterator *exit_gsi)
{
@@ -10652,8 +10118,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (ncopies == 1
- && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
tree len = vect_get_loop_len (loop_vinfo, &gsi,
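For reference, the scalar semantics of the extraction being built here: the live-out value is the last lane the loop control (mask or length) considered active. A minimal model, not the emitted GIMPLE:

   int
   extract_last (const bool *mask, const int *v, int n)
   {
     int last = 0;   /* Value if no lane is active.  */
     for (int i = 0; i < n; ++i)
       if (mask[i])
         last = v[i];
     return last;
   }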
@@ -10688,8 +10153,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
- tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ tree scalar_type = TREE_TYPE (vectype);
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
tree mask = vect_get_loop_mask (loop_vinfo, &gsi,
@@ -10735,11 +10200,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
imm_use_iterator imm_iter;
tree lhs, lhs_type, bitsize;
- tree vectype = (slp_node
- ? SLP_TREE_VECTYPE (slp_node)
- : STMT_VINFO_VECTYPE (stmt_info));
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies;
gimple *use_stmt;
use_operand_p use_p;
auto_vec<tree> vec_oprnds;
@@ -10752,18 +10214,18 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
/* If a stmt of a reduction is live, vectorize it via
vect_create_epilog_for_reduction. vectorizable_reduction assessed
validity so just trigger the transform here. */
- if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
+ if (vect_is_reduction (slp_node))
{
if (!vec_stmt_p)
return true;
/* For SLP reductions we vectorize the epilogue for all involved stmts
- together. */
- if (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && slp_index != 0)
+ together. For SLP reduction chains we only get here once. */
+ if (SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_group
+ && slp_index != 0)
return true;
- stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
- gcc_assert (reduc_info->is_reduc_info);
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
- || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
+ vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
+ if (VECT_REDUC_INFO_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
+ || VECT_REDUC_INFO_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
return true;
if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
@@ -10776,17 +10238,18 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
block, but we have to find an alternate exit first. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- slp_tree phis_node = slp_node ? slp_node_instance->reduc_phis : NULL;
+ slp_tree phis_node = slp_node_instance->reduc_phis;
+ stmt_info = SLP_TREE_REPRESENTATIVE (phis_node);
for (auto exit : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo)))
if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo))
{
- vect_create_epilog_for_reduction (loop_vinfo, reduc_info,
+ vect_create_epilog_for_reduction (loop_vinfo, stmt_info,
phis_node, slp_node_instance,
exit);
break;
}
if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
- vect_create_epilog_for_reduction (loop_vinfo, reduc_info,
+ vect_create_epilog_for_reduction (loop_vinfo, stmt_info,
phis_node, slp_node_instance,
LOOP_VINFO_IV_EXIT (loop_vinfo));
}
@@ -10807,32 +10270,24 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
return true;
}
- if (slp_node)
- ncopies = 1;
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
-
- if (slp_node)
- {
- gcc_assert (slp_index >= 0);
+ gcc_assert (slp_index >= 0);
- /* Get the last occurrence of the scalar index from the concatenation of
- all the slp vectors. Calculate which slp vector it is and the index
- within. */
- int num_scalar = SLP_TREE_LANES (slp_node);
- int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
+ /* Get the last occurrence of the scalar index from the concatenation of
+ all the slp vectors. Calculate which slp vector it is and the index
+ within. */
+ int num_scalar = SLP_TREE_LANES (slp_node);
+ int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
- /* Calculate which vector contains the result, and which lane of
- that vector we need. */
- if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Cannot determine which vector holds the"
- " final result.\n");
- return false;
- }
+ /* Calculate which vector contains the result, and which lane of
+ that vector we need. */
+ if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Cannot determine which vector holds the"
+ " final result.\n");
+ return false;
}
if (!vec_stmt_p)
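A worked example of the position arithmetic above, with illustrative numbers: 3 scalar lanes spread over 2 vector statements of 4 lanes gives pos = 2*4 - 3 + slp_index, so slp_index 0 lands in vector 1 at lane 1.

   #include <cstdio>

   int
   main ()
   {
     const int num_scalar = 3, num_vec = 2, nunits = 4, slp_index = 0;
     int pos = num_vec * nunits - num_scalar + slp_index;   /* 5  */
     printf ("vec_entry %d, vec_index %d\n", pos / nunits, pos % nunits);
     return 0;
   }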
@@ -10840,7 +10295,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
/* No transformation required. */
if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (slp_node && SLP_TREE_LANES (slp_node) != 1)
+ if (SLP_TREE_LANES (slp_node) != 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10849,8 +10304,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
"the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else if (ncopies > 1
- || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
+ else if (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10860,8 +10314,6 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- gcc_assert (ncopies == 1
- && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
vect_record_loop_mask (loop_vinfo,
@@ -10886,7 +10338,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
}
/* ??? Enable for loop costing as well. */
if (!loop_vinfo)
- record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE,
+ record_stmt_cost (cost_vec, 1, vec_to_scalar, slp_node,
0, vect_epilogue);
return true;
}
@@ -10903,40 +10355,21 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
bitsize = vector_element_bits_tree (vectype);
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
- tree vec_lhs, vec_lhs0, bitstart;
- gimple *vec_stmt, *vec_stmt0;
- if (slp_node)
- {
- gcc_assert (!loop_vinfo
- || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
- || SLP_TREE_LANES (slp_node) == 1));
-
- /* Get the correct slp vectorized stmt. */
- vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
- vec_stmt = SSA_NAME_DEF_STMT (vec_lhs);
+ gcc_assert (!loop_vinfo
+ || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ || SLP_TREE_LANES (slp_node) == 1));
- /* In case we need to early break vectorize also get the first stmt. */
- vec_lhs0 = SLP_TREE_VEC_DEFS (slp_node)[0];
- vec_stmt0 = SSA_NAME_DEF_STMT (vec_lhs0);
+ /* Get the correct slp vectorized stmt. */
+ tree vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
+ gimple *vec_stmt = SSA_NAME_DEF_STMT (vec_lhs);
- /* Get entry to use. */
- bitstart = bitsize_int (vec_index);
- bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
- }
- else
- {
- /* For multiple copies, get the last copy. */
- vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last ();
- vec_lhs = gimple_get_lhs (vec_stmt);
+ /* In case we need to early break vectorize also get the first stmt. */
+ tree vec_lhs0 = SLP_TREE_VEC_DEFS (slp_node)[0];
- /* In case we need to early break vectorize also get the first stmt. */
- vec_stmt0 = STMT_VINFO_VEC_STMTS (stmt_info)[0];
- vec_lhs0 = gimple_get_lhs (vec_stmt0);
-
- /* Get the last lane in the vector. */
- bitstart = int_const_binop (MULT_EXPR, bitsize, bitsize_int (nunits - 1));
- }
+ /* Get entry to use. */
+ tree bitstart = bitsize_int (vec_index);
+ bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
if (loop_vinfo)
{
@@ -10985,8 +10418,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
gimple_stmt_iterator exit_gsi;
tree new_tree
- = vectorizable_live_operation_1 (loop_vinfo, stmt_info,
- e->dest, vectype, ncopies,
+ = vectorizable_live_operation_1 (loop_vinfo,
+ e->dest, vectype,
slp_node, bitsize,
tmp_bitstart, tmp_vec_lhs,
lhs_type, &exit_gsi);
@@ -11412,7 +10845,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
factor = exact_div (nunits1, nunits2).to_constant ();
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
gimple_seq seq = NULL;
- loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len,
+ loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
build_int_cst (iv_type, factor));
if (seq)
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
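The switch to EXACT_DIV_EXPR also documents the invariant this relies on: the length was computed for the rgroup with more lanes, so the division by FACTOR is exact by construction (RDIV_EXPR denotes real division and was misleading here). Worked numbers, chosen for illustration:

   #include <cstdio>

   int
   main ()
   {
     /* Length computed for a 16-lane rgroup, consumer uses 4 lanes.  */
     const int nunits1 = 16, nunits2 = 4, loop_len = 12;
     const int factor = nunits1 / nunits2;   /* exact_div: 4  */
     printf ("loop_len %d -> %d\n", loop_len, loop_len / factor);
     return 0;
   }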
@@ -11472,7 +10905,7 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
profile_count entry_count = loop_preheader_edge (loop)->count ();
/* If we have unreliable loop profile avoid dropping entry
- count bellow header count. This can happen since loops
+ count below header count. This can happen since loops
     have unrealistically low trip counts.  */
while (vf > 1
&& loop->header->count > entry_count
@@ -11499,42 +10932,26 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
get_likely_max_loop_iterations_int (loop));
}
-/* Helper function to pass to simplify_replace_tree to enable replacing tree's
- in the hash_map with its corresponding values. */
-
-static tree
-find_in_mapping (tree t, void *context)
-{
- hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context;
-
- tree *value = mapping->get (t);
- return value ? *value : t;
-}
-
/* Update EPILOGUE's loop_vec_info. EPILOGUE was constructed as a copy of the
original loop that has now been vectorized.
The inits of the data_references need to be advanced with the number of
iterations of the main loop. This has been computed in vect_do_peeling and
- is stored in parameter ADVANCE. We first restore the data_references
- initial offset with the values recored in ORIG_DRS_INIT.
+ is stored in parameter ADVANCE.
Since the loop_vec_info of this EPILOGUE was constructed for the original
loop, its stmt_vec_infos all point to the original statements. These need
- to be updated to point to their corresponding copies as well as the SSA_NAMES
- in their PATTERN_DEF_SEQs and RELATED_STMTs.
+ to be updated to point to their corresponding copies.
The data_reference's connections also need to be updated. Their
corresponding dr_vec_infos need to be reconnected to the EPILOGUE's
- stmt_vec_infos, their statements need to point to their corresponding copy,
- if they are gather loads or scatter stores then their reference needs to be
- updated to point to its corresponding copy. */
+ stmt_vec_infos, their statements need to point to their corresponding
+ copy. */
static void
update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
{
loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
- auto_vec<gimple *> stmt_worklist;
hash_map<tree,tree> mapping;
gimple *orig_stmt, *new_stmt;
gimple_stmt_iterator epilogue_gsi;
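As the updated comment above says, the only offset fixup the epilogue now needs is advancing each data reference's init by the ADVANCE iterations the main loop already executed. A minimal standalone model of that adjustment for an affine access; the function and its byte-offset representation are illustrative assumptions, not the GCC API:

/* Model: an access starting at INIT bytes and moving STEP bytes per
   iteration must start STEP * ADVANCE bytes further once ADVANCE
   iterations have already run.  */
static long
advance_dr_init (long init, long step, long advance)
{
  return init + step * advance;
}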
@@ -11549,9 +10966,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
/* The EPILOGUE loop is a copy of the original loop so they share the same
gimple UIDs. In this loop we update the loop_vec_info of the EPILOGUE to
- point to the copied statements. We also create a mapping of all LHS' in
- the original loop and all the LHS' in the EPILOGUE and create worklists to
- update the STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs. */
+ point to the copied statements. */
for (unsigned i = 0; i < epilogue->num_nodes; ++i)
{
for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
@@ -11563,14 +10978,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
stmt_vinfo
= epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
- orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
-
- mapping.put (gimple_phi_result (orig_stmt),
- gimple_phi_result (new_stmt));
- /* PHI nodes can not have patterns or related statements. */
- gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL
- && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL);
}
for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]);
@@ -11584,25 +10992,12 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
stmt_vinfo
= epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
- orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
- if (tree old_lhs = gimple_get_lhs (orig_stmt))
- mapping.put (old_lhs, gimple_get_lhs (new_stmt));
-
- if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
- {
- gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
- for (gimple_stmt_iterator gsi = gsi_start (seq);
- !gsi_end_p (gsi); gsi_next (&gsi))
- stmt_worklist.safe_push (gsi_stmt (gsi));
- }
-
related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
if (related_vinfo != NULL && related_vinfo != stmt_vinfo)
{
gimple *stmt = STMT_VINFO_STMT (related_vinfo);
- stmt_worklist.safe_push (stmt);
/* Set BB such that the assert in
'get_initial_defs_for_reduction' is able to determine that
the BB of the related stmt is inside this loop. */
@@ -11615,33 +11010,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
}
}
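The two walks above rely on the epilogue sharing gimple UIDs with the original loop: uid - 1 indexes straight into stmt_vec_infos, so every record is repointed to its copy in one pass, which is what lets the LHS mapping and worklist below be dropped. A standalone model of the uid-indexed repointing, with std::vector standing in for the stmt_vec_infos array:

#include <vector>

struct gstmt { unsigned uid; };      /* uid is 1-based, shared with the copy */
struct stmt_record { gstmt *stmt; };

/* Model: point each record at the copied statement carrying the same uid.  */
static void
repoint_records (std::vector<stmt_record> &records, std::vector<gstmt> &copies)
{
  for (gstmt &copy : copies)
    records[copy.uid - 1].stmt = &copy;
}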
- /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed
- using the original main loop and thus need to be updated to refer to the
- cloned variables used in the epilogue. */
- for (unsigned i = 0; i < stmt_worklist.length (); ++i)
- {
- gimple *stmt = stmt_worklist[i];
- tree *new_op;
-
- for (unsigned j = 1; j < gimple_num_ops (stmt); ++j)
- {
- tree op = gimple_op (stmt, j);
- if ((new_op = mapping.get(op)))
- gimple_set_op (stmt, j, *new_op);
- else
- {
- /* PR92429: The last argument of simplify_replace_tree disables
- folding when replacing arguments. This is required as
- otherwise you might end up with different statements than the
- ones analyzed in vect_loop_analyze, leading to different
- vectorization. */
- op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
- &find_in_mapping, &mapping, false);
- gimple_set_op (stmt, j, op);
- }
- }
- }
-
struct data_reference *dr;
vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo);
FOR_EACH_VEC_ELT (datarefs, i, dr)
@@ -11649,27 +11017,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
orig_stmt = DR_STMT (dr);
gcc_assert (gimple_uid (orig_stmt) > 0);
stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (orig_stmt) - 1];
- /* Data references for gather loads and scatter stores do not use the
- updated offset we set using ADVANCE. Instead we have to make sure the
- reference in the data references point to the corresponding copy of
- the original in the epilogue. Make sure to update both
- gather/scatters recognized by dataref analysis and also other
- refs that get_load_store_type classified as VMAT_GATHER_SCATTER. */
- auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER
- || STMT_VINFO_STRIDED_P (vstmt_vinfo)
- || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo))
- {
- /* ??? As we copy epilogues from the main loop incremental
- replacement from an already replaced DR_REF from vectorizing
- the first epilogue will fail. */
- DR_REF (dr)
- = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
- &find_in_mapping, &mapping);
- DR_BASE_ADDRESS (dr)
- = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
- &find_in_mapping, &mapping);
- }
DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
}
@@ -11679,9 +11026,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
/* Remember the advancement made. */
LOOP_VINFO_DRS_ADVANCED_BY (epilogue_vinfo) = advance;
-
- epilogue_vinfo->shared->datarefs_copy.release ();
- epilogue_vinfo->shared->save_datarefs ();
}
 /* When vectorizing early break statements, instructions that happen before
@@ -11787,7 +11131,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
DUMP_VECT_SCOPE ("vec_transform_loop");
- loop_vinfo->shared->check_datarefs ();
+ if (! LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ loop_vinfo->shared->check_datarefs ();
/* Use the more conservative vectorization threshold. If the number
of iterations is constant assume the cost check has been performed