aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r--gcc/tree-vect-loop.cc486
1 files changed, 68 insertions, 418 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d5044d5..8ea0f45 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -168,9 +168,8 @@ static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
may already be set for general statements (not just data refs). */
static opt_result
-vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
- bool vectype_maybe_set_p,
- poly_uint64 *vf)
+vect_determine_vectype_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
+ bool vectype_maybe_set_p)
{
gimple *stmt = stmt_info->stmt;
@@ -192,6 +191,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
if (stmt_vectype)
{
+ if (known_le (TYPE_VECTOR_SUBPARTS (stmt_vectype), 1U))
+ return opt_result::failure_at (STMT_VINFO_STMT (stmt_info),
+ "not vectorized: unsupported "
+ "data-type in %G",
+ STMT_VINFO_STMT (stmt_info));
+
if (STMT_VINFO_VECTYPE (stmt_info))
/* The only case when a vectype had been already set is for stmts
that contain a data ref, or for "pattern-stmts" (stmts generated
@@ -203,9 +208,6 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
}
- if (nunits_vectype)
- vect_update_max_nunits (vf, nunits_vectype);
-
return opt_result::success ();
}
@@ -215,13 +217,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
or false if something prevented vectorization. */
static opt_result
-vect_determine_vf_for_stmt (vec_info *vinfo,
- stmt_vec_info stmt_info, poly_uint64 *vf)
+vect_determine_vectype_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
stmt_info->stmt);
- opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
+ opt_result res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, false);
if (!res)
return res;
@@ -240,7 +241,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern def stmt: %G",
def_stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
+ res = vect_determine_vectype_for_stmt_1 (vinfo, def_stmt_info, true);
if (!res)
return res;
}
@@ -249,7 +250,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern statement: %G",
stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
+ res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, true);
if (!res)
return res;
}
@@ -257,45 +258,23 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
return opt_result::success ();
}
-/* Function vect_determine_vectorization_factor
-
- Determine the vectorization factor (VF). VF is the number of data elements
- that are operated upon in parallel in a single iteration of the vectorized
- loop. For example, when vectorizing a loop that operates on 4byte elements,
- on a target with vector size (VS) 16byte, the VF is set to 4, since 4
- elements can fit in a single vector register.
-
- We currently support vectorization of loops in which all types operated upon
- are of the same size. Therefore this function currently sets VF according to
- the size of the types operated upon, and fails if there are multiple sizes
- in the loop.
-
- VF is also the factor by which the loop iterations are strip-mined, e.g.:
- original loop:
- for (i=0; i<N; i++){
- a[i] = b[i] + c[i];
- }
+/* Function vect_set_stmts_vectype
- vectorized loop:
- for (i=0; i<N; i+=VF){
- a[i:VF] = b[i:VF] + c[i:VF];
- }
-*/
+ Set STMT_VINFO_VECTYPE of all stmts. */
static opt_result
-vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
+vect_set_stmts_vectype (loop_vec_info loop_vinfo)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
unsigned nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor = 1;
tree scalar_type = NULL_TREE;
gphi *phi;
tree vectype;
stmt_vec_info stmt_info;
unsigned i;
- DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
+ DUMP_VECT_SCOPE ("vect_set_stmts_vectype");
for (i = 0; i < nbbs; i++)
{
@@ -324,7 +303,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
scalar_type);
vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
- if (!vectype)
+ if (!vectype
+ || known_le (TYPE_VECTOR_SUBPARTS (vectype), 1U))
return opt_result::failure_at (phi,
"not vectorized: unsupported "
"data-type %T\n",
@@ -334,15 +314,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
vectype);
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
- dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
- dump_printf (MSG_NOTE, "\n");
- }
-
- vect_update_max_nunits (&vectorization_factor, vectype);
}
}
@@ -353,25 +324,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
continue;
stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
opt_result res
- = vect_determine_vf_for_stmt (loop_vinfo,
- stmt_info, &vectorization_factor);
+ = vect_determine_vectype_for_stmt (loop_vinfo, stmt_info);
if (!res)
return res;
}
}
- /* TODO: Analyze cost. Decide if worth while to vectorize. */
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, "\n");
- }
-
- if (known_le (vectorization_factor, 1U))
- return opt_result::failure_at (vect_location,
- "not vectorized: unsupported data-type\n");
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
return opt_result::success ();
}
@@ -2002,234 +1960,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
-/* Scan the loop stmts and dependent on whether there are any (non-)SLP
- statements update the vectorization factor. */
-
-static void
-vect_update_vf_for_slp (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor;
- int i;
-
- DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
-
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- gcc_assert (known_ne (vectorization_factor, 0U));
-
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
- the SLP instances. If that unrolling factor is 1, we say, that we
- perform pure SLP on loop - cross iteration parallelism is not
- exploited. */
- bool only_slp_in_loop = true;
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
- if (!stmt_info)
- continue;
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- if (is_gimple_debug (gsi_stmt (si)))
- continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- stmt_info = vect_stmt_to_vectorize (stmt_info);
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- }
-
- if (only_slp_in_loop)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains only SLP stmts\n");
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
- /* Both the vectorization factor and unroll factor have the form
- GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
- so they must have a common multiple. */
- vectorization_factor
- = force_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
- }
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, ".\n");
- }
-}
-
-/* Return true if STMT_INFO describes a double reduction phi and if
- the other phi in the reduction is also relevant for vectorization.
- This rejects cases such as:
-
- outer1:
- x_1 = PHI <x_3(outer2), ...>;
- ...
-
- inner:
- x_2 = ...;
- ...
-
- outer2:
- x_3 = PHI <x_2(inner)>;
-
- if nothing in x_2 or elsewhere makes x_1 relevant. */
-
-static bool
-vect_active_double_reduction_p (stmt_vec_info stmt_info)
-{
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
- return false;
-
- return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
-}
-
-/* Function vect_analyze_loop_operations.
-
- Scan the loop stmts and make sure they are all vectorizable. */
-
-static opt_result
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- int i;
- stmt_vec_info stmt_info;
-
- DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
-
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
-
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gphi *phi = si.phi ();
-
- stmt_info = loop_vinfo->lookup_stmt (phi);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G",
- (gimple *) phi);
- if (virtual_operand_p (gimple_phi_result (phi)))
- continue;
-
- /* ??? All of the below unconditional FAILs should be in
- done earlier after analyzing cycles, possibly when
- determining stmt relevancy? */
-
- /* Inner-loop loop-closed exit phi in outer-loop vectorization
- (i.e., a phi in the tail of the outer-loop). */
- if (! is_loop_header_bb_p (bb))
- {
- /* FORNOW: we currently don't support the case that these phis
- are not used in the outerloop (unless it is double reduction,
- i.e., this phi is vect_reduction_def), cause this case
- requires to actually do something here. */
- if (STMT_VINFO_LIVE_P (stmt_info)
- && !vect_active_double_reduction_p (stmt_info))
- return opt_result::failure_at (phi,
- "Unsupported loop-closed phi"
- " in outer-loop.\n");
-
- /* If PHI is used in the outer loop, we check that its operand
- is defined in the inner loop. */
- if (STMT_VINFO_RELEVANT_P (stmt_info))
- {
- tree phi_op;
-
- if (gimple_phi_num_args (phi) != 1)
- return opt_result::failure_at (phi, "unsupported phi");
-
- phi_op = PHI_ARG_DEF (phi, 0);
- stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
- if (!op_def_info)
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
- && (STMT_VINFO_RELEVANT (op_def_info)
- != vect_used_in_outer_by_reduction))
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
- || (STMT_VINFO_DEF_TYPE (stmt_info)
- == vect_double_reduction_def))
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi, "unsupported phi\n");
- }
-
- continue;
- }
-
- gcc_assert (stmt_info);
-
- if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
- || STMT_VINFO_LIVE_P (stmt_info))
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
- /* A scalar-dependence cycle that we don't support. */
- return opt_result::failure_at (phi,
- "not vectorized:"
- " scalar dependence cycle.\n");
-
- if (STMT_VINFO_RELEVANT_P (stmt_info)
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi,
- "not vectorized: relevant phi not "
- "supported: %G",
- static_cast <gimple *> (phi));
- }
-
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gimple *stmt = gsi_stmt (si);
- if (!gimple_clobber_p (stmt)
- && !is_gimple_debug (stmt))
- {
- bool need_to_vectorize = false;
- opt_result res
- = vect_analyze_stmt (loop_vinfo,
- loop_vinfo->lookup_stmt (stmt),
- &need_to_vectorize,
- NULL, NULL, NULL);
- if (!res)
- return res;
- }
- }
- } /* bbs */
-
- return opt_result::success ();
-}
-
/* Return true if we know that the iteration count is smaller than the
vectorization factor. Return false if it isn't, or if we can't be sure
either way. */
@@ -2530,78 +2260,6 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
return opt_result::success ();
}
-/* Look for SLP-only access groups and turn each individual access into its own
- group. */
-static void
-vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
-{
- unsigned int i;
- struct data_reference *dr;
-
- DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
-
- vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- gcc_assert (DR_REF (dr));
- stmt_vec_info stmt_info
- = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (DR_STMT (dr)));
-
- /* Check if the load is a part of an interleaving chain. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
- stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
- dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element);
- unsigned int group_size = DR_GROUP_SIZE (first_element);
-
- /* Check if SLP-only groups. */
- if (!STMT_SLP_TYPE (stmt_info)
- && STMT_VINFO_SLP_VECT_ONLY (first_element))
- {
- /* Dissolve the group. */
- STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
-
- stmt_vec_info vinfo = first_element;
- while (vinfo)
- {
- stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
- DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
- DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- DR_GROUP_SIZE (vinfo) = 1;
- if (STMT_VINFO_STRIDED_P (first_element)
- /* We cannot handle stores with gaps. */
- || DR_IS_WRITE (dr_info->dr))
- {
- STMT_VINFO_STRIDED_P (vinfo) = true;
- DR_GROUP_GAP (vinfo) = 0;
- }
- else
- DR_GROUP_GAP (vinfo) = group_size - 1;
- /* Duplicate and adjust alignment info, it needs to
- be present on each group leader, see dr_misalignment. */
- if (vinfo != first_element)
- {
- dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo);
- dr_info2->target_alignment = dr_info->target_alignment;
- int misalignment = dr_info->misalignment;
- if (misalignment != DR_MISALIGNMENT_UNKNOWN)
- {
- HOST_WIDE_INT diff
- = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))
- - TREE_INT_CST_LOW (DR_INIT (dr_info->dr)));
- unsigned HOST_WIDE_INT align_c
- = dr_info->target_alignment.to_constant ();
- misalignment = (misalignment + diff) % align_c;
- }
- dr_info2->misalignment = misalignment;
- }
- vinfo = next;
- }
- }
- }
- }
-}
-
/* Determine if operating on full vectors for LOOP_VINFO might leave
some scalar iterations still to do. If so, decide how we should
handle those scalar iterations. The possibilities are:
@@ -2839,19 +2497,18 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
return opt_result::failure_at (vect_location, "bad data dependence.\n");
LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;
- ok = vect_determine_vectorization_factor (loop_vinfo);
+ ok = vect_set_stmts_vectype (loop_vinfo);
if (!ok)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine vectorization factor.\n");
+ "cannot determine vector types.\n");
return ok;
}
/* Compute the scalar iteration cost. */
vect_compute_single_scalar_iteration_cost (loop_vinfo);
- poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool saved_can_use_partial_vectors_p
= LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
@@ -2867,21 +2524,29 @@ start_over:
return ok;
/* If there are any SLP instances mark them as pure_slp. */
- if (vect_make_slp_decision (loop_vinfo))
- {
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
-
- /* Update the vectorization factor based on the SLP decision. */
- vect_update_vf_for_slp (loop_vinfo);
+ if (!vect_make_slp_decision (loop_vinfo))
+ return opt_result::failure_at (vect_location, "no stmts to vectorize.\n");
- /* Optimize the SLP graph with the vectorization factor fixed. */
- vect_optimize_slp (loop_vinfo);
+ /* Find stmts that need to be both vectorized and SLPed. */
+ if (!vect_detect_hybrid_slp (loop_vinfo))
+ return opt_result::failure_at (vect_location, "needs non-SLP handling\n");
- /* Gather the loads reachable from the SLP graph entries. */
- vect_gather_slp_loads (loop_vinfo);
+ /* Determine the vectorization factor from the SLP decision. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
+ dump_dec (MSG_NOTE, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ dump_printf (MSG_NOTE, "\n");
}
+ /* Optimize the SLP graph with the vectorization factor fixed. */
+ vect_optimize_slp (loop_vinfo);
+
+ /* Gather the loads reachable from the SLP graph entries. */
+ vect_gather_slp_loads (loop_vinfo);
+
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
@@ -2950,19 +2615,6 @@ start_over:
goto again;
}
- /* Dissolve SLP-only groups. */
- vect_dissolve_slp_only_groups (loop_vinfo);
-
- /* Scan all the remaining operations in the loop that we did not catch
- during SLP build and make sure we fail. */
- ok = vect_analyze_loop_operations (loop_vinfo);
- if (!ok)
- {
- ok = opt_result::failure_at (vect_location,
- "bad operation or unsupported loop bound\n");
- goto again;
- }
-
/* For now, we don't expect to mix both masking and length approaches for one
loop, disable it if both are recorded. */
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
@@ -3272,8 +2924,8 @@ again:
dump_printf_loc (MSG_NOTE, vect_location,
"re-trying with single-lane SLP\n");
- /* Restore vectorization factor as it were without SLP. */
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
+ /* Reset the vectorization factor. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = 0;
/* Free the SLP instances. */
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
vect_free_slp_instance (instance);
@@ -5349,7 +5001,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
static void
vect_model_reduction_cost (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, internal_fn reduc_fn,
+ slp_tree node, internal_fn reduc_fn,
vect_reduction_type reduction_type,
int ncopies, stmt_vector_for_cost *cost_vec)
{
@@ -5365,9 +5017,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduction_type == COND_REDUCTION)
ncopies *= 2;
- vectype = STMT_VINFO_VECTYPE (stmt_info);
+ vectype = SLP_TREE_VECTYPE (node);
mode = TYPE_MODE (vectype);
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+ stmt_vec_info orig_stmt_info
+ = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
gimple_match_op op;
if (!gimple_extract_op (orig_stmt_info->stmt, &op))
@@ -5385,16 +5038,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduc_fn != IFN_LAST)
/* Count one reduction-like operation per vector. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
- stmt_info, 0, vect_body);
+ node, 0, vect_body);
else
{
/* Use NELEMENTS extracts and NELEMENTS scalar ops. */
unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
inside_cost = record_stmt_cost (cost_vec, nelements,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_body);
inside_cost += record_stmt_cost (cost_vec, nelements,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_body);
}
}
@@ -5411,7 +5064,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* We need the initial reduction value. */
prologue_stmts = 1;
prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_prologue);
}
@@ -5428,24 +5081,24 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
{
/* An EQ stmt and an COND_EXPR stmt. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
/* Reduction of the max index and a reduction of the found
values. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* A broadcast of the max value. */
epilogue_cost += record_stmt_cost (cost_vec, 1,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_epilogue);
}
else
{
epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
- stmt_info, 0, vect_epilogue);
+ node, 0, vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
}
@@ -5455,12 +5108,12 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* Extraction of scalar elements. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits - 3,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_epilogue);
}
else if (reduction_type == EXTRACT_LAST_REDUCTION
@@ -5486,10 +5139,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
Also requires scalar extract. */
epilogue_cost += record_stmt_cost (cost_vec,
exact_log2 (nelements) * 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
else
@@ -5497,7 +5150,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
elements, we have N extracts and N-1 reduction ops. */
epilogue_cost += record_stmt_cost (cost_vec,
nelements + nelements - 1,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
}
}
@@ -7725,23 +7378,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (lane_reducing_op_p (op.code))
{
- enum vect_def_type dt;
- tree vectype_op;
-
/* The last operand of lane-reducing operation is for
reduction. */
gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1);
- if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op))
- return false;
-
+ slp_tree op_node = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
+ tree vectype_op = SLP_TREE_VECTYPE (op_node);
tree type_op = TREE_TYPE (op.ops[0]);
-
if (!vectype_op)
{
vectype_op = get_vectype_for_scalar_type (loop_vinfo,
type_op);
- if (!vectype_op)
+ if (!vectype_op
+ || !vect_maybe_update_slp_op_vectype (op_node,
+ vectype_op))
return false;
}
@@ -7822,7 +7472,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
inside the loop body. The last operand is the reduction variable,
which is defined by the loop-header-phi. */
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
@@ -8425,7 +8075,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
- vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn,
+ vect_model_reduction_cost (loop_vinfo, slp_for_stmt_info, reduc_fn,
reduction_type, ncopies, cost_vec);
/* Cost the reduction op inside the loop if transformed via
vect_transform_reduction for non-lane-reducing operation. Otherwise
@@ -11489,7 +11139,7 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
profile_count entry_count = loop_preheader_edge (loop)->count ();
/* If we have unreliable loop profile avoid dropping entry
- count bellow header count. This can happen since loops
+ count below header count. This can happen since loops
has unrealistically low trip counts. */
while (vf > 1
&& loop->header->count > entry_count