Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r--  gcc/tree-vect-loop.cc  1665
1 file changed, 534 insertions(+), 1131 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index fe6f3cf..56f80db 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "case-cfn-macros.h"
#include "langhooks.h"
+#include "opts.h"
/* Loop Vectorization Pass.
@@ -167,9 +168,8 @@ static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
may already be set for general statements (not just data refs). */
static opt_result
-vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
- bool vectype_maybe_set_p,
- poly_uint64 *vf)
+vect_determine_vectype_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
+ bool vectype_maybe_set_p)
{
gimple *stmt = stmt_info->stmt;
@@ -191,6 +191,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
if (stmt_vectype)
{
+ if (known_le (TYPE_VECTOR_SUBPARTS (stmt_vectype), 1U))
+ return opt_result::failure_at (STMT_VINFO_STMT (stmt_info),
+ "not vectorized: unsupported "
+ "data-type in %G",
+ STMT_VINFO_STMT (stmt_info));
+
if (STMT_VINFO_VECTYPE (stmt_info))
/* The only case when a vectype had been already set is for stmts
that contain a data ref, or for "pattern-stmts" (stmts generated
@@ -202,9 +208,6 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
}
- if (nunits_vectype)
- vect_update_max_nunits (vf, nunits_vectype);
-
return opt_result::success ();
}
@@ -214,13 +217,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
or false if something prevented vectorization. */
static opt_result
-vect_determine_vf_for_stmt (vec_info *vinfo,
- stmt_vec_info stmt_info, poly_uint64 *vf)
+vect_determine_vectype_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
stmt_info->stmt);
- opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf);
+ opt_result res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, false);
if (!res)
return res;
@@ -239,7 +241,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern def stmt: %G",
def_stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf);
+ res = vect_determine_vectype_for_stmt_1 (vinfo, def_stmt_info, true);
if (!res)
return res;
}
@@ -248,7 +250,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"==> examining pattern statement: %G",
stmt_info->stmt);
- res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf);
+ res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, true);
if (!res)
return res;
}
@@ -256,45 +258,23 @@ vect_determine_vf_for_stmt (vec_info *vinfo,
return opt_result::success ();
}
-/* Function vect_determine_vectorization_factor
-
- Determine the vectorization factor (VF). VF is the number of data elements
- that are operated upon in parallel in a single iteration of the vectorized
- loop. For example, when vectorizing a loop that operates on 4byte elements,
- on a target with vector size (VS) 16byte, the VF is set to 4, since 4
- elements can fit in a single vector register.
-
- We currently support vectorization of loops in which all types operated upon
- are of the same size. Therefore this function currently sets VF according to
- the size of the types operated upon, and fails if there are multiple sizes
- in the loop.
-
- VF is also the factor by which the loop iterations are strip-mined, e.g.:
- original loop:
- for (i=0; i<N; i++){
- a[i] = b[i] + c[i];
- }
+/* Function vect_set_stmts_vectype
- vectorized loop:
- for (i=0; i<N; i+=VF){
- a[i:VF] = b[i:VF] + c[i:VF];
- }
-*/
+ Set STMT_VINFO_VECTYPE of all stmts. */
static opt_result
-vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
+vect_set_stmts_vectype (loop_vec_info loop_vinfo)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
unsigned nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor = 1;
tree scalar_type = NULL_TREE;
gphi *phi;
tree vectype;
stmt_vec_info stmt_info;
unsigned i;
- DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
+ DUMP_VECT_SCOPE ("vect_set_stmts_vectype");
for (i = 0; i < nbbs; i++)
{
@@ -323,7 +303,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
scalar_type);
vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
- if (!vectype)
+ if (!vectype
+ || known_le (TYPE_VECTOR_SUBPARTS (vectype), 1U))
return opt_result::failure_at (phi,
"not vectorized: unsupported "
"data-type %T\n",
@@ -333,15 +314,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
vectype);
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
- dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
- dump_printf (MSG_NOTE, "\n");
- }
-
- vect_update_max_nunits (&vectorization_factor, vectype);
}
}
@@ -352,25 +324,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
continue;
stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
opt_result res
- = vect_determine_vf_for_stmt (loop_vinfo,
- stmt_info, &vectorization_factor);
+ = vect_determine_vectype_for_stmt (loop_vinfo, stmt_info);
if (!res)
return res;
}
}
- /* TODO: Analyze cost. Decide if worth while to vectorize. */
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, "\n");
- }
-
- if (known_le (vectorization_factor, 1U))
- return opt_result::failure_at (vect_location,
- "not vectorized: unsupported data-type\n");
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
return opt_result::success ();
}
@@ -1069,10 +1028,12 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
using_decrementing_iv_p (false),
using_select_vl_p (false),
epil_using_partial_vectors_p (false),
+ allow_mutual_alignment (false),
partial_load_store_bias (0),
peeling_for_gaps (false),
peeling_for_niter (false),
early_breaks (false),
+ user_unroll (false),
no_data_dependencies (false),
has_mask_store (false),
scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -1999,234 +1960,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
-/* Scan the loop stmts and dependent on whether there are any (non-)SLP
- statements update the vectorization factor. */
-
-static void
-vect_update_vf_for_slp (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- poly_uint64 vectorization_factor;
- int i;
-
- DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
-
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- gcc_assert (known_ne (vectorization_factor, 0U));
-
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
- the SLP instances. If that unrolling factor is 1, we say, that we
- perform pure SLP on loop - cross iteration parallelism is not
- exploited. */
- bool only_slp_in_loop = true;
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
- if (!stmt_info)
- continue;
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- if (is_gimple_debug (gsi_stmt (si)))
- continue;
- stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
- stmt_info = vect_stmt_to_vectorize (stmt_info);
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- }
-
- if (only_slp_in_loop)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains only SLP stmts\n");
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- }
- else
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Loop contains SLP and non-SLP stmts\n");
- /* Both the vectorization factor and unroll factor have the form
- GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
- so they must have a common multiple. */
- vectorization_factor
- = force_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
- }
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to ");
- dump_dec (MSG_NOTE, vectorization_factor);
- dump_printf (MSG_NOTE, ".\n");
- }
-}
-
-/* Return true if STMT_INFO describes a double reduction phi and if
- the other phi in the reduction is also relevant for vectorization.
- This rejects cases such as:
-
- outer1:
- x_1 = PHI <x_3(outer2), ...>;
- ...
-
- inner:
- x_2 = ...;
- ...
-
- outer2:
- x_3 = PHI <x_2(inner)>;
-
- if nothing in x_2 or elsewhere makes x_1 relevant. */
-
-static bool
-vect_active_double_reduction_p (stmt_vec_info stmt_info)
-{
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
- return false;
-
- return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
-}
-
-/* Function vect_analyze_loop_operations.
-
- Scan the loop stmts and make sure they are all vectorizable. */
-
-static opt_result
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
- int i;
- stmt_vec_info stmt_info;
-
- DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
-
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
-
- for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gphi *phi = si.phi ();
-
- stmt_info = loop_vinfo->lookup_stmt (phi);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G",
- (gimple *) phi);
- if (virtual_operand_p (gimple_phi_result (phi)))
- continue;
-
- /* ??? All of the below unconditional FAILs should be in
- done earlier after analyzing cycles, possibly when
- determining stmt relevancy? */
-
- /* Inner-loop loop-closed exit phi in outer-loop vectorization
- (i.e., a phi in the tail of the outer-loop). */
- if (! is_loop_header_bb_p (bb))
- {
- /* FORNOW: we currently don't support the case that these phis
- are not used in the outerloop (unless it is double reduction,
- i.e., this phi is vect_reduction_def), cause this case
- requires to actually do something here. */
- if (STMT_VINFO_LIVE_P (stmt_info)
- && !vect_active_double_reduction_p (stmt_info))
- return opt_result::failure_at (phi,
- "Unsupported loop-closed phi"
- " in outer-loop.\n");
-
- /* If PHI is used in the outer loop, we check that its operand
- is defined in the inner loop. */
- if (STMT_VINFO_RELEVANT_P (stmt_info))
- {
- tree phi_op;
-
- if (gimple_phi_num_args (phi) != 1)
- return opt_result::failure_at (phi, "unsupported phi");
-
- phi_op = PHI_ARG_DEF (phi, 0);
- stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
- if (!op_def_info)
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
- && (STMT_VINFO_RELEVANT (op_def_info)
- != vect_used_in_outer_by_reduction))
- return opt_result::failure_at (phi, "unsupported phi\n");
-
- if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
- || (STMT_VINFO_DEF_TYPE (stmt_info)
- == vect_double_reduction_def))
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi, "unsupported phi\n");
- }
-
- continue;
- }
-
- gcc_assert (stmt_info);
-
- if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
- || STMT_VINFO_LIVE_P (stmt_info))
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
- /* A scalar-dependence cycle that we don't support. */
- return opt_result::failure_at (phi,
- "not vectorized:"
- " scalar dependence cycle.\n");
-
- if (STMT_VINFO_RELEVANT_P (stmt_info)
- && ! PURE_SLP_STMT (stmt_info))
- return opt_result::failure_at (phi,
- "not vectorized: relevant phi not "
- "supported: %G",
- static_cast <gimple *> (phi));
- }
-
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gimple *stmt = gsi_stmt (si);
- if (!gimple_clobber_p (stmt)
- && !is_gimple_debug (stmt))
- {
- bool need_to_vectorize = false;
- opt_result res
- = vect_analyze_stmt (loop_vinfo,
- loop_vinfo->lookup_stmt (stmt),
- &need_to_vectorize,
- NULL, NULL, NULL);
- if (!res)
- return res;
- }
- }
- } /* bbs */
-
- return opt_result::success ();
-}
-
/* Return true if we know that the iteration count is smaller than the
vectorization factor. Return false if it isn't, or if we can't be sure
either way. */
@@ -2527,78 +2260,6 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
return opt_result::success ();
}
-/* Look for SLP-only access groups and turn each individual access into its own
- group. */
-static void
-vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
-{
- unsigned int i;
- struct data_reference *dr;
-
- DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
-
- vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
- FOR_EACH_VEC_ELT (datarefs, i, dr)
- {
- gcc_assert (DR_REF (dr));
- stmt_vec_info stmt_info
- = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (DR_STMT (dr)));
-
- /* Check if the load is a part of an interleaving chain. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- {
- stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
- dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element);
- unsigned int group_size = DR_GROUP_SIZE (first_element);
-
- /* Check if SLP-only groups. */
- if (!STMT_SLP_TYPE (stmt_info)
- && STMT_VINFO_SLP_VECT_ONLY (first_element))
- {
- /* Dissolve the group. */
- STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
-
- stmt_vec_info vinfo = first_element;
- while (vinfo)
- {
- stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
- DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
- DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
- DR_GROUP_SIZE (vinfo) = 1;
- if (STMT_VINFO_STRIDED_P (first_element)
- /* We cannot handle stores with gaps. */
- || DR_IS_WRITE (dr_info->dr))
- {
- STMT_VINFO_STRIDED_P (vinfo) = true;
- DR_GROUP_GAP (vinfo) = 0;
- }
- else
- DR_GROUP_GAP (vinfo) = group_size - 1;
- /* Duplicate and adjust alignment info, it needs to
- be present on each group leader, see dr_misalignment. */
- if (vinfo != first_element)
- {
- dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo);
- dr_info2->target_alignment = dr_info->target_alignment;
- int misalignment = dr_info->misalignment;
- if (misalignment != DR_MISALIGNMENT_UNKNOWN)
- {
- HOST_WIDE_INT diff
- = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr))
- - TREE_INT_CST_LOW (DR_INIT (dr_info->dr)));
- unsigned HOST_WIDE_INT align_c
- = dr_info->target_alignment.to_constant ();
- misalignment = (misalignment + diff) % align_c;
- }
- dr_info2->misalignment = misalignment;
- }
- vinfo = next;
- }
- }
- }
- }
-}
-
/* Determine if operating on full vectors for LOOP_VINFO might leave
some scalar iterations still to do. If so, decide how we should
handle those scalar iterations. The possibilities are:
@@ -2836,19 +2497,18 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
return opt_result::failure_at (vect_location, "bad data dependence.\n");
LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;
- ok = vect_determine_vectorization_factor (loop_vinfo);
+ ok = vect_set_stmts_vectype (loop_vinfo);
if (!ok)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't determine vectorization factor.\n");
+ "cannot determine vector types.\n");
return ok;
}
/* Compute the scalar iteration cost. */
vect_compute_single_scalar_iteration_cost (loop_vinfo);
- poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool saved_can_use_partial_vectors_p
= LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
@@ -2864,21 +2524,29 @@ start_over:
return ok;
/* If there are any SLP instances mark them as pure_slp. */
- if (vect_make_slp_decision (loop_vinfo))
- {
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
+ if (!vect_make_slp_decision (loop_vinfo))
+ return opt_result::failure_at (vect_location, "no stmts to vectorize.\n");
- /* Update the vectorization factor based on the SLP decision. */
- vect_update_vf_for_slp (loop_vinfo);
+ /* Find stmts that need to be both vectorized and SLPed. */
+ if (!vect_detect_hybrid_slp (loop_vinfo))
+ return opt_result::failure_at (vect_location, "needs non-SLP handling\n");
- /* Optimize the SLP graph with the vectorization factor fixed. */
- vect_optimize_slp (loop_vinfo);
-
- /* Gather the loads reachable from the SLP graph entries. */
- vect_gather_slp_loads (loop_vinfo);
+ /* Determine the vectorization factor from the SLP decision. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
+ dump_dec (MSG_NOTE, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ dump_printf (MSG_NOTE, "\n");
}
+ /* Optimize the SLP graph with the vectorization factor fixed. */
+ vect_optimize_slp (loop_vinfo);
+
+ /* Gather the loads reachable from the SLP graph entries. */
+ vect_gather_slp_loads (loop_vinfo);
+
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
@@ -2947,19 +2615,6 @@ start_over:
goto again;
}
- /* Dissolve SLP-only groups. */
- vect_dissolve_slp_only_groups (loop_vinfo);
-
- /* Scan all the remaining operations in the loop that we did not catch
- during SLP build and make sure we fail. */
- ok = vect_analyze_loop_operations (loop_vinfo);
- if (!ok)
- {
- ok = opt_result::failure_at (vect_location,
- "bad operation or unsupported loop bound\n");
- goto again;
- }
-
/* For now, we don't expect to mix both masking and length approaches for one
loop, disable it if both are recorded. */
if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
@@ -3269,8 +2924,8 @@ again:
dump_printf_loc (MSG_NOTE, vect_location,
"re-trying with single-lane SLP\n");
- /* Restore vectorization factor as it were without SLP. */
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
+ /* Reset the vectorization factor. */
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = 0;
/* Free the SLP instances. */
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
vect_free_slp_instance (instance);
@@ -3398,8 +3053,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
}
/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is
- not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
- MODE_I to the next mode useful to analyze.
+ not NULL. When MASKED_P is not -1, override the default
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it.
+ Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next
+ mode useful to analyze.
Return the loop_vinfo on success and wrapped null on failure. */
static opt_loop_vec_info
@@ -3407,6 +3064,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
const vect_loop_form_info *loop_form_info,
loop_vec_info orig_loop_vinfo,
const vector_modes &vector_modes, unsigned &mode_i,
+ int masked_p,
machine_mode &autodetected_vector_mode,
bool &fatal)
{
@@ -3415,6 +3073,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
machine_mode vector_mode = vector_modes[mode_i];
loop_vinfo->vector_mode = vector_mode;
+ if (masked_p != -1)
+ loop_vinfo->can_use_partial_vectors_p = masked_p;
unsigned int suggested_unroll_factor = 1;
unsigned slp_done_for_suggested_uf = 0;
@@ -3428,27 +3088,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
res ? "succeeded" : "failed",
GET_MODE_NAME (loop_vinfo->vector_mode));
- if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1)
+ auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+ if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ /* Check to see if the user wants to unroll or if the target wants to. */
+ && (suggested_unroll_factor > 1 || user_unroll > 1))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ if (suggested_unroll_factor == 1)
+ {
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
+ suggested_unroll_factor = user_unroll / assumed_vf;
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "setting unroll factor to %d based on user requested "
+ "unroll factor %d and suggested vectorization "
+ "factor: %d\n",
+ suggested_unroll_factor, user_unroll, assumed_vf);
+ }
+ }
+
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"***** Re-trying analysis for unrolling"
" with unroll factor %d and slp %s.\n",
suggested_unroll_factor,
slp_done_for_suggested_uf ? "on" : "off");
- loop_vec_info unroll_vinfo
- = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
- unroll_vinfo->vector_mode = vector_mode;
- unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
- opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
- slp_done_for_suggested_uf);
- if (new_res)
- {
- delete loop_vinfo;
- loop_vinfo = unroll_vinfo;
- }
- else
- delete unroll_vinfo;
+ loop_vec_info unroll_vinfo
+ = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
+ unroll_vinfo->vector_mode = vector_mode;
+ unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
+ opt_result new_res
+ = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
+ slp_done_for_suggested_uf);
+ if (new_res)
+ {
+ delete loop_vinfo;
+ loop_vinfo = unroll_vinfo;
+ LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1;
+ }
+ else
+ delete unroll_vinfo;
+ }
}
/* Remember the autodetected vector mode. */
@@ -3469,13 +3152,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
mode_i += 1;
}
if (mode_i + 1 < vector_modes.length ()
- && VECTOR_MODE_P (autodetected_vector_mode)
- && (related_vector_mode (vector_modes[mode_i + 1],
- GET_MODE_INNER (autodetected_vector_mode))
- == autodetected_vector_mode)
- && (related_vector_mode (autodetected_vector_mode,
- GET_MODE_INNER (vector_modes[mode_i + 1]))
- == vector_modes[mode_i + 1]))
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i + 1]))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -3580,7 +3258,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
cached_vf_per_mode[last_mode_i] = -1;
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
- NULL, vector_modes, mode_i,
+ NULL, vector_modes, mode_i, -1,
autodetected_vector_mode, fatal);
if (fatal)
break;
@@ -3665,24 +3343,38 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
array may contain length-agnostic and length-specific modes. Their
ordering is not guaranteed, so we could end up picking a mode for the main
loop that is after the epilogue's optimal mode. */
+ int masked_p = -1;
if (!unlimited_cost_model (loop)
- && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode)
+ && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
+ != VOIDmode))
{
vector_modes[0]
- = first_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+ = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
cached_vf_per_mode[0] = 0;
}
else
vector_modes[0] = autodetected_vector_mode;
mode_i = 0;
- bool supports_partial_vectors =
- partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
+ bool supports_partial_vectors = (param_vect_partial_vector_usage != 0
+ || masked_p == 1);
+ machine_mode mask_mode;
+ if (supports_partial_vectors
+ && !partial_vectors_supported_p ()
+ && !(VECTOR_MODE_P (first_loop_vinfo->vector_mode)
+ && targetm.vectorize.get_mask_mode
+ (first_loop_vinfo->vector_mode).exists (&mask_mode)
+ && SCALAR_INT_MODE_P (mask_mode)))
+ supports_partial_vectors = false;
poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
loop_vec_info orig_loop_vinfo = first_loop_vinfo;
do
{
+ /* Let the user override what the target suggests. */
+ if (OPTION_SET_P (param_vect_partial_vector_usage))
+ masked_p = -1;
+
while (1)
{
/* If the target does not support partial vectors we can shorten the
@@ -3697,6 +3389,22 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
break;
continue;
}
+ /* We would need an exhaustive search to find all modes we
+ skipped but that would lead to the same result as the
+ analysis it was skipped for and where we could check
+ cached_vf_per_mode against.
+ Check for the autodetected mode, which is the common
+ situation on x86, which does not perform cost comparison. */
+ if (!supports_partial_vectors
+ && maybe_ge (cached_vf_per_mode[0], first_vinfo_vf)
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i]))
+ {
+ mode_i++;
+ if (mode_i == vector_modes.length ())
+ break;
+ continue;
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -3707,7 +3415,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
orig_loop_vinfo,
- vector_modes, mode_i,
+ vector_modes, mode_i, masked_p,
autodetected_vector_mode, fatal);
if (fatal)
break;
@@ -3738,6 +3446,9 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
break;
}
+ /* Revert to the default from the suggested preferred
+ epilogue vectorization mode. */
+ masked_p = -1;
if (mode_i == vector_modes.length ())
break;
}
@@ -3748,12 +3459,14 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
/* When we selected a first vectorized epilogue, see if the target
suggests to have another one. */
+ masked_p = -1;
if (!unlimited_cost_model (loop)
- && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode ()
+ && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo)
+ && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p)
!= VOIDmode))
{
vector_modes[0]
- = orig_loop_vinfo->vector_costs->suggested_epilogue_mode ();
+ = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p);
cached_vf_per_mode[0] = 0;
mode_i = 0;
}
@@ -4101,6 +3814,10 @@ pop:
if (op.ops[2] == op.ops[opi])
neg = ! neg;
}
+ /* For an FMA the reduction code is the PLUS if the addition chain
+ is the reduction. */
+ else if (op.code == IFN_FMA && opi == 2)
+ op.code = PLUS_EXPR;
if (CONVERT_EXPR_CODE_P (op.code)
&& tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
;
@@ -4646,7 +4363,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
TODO: Consider assigning different costs to different scalar
statements. */
- scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
+ scalar_single_iter_cost = (loop_vinfo->scalar_costs->total_cost ()
+ * param_vect_scalar_cost_multiplier) / 100;
/* Add additional cost for the peeled instructions in prologue and epilogue
loop. (For fully-masked loops there will be no peeling.)
@@ -5283,7 +5001,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
static void
vect_model_reduction_cost (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, internal_fn reduc_fn,
+ slp_tree node, internal_fn reduc_fn,
vect_reduction_type reduction_type,
int ncopies, stmt_vector_for_cost *cost_vec)
{
@@ -5299,9 +5017,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduction_type == COND_REDUCTION)
ncopies *= 2;
- vectype = STMT_VINFO_VECTYPE (stmt_info);
+ vectype = SLP_TREE_VECTYPE (node);
mode = TYPE_MODE (vectype);
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+ stmt_vec_info orig_stmt_info
+ = vect_orig_stmt (SLP_TREE_REPRESENTATIVE (node));
gimple_match_op op;
if (!gimple_extract_op (orig_stmt_info->stmt, &op))
@@ -5319,16 +5038,16 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
if (reduc_fn != IFN_LAST)
/* Count one reduction-like operation per vector. */
inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
- stmt_info, 0, vect_body);
+ node, 0, vect_body);
else
{
/* Use NELEMENTS extracts and NELEMENTS scalar ops. */
unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
inside_cost = record_stmt_cost (cost_vec, nelements,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_body);
inside_cost += record_stmt_cost (cost_vec, nelements,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_body);
}
}
@@ -5345,7 +5064,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* We need the initial reduction value. */
prologue_stmts = 1;
prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_prologue);
}
@@ -5362,24 +5081,24 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
{
/* An EQ stmt and an COND_EXPR stmt. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
/* Reduction of the max index and a reduction of the found
values. */
epilogue_cost += record_stmt_cost (cost_vec, 2,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* A broadcast of the max value. */
epilogue_cost += record_stmt_cost (cost_vec, 1,
- scalar_to_vec, stmt_info, 0,
+ scalar_to_vec, node, 0,
vect_epilogue);
}
else
{
epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
- stmt_info, 0, vect_epilogue);
+ node, 0, vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
}
@@ -5389,12 +5108,12 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
/* Extraction of scalar elements. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
epilogue_cost += record_stmt_cost (cost_vec,
2 * estimated_nunits - 3,
- scalar_stmt, stmt_info, 0,
+ scalar_stmt, node, 0,
vect_epilogue);
}
else if (reduction_type == EXTRACT_LAST_REDUCTION
@@ -5420,10 +5139,10 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
Also requires scalar extract. */
epilogue_cost += record_stmt_cost (cost_vec,
exact_log2 (nelements) * 2,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
epilogue_cost += record_stmt_cost (cost_vec, 1,
- vec_to_scalar, stmt_info, 0,
+ vec_to_scalar, node, 0,
vect_epilogue);
}
else
@@ -5431,7 +5150,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
elements, we have N extracts and N-1 reduction ops. */
epilogue_cost += record_stmt_cost (cost_vec,
nelements + nelements - 1,
- vector_stmt, stmt_info, 0,
+ vector_stmt, node, 0,
vect_epilogue);
}
}
@@ -6016,7 +5735,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* Create an induction variable. */
gimple_stmt_iterator incr_gsi;
bool insert_after;
- vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after);
+ vect_iv_increment_position (LOOP_VINFO_IV_EXIT (loop_vinfo),
+ &incr_gsi, &insert_after);
create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi,
insert_after, &indx_before_incr, &indx_after_incr);
@@ -7658,23 +7378,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (lane_reducing_op_p (op.code))
{
- enum vect_def_type dt;
- tree vectype_op;
-
/* The last operand of lane-reducing operation is for
reduction. */
gcc_assert (reduc_idx > 0 && reduc_idx == (int) op.num_ops - 1);
- if (!vect_is_simple_use (op.ops[0], loop_vinfo, &dt, &vectype_op))
- return false;
-
+ slp_tree op_node = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
+ tree vectype_op = SLP_TREE_VECTYPE (op_node);
tree type_op = TREE_TYPE (op.ops[0]);
-
if (!vectype_op)
{
vectype_op = get_vectype_for_scalar_type (loop_vinfo,
type_op);
- if (!vectype_op)
+ if (!vectype_op
+ || !vect_maybe_update_slp_op_vectype (op_node,
+ vectype_op))
return false;
}
@@ -7755,7 +7472,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
inside the loop body. The last operand is the reduction variable,
which is defined by the loop-header-phi. */
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
@@ -8043,6 +7760,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"in-order reduction chain without SLP.\n");
return false;
}
+ /* Code generation doesn't support function calls other
+ than .COND_*. */
+ if (!op.code.is_tree_code ()
+ && !(op.code.is_internal_fn ()
+ && conditional_internal_fn_code (internal_fn (op.code))
+ != ERROR_MARK))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "in-order reduction chain operation not "
+ "supported.\n");
+ return false;
+ }
STMT_VINFO_REDUC_TYPE (reduc_info)
= reduction_type = FOLD_LEFT_REDUCTION;
}
@@ -8345,7 +8075,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
- vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn,
+ vect_model_reduction_cost (loop_vinfo, slp_for_stmt_info, reduc_fn,
reduction_type, ncopies, cost_vec);
/* Cost the reduction op inside the loop if transformed via
vect_transform_reduction for non-lane-reducing operation. Otherwise
@@ -9698,7 +9428,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
enum vect_induction_op_type induction_type
= STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
@@ -9723,7 +9453,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
/* TODO: Support multi-lane SLP for nonlinear iv. There should be separate
vector iv update for each iv and a permutation to generate wanted
vector iv. */
- if (slp_node && SLP_TREE_LANES (slp_node) > 1)
+ if (SLP_TREE_LANES (slp_node) > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9934,13 +9664,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
- if (slp_node)
- slp_node->push_vec_def (induction_phi);
- else
- {
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
- *vec_stmt = induction_phi;
- }
+ slp_node->push_vec_def (induction_phi);
/* In case that vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
@@ -9970,10 +9694,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
induction_type);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
new_stmt = SSA_NAME_DEF_STMT (vec_def);
- if (slp_node)
- slp_node->push_vec_def (new_stmt);
- else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ slp_node->push_vec_def (new_stmt);
}
}
@@ -9999,15 +9720,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
stmt_vector_for_cost *cost_vec)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- unsigned ncopies;
bool nested_in_vect_loop = false;
class loop *iv_loop;
tree vec_def;
edge pe = loop_preheader_edge (loop);
basic_block new_bb;
- tree new_vec, vec_init = NULL_TREE, vec_step, t;
+ tree vec_init = NULL_TREE, vec_step, t;
tree new_name;
- gimple *new_stmt;
gphi *induction_phi;
tree induc_def, vec_dest;
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -10034,15 +9753,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return vectorizable_nonlinear_induction (loop_vinfo, stmt_info,
vec_stmt, slp_node, cost_vec);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- if (slp_node)
- ncopies = 1;
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
- gcc_assert (ncopies >= 1);
-
/* FORNOW. These restrictions should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt_info))
{
@@ -10052,14 +9765,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
edge latch_e;
tree loop_arg;
- if (ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in nested loop.\n");
- return false;
- }
-
exit_phi = NULL;
latch_e = loop_latch_edge (loop->inner);
loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
@@ -10096,7 +9801,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
iv_loop = loop;
gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
- if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
+ if (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1)
{
/* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
@@ -10152,41 +9857,28 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!vec_stmt) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
- if (slp_node)
- {
- /* We eventually need to set a vector type on invariant
- arguments. */
- unsigned j;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
- if (!vect_maybe_update_slp_op_vectype
- (child, SLP_TREE_VECTYPE (slp_node)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "incompatible vector types for "
- "invariants\n");
- return false;
- }
- /* loop cost for vec_loop. */
- inside_cost
- = record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
- vector_stmt, stmt_info, 0, vect_body);
- /* prologue cost for vec_init (if not nested) and step. */
- prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
- scalar_to_vec,
- stmt_info, 0, vect_prologue);
- }
- else /* if (!slp_node) */
- {
- /* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
- /* prologue cost for vec_init and vec_step. */
- prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
- }
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec,
+ SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, 0, vect_body);
+ /* prologue cost for vec_init (if not nested) and step. */
+ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+ scalar_to_vec,
+ stmt_info, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
@@ -10217,670 +9909,374 @@ vectorizable_induction (loop_vec_info loop_vinfo,
with group size 3 we need
[i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
- if (slp_node)
+ gimple_stmt_iterator incr_si;
+ bool insert_after;
+ standard_iv_increment_position (iv_loop, &incr_si, &insert_after);
+
+ /* The initial values are vectorized, but any lanes > group_size
+ need adjustment. */
+ slp_tree init_node
+ = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
+
+ /* Gather steps. Since we do not vectorize inductions as
+ cycles we have to reconstruct the step from SCEV data. */
+ unsigned group_size = SLP_TREE_LANES (slp_node);
+ tree *steps = XALLOCAVEC (tree, group_size);
+ tree *inits = XALLOCAVEC (tree, group_size);
+ stmt_vec_info phi_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
+ {
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ if (!init_node)
+ inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+ pe->dest_idx);
+ }
+
+ /* Now generate the IVs. */
+ unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ gcc_assert (multiple_p (nunits * nvects, group_size));
+ unsigned nivs;
+ unsigned HOST_WIDE_INT const_nunits;
+ if (nested_in_vect_loop)
+ nivs = nvects;
+ else if (nunits.is_constant (&const_nunits))
{
- gimple_stmt_iterator incr_si;
- bool insert_after;
- standard_iv_increment_position (iv_loop, &incr_si, &insert_after);
-
- /* The initial values are vectorized, but any lanes > group_size
- need adjustment. */
- slp_tree init_node
- = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
-
- /* Gather steps. Since we do not vectorize inductions as
- cycles we have to reconstruct the step from SCEV data. */
- unsigned group_size = SLP_TREE_LANES (slp_node);
- tree *steps = XALLOCAVEC (tree, group_size);
- tree *inits = XALLOCAVEC (tree, group_size);
- stmt_vec_info phi_info;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
- {
- steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
- if (!init_node)
- inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
- pe->dest_idx);
- }
-
- /* Now generate the IVs. */
- unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (multiple_p (nunits * nvects, group_size));
- unsigned nivs;
- unsigned HOST_WIDE_INT const_nunits;
- if (nested_in_vect_loop)
- nivs = nvects;
- else if (nunits.is_constant (&const_nunits))
- {
- /* Compute the number of distinct IVs we need. First reduce
- group_size if it is a multiple of const_nunits so we get
- one IV for a group_size of 4 but const_nunits 2. */
- unsigned group_sizep = group_size;
- if (group_sizep % const_nunits == 0)
- group_sizep = group_sizep / const_nunits;
- nivs = least_common_multiple (group_sizep,
- const_nunits) / const_nunits;
- }
- else
- {
- gcc_assert (SLP_TREE_LANES (slp_node) == 1);
- nivs = 1;
- }
- gimple_seq init_stmts = NULL;
- tree lupdate_mul = NULL_TREE;
- if (!nested_in_vect_loop)
+ /* Compute the number of distinct IVs we need. First reduce
+ group_size if it is a multiple of const_nunits so we get
+ one IV for a group_size of 4 but const_nunits 2. */
+ unsigned group_sizep = group_size;
+ if (group_sizep % const_nunits == 0)
+ group_sizep = group_sizep / const_nunits;
+ nivs = least_common_multiple (group_sizep, const_nunits) / const_nunits;
+ }
+ else
+ {
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ nivs = 1;
+ }
+ gimple_seq init_stmts = NULL;
+ tree lupdate_mul = NULL_TREE;
+ if (!nested_in_vect_loop)
+ {
+ if (nunits.is_constant (&const_nunits))
{
- if (nunits.is_constant (&const_nunits))
- {
- /* The number of iterations covered in one vector iteration. */
- unsigned lup_mul = (nvects * const_nunits) / group_size;
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, lup_mul,
- UNSIGNED)
- : build_int_cstu (stept, lup_mul));
- }
- else
- {
- if (SCALAR_FLOAT_TYPE_P (stept))
- {
- tree tem = build_int_cst (integer_type_node, vf);
- lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
- stept, tem);
- }
- else
- lupdate_mul = build_int_cst (stept, vf);
- lupdate_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- lupdate_mul);
- }
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
}
- tree peel_mul = NULL_TREE;
- if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ else
{
if (SCALAR_FLOAT_TYPE_P (stept))
- peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
- LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
- else
- peel_mul = gimple_convert (&init_stmts, stept,
- LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
- peel_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype, peel_mul);
-
- /* If early break then we have to create a new PHI which we can use as
- an offset to adjust the induction reduction in early exits.
-
- This is because when peeling for alignment using masking, the first
- few elements of the vector can be inactive. As such if we find the
- entry in the first iteration we have adjust the starting point of
- the scalar code.
-
- We do this by creating a new scalar PHI that keeps track of whether
- we are the first iteration of the loop (with the additional masking)
- or whether we have taken a loop iteration already.
-
- The generated sequence:
-
- pre-header:
- bb1:
- i_1 = <number of leading inactive elements>
-
- header:
- bb2:
- i_2 = PHI <i_1(bb1), 0(latch)>
- …
-
- early-exit:
- bb3:
- i_3 = iv_step * i_2 + PHI<vector-iv>
-
- The first part of the adjustment to create i_1 and i_2 are done here
- and the last part creating i_3 is done in
- vectorizable_live_operations when the induction extraction is
- materialized. */
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
{
- auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- tree ty_skip_niters = TREE_TYPE (skip_niters);
- tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
- vect_scalar_var,
- "pfa_iv_offset");
- gphi *nphi = create_phi_node (break_lhs_phi, bb);
- add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
- add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
- loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
-
- LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)
- = PHI_RESULT (nphi);
+ tree tem = build_int_cst (integer_type_node, vf);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
}
+ else
+ lupdate_mul = build_int_cst (stept, vf);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ lupdate_mul);
}
- tree step_mul = NULL_TREE;
- unsigned ivn;
- auto_vec<tree> vec_steps;
- for (ivn = 0; ivn < nivs; ++ivn)
+ }
+ tree peel_mul = NULL_TREE;
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ else
+ peel_mul = gimple_convert (&init_stmts, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ peel_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, peel_mul);
+
+ /* If early break then we have to create a new PHI which we can use as
+ an offset to adjust the induction reduction in early exits.
+
+ This is because when peeling for alignment using masking, the first
+ few elements of the vector can be inactive. As such if we find the
+ entry in the first iteration we have adjust the starting point of
+ the scalar code.
+
+ We do this by creating a new scalar PHI that keeps track of whether
+ we are the first iteration of the loop (with the additional masking)
+ or whether we have taken a loop iteration already.
+
+ The generated sequence:
+
+ pre-header:
+ bb1:
+ i_1 = <number of leading inactive elements>
+
+ header:
+ bb2:
+ i_2 = PHI <i_1(bb1), 0(latch)>
+ …
+
+ early-exit:
+ bb3:
+ i_3 = iv_step * i_2 + PHI<vector-iv>
+
+ The first part of the adjustment to create i_1 and i_2 are done here
+ and the last part creating i_3 is done in
+ vectorizable_live_operations when the induction extraction is
+ materialized. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
+ {
+ auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ tree ty_skip_niters = TREE_TYPE (skip_niters);
+ tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
+ vect_scalar_var,
+ "pfa_iv_offset");
+ gphi *nphi = create_phi_node (break_lhs_phi, bb);
+ add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
+ add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
+ loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
+
+ LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi);
+ }
+ }
+ tree step_mul = NULL_TREE;
+ unsigned ivn;
+ auto_vec<tree> vec_steps;
+ for (ivn = 0; ivn < nivs; ++ivn)
+ {
+ gimple_seq stmts = NULL;
+ bool invariant = true;
+ if (nunits.is_constant (&const_nunits))
{
- gimple_seq stmts = NULL;
- bool invariant = true;
- if (nunits.is_constant (&const_nunits))
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
+ for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
- tree_vector_builder step_elts (step_vectype, const_nunits, 1);
- tree_vector_builder init_elts (vectype, const_nunits, 1);
- tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
- for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
- {
- /* The scalar steps of the IVs. */
- tree elt = steps[(ivn*const_nunits + eltn) % group_size];
- elt = gimple_convert (&init_stmts,
- TREE_TYPE (step_vectype), elt);
- step_elts.quick_push (elt);
- if (!init_node)
- {
- /* The scalar inits of the IVs if not vectorized. */
- elt = inits[(ivn*const_nunits + eltn) % group_size];
- if (!useless_type_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (elt)))
- elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- TREE_TYPE (vectype), elt);
- init_elts.quick_push (elt);
- }
- /* The number of steps to add to the initial values. */
- unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
- mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, mul_elt,
- UNSIGNED)
- : build_int_cstu (stept, mul_elt));
- }
- vec_step = gimple_build_vector (&init_stmts, &step_elts);
- step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ /* The scalar steps of the IVs. */
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
+ elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt);
+ step_elts.quick_push (elt);
if (!init_node)
- vec_init = gimple_build_vector (&init_stmts, &init_elts);
- }
- else
- {
- if (init_node)
- ;
- else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
- {
- new_name = gimple_convert (&init_stmts, stept, inits[0]);
- /* Build the initial value directly as a VEC_SERIES_EXPR. */
- vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
- step_vectype, new_name, steps[0]);
- if (!useless_type_conversion_p (vectype, step_vectype))
- vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- vectype, vec_init);
- }
- else
- {
- /* Build:
- [base, base, base, ...]
- + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
- gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
- gcc_assert (flag_associative_math);
- gcc_assert (index_vectype != NULL_TREE);
-
- tree index = build_index_vector (index_vectype, 0, 1);
- new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
- inits[0]);
- tree base_vec = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- new_name);
- tree step_vec = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- steps[0]);
- vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
- step_vectype, index);
- vec_init = gimple_build (&init_stmts, MULT_EXPR,
- step_vectype, vec_init, step_vec);
- vec_init = gimple_build (&init_stmts, PLUS_EXPR,
- step_vectype, vec_init, base_vec);
- if (!useless_type_conversion_p (vectype, step_vectype))
- vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- vectype, vec_init);
- }
- /* iv_loop is nested in the loop to be vectorized. Generate:
- vec_step = [S, S, S, S] */
- t = unshare_expr (steps[0]);
- gcc_assert (CONSTANT_CLASS_P (t)
- || TREE_CODE (t) == SSA_NAME);
- vec_step = gimple_build_vector_from_val (&init_stmts,
- step_vectype, t);
- }
- vec_steps.safe_push (vec_step);
- if (peel_mul)
- {
- if (!step_mul)
- step_mul = peel_mul;
- else
- step_mul = gimple_build (&init_stmts,
- MINUS_EXPR, step_vectype,
- step_mul, peel_mul);
- }
-
- /* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
- "vec_iv_");
- induction_phi = create_phi_node (vec_dest, iv_loop->header);
- induc_def = PHI_RESULT (induction_phi);
-
- /* Create the iv update inside the loop */
- tree up = vec_step;
- if (lupdate_mul)
- {
- if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
{
- /* When we're using loop_len produced by SELEC_VL, the
- non-final iterations are not always processing VF
- elements. So vectorize induction variable instead of
-
- _21 = vect_vec_iv_.6_22 + { VF, ... };
-
- We should generate:
-
- _35 = .SELECT_VL (ivtmp_33, VF);
- vect_cst__22 = [vec_duplicate_expr] _35;
- _21 = vect_vec_iv_.6_22 + vect_cst__22; */
- vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
- tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
- vectype, 0, 0);
- if (SCALAR_FLOAT_TYPE_P (stept))
- expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
- else
- expr = gimple_convert (&stmts, stept, len);
- lupdate_mul = gimple_build_vector_from_val (&stmts,
- step_vectype,
- expr);
- up = gimple_build (&stmts, MULT_EXPR,
- step_vectype, vec_step, lupdate_mul);
+ /* The scalar inits of the IVs if not vectorized. */
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (elt)))
+ elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ TREE_TYPE (vectype), elt);
+ init_elts.quick_push (elt);
}
- else
- up = gimple_build (&init_stmts,
- MULT_EXPR, step_vectype,
- vec_step, lupdate_mul);
- }
- vec_def = gimple_convert (&stmts, step_vectype, induc_def);
- vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, up);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
- insert_iv_increment (&incr_si, insert_after, stmts);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
-
- if (init_node)
- vec_init = vect_get_slp_vect_def (init_node, ivn);
- if (!nested_in_vect_loop
- && step_mul
- && !integer_zerop (step_mul))
- {
- gcc_assert (invariant);
- vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
- up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_step, step_mul);
- vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
- vec_def, up);
- vec_init = gimple_convert (&init_stmts, vectype, vec_def);
- }
-
- /* Set the arguments of the phi node: */
- add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
-
- slp_node->push_vec_def (induction_phi);
- }
- if (!nested_in_vect_loop)
- {
- /* Fill up to the number of vectors we need for the whole group. */
- if (nunits.is_constant (&const_nunits))
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
- else
- nivs = 1;
- vec_steps.reserve (nivs-ivn);
- for (; ivn < nivs; ++ivn)
- {
- slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]);
- vec_steps.quick_push (vec_steps[0]);
+ /* The number of steps to add to the initial values. */
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, mul_elt,
+ UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
}
+ vec_step = gimple_build_vector (&init_stmts, &step_elts);
+ step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ if (!init_node)
+ vec_init = gimple_build_vector (&init_stmts, &init_elts);
}
-
- /* Re-use IVs when we can. We are generating further vector
- stmts by adding VF' * stride to the IVs generated above. */
- if (ivn < nvects)
+ else
{
- if (nunits.is_constant (&const_nunits))
+ if (init_node)
+ ;
+ else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
{
- unsigned vfp = (least_common_multiple (group_size, const_nunits)
- / group_size);
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept,
- vfp, UNSIGNED)
- : build_int_cstu (stept, vfp));
+ new_name = gimple_convert (&init_stmts, stept, inits[0]);
+ /* Build the initial value directly as a VEC_SERIES_EXPR. */
+ vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
+ step_vectype, new_name, steps[0]);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
}
else
{
- if (SCALAR_FLOAT_TYPE_P (stept))
- {
- tree tem = build_int_cst (integer_type_node, nunits);
- lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
- stept, tem);
- }
- else
- lupdate_mul = build_int_cst (stept, nunits);
- lupdate_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- lupdate_mul);
- }
- for (; ivn < nvects; ++ivn)
- {
- gimple *iv
- = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]);
- tree def = gimple_get_lhs (iv);
- if (ivn < 2*nivs)
- vec_steps[ivn - nivs]
- = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_steps[ivn - nivs], lupdate_mul);
- gimple_seq stmts = NULL;
- def = gimple_convert (&stmts, step_vectype, def);
- def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
- def, vec_steps[ivn % nivs]);
- def = gimple_convert (&stmts, vectype, def);
- if (gimple_code (iv) == GIMPLE_PHI)
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- else
- {
- gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
- gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
- }
- slp_node->push_vec_def (def);
+ /* Build:
+ [base, base, base, ...]
+ + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
+ gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
+ gcc_assert (flag_associative_math);
+ gcc_assert (index_vectype != NULL_TREE);
+
+ tree index = build_index_vector (index_vectype, 0, 1);
+ new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
+ inits[0]);
+ tree base_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ new_name);
+ tree step_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ steps[0]);
+ vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
+ step_vectype, index);
+ vec_init = gimple_build (&init_stmts, MULT_EXPR,
+ step_vectype, vec_init, step_vec);
+ vec_init = gimple_build (&init_stmts, PLUS_EXPR,
+ step_vectype, vec_init, base_vec);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
}
+	  /* Generate the vector step:
+	       vec_step = [S, S, S, S]  */
+ t = unshare_expr (steps[0]);
+ gcc_assert (CONSTANT_CLASS_P (t)
+ || TREE_CODE (t) == SSA_NAME);
+ vec_step = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, t);
+ }
+ vec_steps.safe_push (vec_step);
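+      /* Combine the peeling adjustment PEEL_MUL, if any, into the step
+	 multiplier STEP_MUL applied to the initial values.  */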
+ if (peel_mul)
+ {
+ if (!step_mul)
+ step_mul = peel_mul;
+ else
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ step_mul, peel_mul);
}
- new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
- gcc_assert (!new_bb);
-
- return true;
- }
-
- tree init_expr = vect_phi_initial_value (phi);
-
- gimple_seq stmts = NULL;
- if (!nested_in_vect_loop)
- {
- /* Convert the initial value to the IV update type. */
- tree new_type = TREE_TYPE (step_expr);
- init_expr = gimple_convert (&stmts, new_type, init_expr);
+ /* Create the induction-phi that defines the induction-operand. */
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
+ "vec_iv_");
+ induction_phi = create_phi_node (vec_dest, iv_loop->header);
+ induc_def = PHI_RESULT (induction_phi);
- /* If we are using the loop mask to "peel" for alignment then we need
- to adjust the start value here. */
- tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- if (skip_niters != NULL_TREE)
+ /* Create the iv update inside the loop */
+ tree up = vec_step;
+ if (lupdate_mul)
{
- if (FLOAT_TYPE_P (vectype))
- skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
- skip_niters);
- else
- skip_niters = gimple_convert (&stmts, new_type, skip_niters);
- tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
- skip_niters, step_expr);
- init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
- init_expr, skip_step);
- }
- }
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+	      /* When we're using loop_len produced by SELECT_VL, the
+		 non-final iterations are not always processing VF
+		 elements.  So vectorize the induction variable instead of
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
- /* Create the vector that holds the initial_value of the induction. */
- if (nested_in_vect_loop)
- {
- /* iv_loop is nested in the loop to be vectorized. init_expr had already
- been created during vectorization of previous stmts. We obtain it
- from the STMT_VINFO_VEC_STMT of the defining stmt. */
- auto_vec<tree> vec_inits;
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
- init_expr, &vec_inits);
- vec_init = vec_inits[0];
- /* If the initial value is not of proper type, convert it. */
- if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
- {
- new_stmt
- = gimple_build_assign (vect_get_new_ssa_name (vectype,
- vect_simple_var,
- "vec_iv_"),
- VIEW_CONVERT_EXPR,
- build1 (VIEW_CONVERT_EXPR, vectype,
- vec_init));
- vec_init = gimple_assign_lhs (new_stmt);
- new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
- new_stmt);
- gcc_assert (!new_bb);
- }
- }
- else
- {
- /* iv_loop is the loop to be vectorized. Create:
- vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
- stmts = NULL;
- new_name = gimple_convert (&stmts, TREE_TYPE (step_expr), init_expr);
+ We should generate:
- unsigned HOST_WIDE_INT const_nunits;
- if (nunits.is_constant (&const_nunits))
- {
- tree_vector_builder elts (step_vectype, const_nunits, 1);
- elts.quick_push (new_name);
- for (i = 1; i < const_nunits; i++)
- {
- /* Create: new_name_i = new_name + step_expr */
- new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
- new_name, step_expr);
- elts.quick_push (new_name);
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+ vectype, 0, 0);
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
+ else
+ expr = gimple_convert (&stmts, stept, len);
+ lupdate_mul = gimple_build_vector_from_val (&stmts, step_vectype,
+ expr);
+ up = gimple_build (&stmts, MULT_EXPR,
+ step_vectype, vec_step, lupdate_mul);
}
- /* Create a vector from [new_name_0, new_name_1, ...,
- new_name_nunits-1] */
- vec_init = gimple_build_vector (&stmts, &elts);
- }
- else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)))
- /* Build the initial value directly from a VEC_SERIES_EXPR. */
- vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, step_vectype,
- new_name, step_expr);
- else
- {
- /* Build:
- [base, base, base, ...]
- + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
- gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)));
- gcc_assert (flag_associative_math);
- gcc_assert (index_vectype != NULL_TREE);
-
- tree index = build_index_vector (index_vectype, 0, 1);
- tree base_vec = gimple_build_vector_from_val (&stmts, step_vectype,
- new_name);
- tree step_vec = gimple_build_vector_from_val (&stmts, step_vectype,
- step_expr);
- vec_init = gimple_build (&stmts, FLOAT_EXPR, step_vectype, index);
- vec_init = gimple_build (&stmts, MULT_EXPR, step_vectype,
- vec_init, step_vec);
- vec_init = gimple_build (&stmts, PLUS_EXPR, step_vectype,
- vec_init, base_vec);
- }
- vec_init = gimple_convert (&stmts, vectype, vec_init);
+ else
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
+ }
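+      /* The new IV value is INDUC_DEF + UP, computed in the step type;
+	 it becomes the loop-latch argument of the induction PHI.  */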
+ vec_def = gimple_convert (&stmts, step_vectype, induc_def);
+ vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up);
+ vec_def = gimple_convert (&stmts, vectype, vec_def);
+ insert_iv_increment (&incr_si, insert_after, stmts);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
- if (stmts)
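+      /* If an SLP child node provides the initial values, fetch the
+	 vector definition for this copy.  */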
+ if (init_node)
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
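+      /* Advance the initial values by the accumulated number of steps:
+	 vec_init += vec_step * step_mul.  */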
+ if (!nested_in_vect_loop
+ && step_mul
+ && !integer_zerop (step_mul))
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
+ gcc_assert (invariant);
+ vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, step_mul);
+ vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ vec_def, up);
+ vec_init = gimple_convert (&init_stmts, vectype, vec_def);
}
- }
+ /* Set the arguments of the phi node: */
+ add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- /* Create the vector that holds the step of the induction. */
- gimple_stmt_iterator *step_iv_si = NULL;
- if (nested_in_vect_loop)
- /* iv_loop is nested in the loop to be vectorized. Generate:
- vec_step = [S, S, S, S] */
- new_name = step_expr;
- else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
- {
- /* When we're using loop_len produced by SELEC_VL, the non-final
- iterations are not always processing VF elements. So vectorize
- induction variable instead of
-
- _21 = vect_vec_iv_.6_22 + { VF, ... };
-
- We should generate:
-
- _35 = .SELECT_VL (ivtmp_33, VF);
- vect_cst__22 = [vec_duplicate_expr] _35;
- _21 = vect_vec_iv_.6_22 + vect_cst__22; */
- gcc_assert (!slp_node);
- gimple_seq seq = NULL;
- vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
- tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
- expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
- unshare_expr (len)),
- &seq, true, NULL_TREE);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr,
- step_expr);
- gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
- step_iv_si = &si;
+ slp_node->push_vec_def (induction_phi);
}
- else
+ if (!nested_in_vect_loop)
{
- /* iv_loop is the loop to be vectorized. Generate:
- vec_step = [VF*S, VF*S, VF*S, VF*S] */
- gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vf);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
- }
+ /* Fill up to the number of vectors we need for the whole group. */
+ if (nunits.is_constant (&const_nunits))
+ nivs = least_common_multiple (group_size, const_nunits) / const_nunits;
else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (seq)
+ nivs = 1;
+ vec_steps.reserve (nivs-ivn);
+ for (; ivn < nivs; ++ivn)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]);
+ vec_steps.quick_push (vec_steps[0]);
}
}
- t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name)
- || TREE_CODE (new_name) == SSA_NAME);
- new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, step_iv_si);
-
-
- /* Create the following def-use cycle:
- loop prolog:
- vec_init = ...
- vec_step = ...
- loop:
- vec_iv = PHI <vec_init, vec_loop>
- ...
- STMT
- ...
- vec_loop = vec_iv + vec_step; */
-
- /* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
- induction_phi = create_phi_node (vec_dest, iv_loop->header);
- induc_def = PHI_RESULT (induction_phi);
-
- /* Create the iv update inside the loop */
- stmts = NULL;
- vec_def = gimple_convert (&stmts, step_vectype, induc_def);
- vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, vec_step);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- new_stmt = SSA_NAME_DEF_STMT (vec_def);
-
- /* Set the arguments of the phi node: */
- add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
-
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
- *vec_stmt = induction_phi;
-
- /* In case that vectorization factor (VF) is bigger than the number
- of elements that we can fit in a vectype (nunits), we have to generate
- more than one vector stmt - i.e - we need to "unroll" the
- vector stmt by a factor VF/nunits. For more details see documentation
- in vectorizable_operation. */
-
- if (ncopies > 1)
+ /* Re-use IVs when we can. We are generating further vector
+ stmts by adding VF' * stride to the IVs generated above. */
+ if (ivn < nvects)
{
- gimple_seq seq = NULL;
- /* FORNOW. This restriction should be relaxed. */
- gcc_assert (!nested_in_vect_loop);
- /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */
- gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-
- /* Create the vector that holds the step of the induction. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ if (nunits.is_constant (&const_nunits))
{
- expr = build_int_cst (integer_type_node, nunits);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ unsigned vfp = (least_common_multiple (group_size, const_nunits)
+ / group_size);
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
}
else
- expr = build_int_cst (TREE_TYPE (step_expr), nunits);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
- }
-
- t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name)
- || TREE_CODE (new_name) == SSA_NAME);
- new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
- vec_def = induc_def;
- for (i = 1; i < ncopies + 1; i++)
- {
- /* vec_i = vec_prev + vec_step */
- gimple_seq stmts = NULL;
- vec_def = gimple_convert (&stmts, step_vectype, vec_def);
- vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, vec_step);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
-
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- if (i < ncopies)
+ if (SCALAR_FLOAT_TYPE_P (stept))
{
- new_stmt = SSA_NAME_DEF_STMT (vec_def);
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ tree tem = build_int_cst (integer_type_node, nunits);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
}
else
+ lupdate_mul = build_int_cst (stept, nunits);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ lupdate_mul);
+ }
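+      /* Each further copy is the IV generated NIVS copies earlier plus
+	 the (scaled) vector step.  */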
+ for (; ivn < nvects; ++ivn)
+ {
+ gimple *iv
+ = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]);
+ tree def = gimple_get_lhs (iv);
+ if (ivn < 2*nivs)
+ vec_steps[ivn - nivs]
+ = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_steps[ivn - nivs], lupdate_mul);
+ gimple_seq stmts = NULL;
+ def = gimple_convert (&stmts, step_vectype, def);
+ def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ def, vec_steps[ivn % nivs]);
+ def = gimple_convert (&stmts, vectype, def);
+ if (gimple_code (iv) == GIMPLE_PHI)
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ else
{
- /* vec_1 = vec_iv + (VF/n * S)
- vec_2 = vec_1 + (VF/n * S)
- ...
- vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop
-
- vec_n is used as vec_loop to save the large step register and
- related operations. */
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
+ gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
+ gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
}
+ slp_node->push_vec_def (def);
}
}
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "transform induction: created def-use cycle: %G%G",
- (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def));
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
+ gcc_assert (!new_bb);
return true;
}
@@ -11683,7 +11079,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
factor = exact_div (nunits1, nunits2).to_constant ();
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
gimple_seq seq = NULL;
- loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len,
+ loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
build_int_cst (iv_type, factor));
if (seq)
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
@@ -11743,7 +11139,7 @@ scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool fl
profile_count entry_count = loop_preheader_edge (loop)->count ();
/* If we have unreliable loop profile avoid dropping entry
- count bellow header count. This can happen since loops
+ count below header count. This can happen since loops
has unrealistically low trip counts. */
while (vf > 1
&& loop->header->count > entry_count
@@ -12373,6 +11769,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
" variable-length vectorization factor\n");
}
+
+  /* When we have unrolled the loop due to a user-requested value we should
+     leave it up to the RTL unroll heuristics to determine whether it's still
+     worthwhile to unroll more.  */
+ if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+ loop->unroll = 0;
+
/* Free SLP instances here because otherwise stmt reference counting
won't work. */
slp_instance instance;