aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/tree-vect-loop.c157
-rw-r--r--gcc/tree-vect-slp.c29
-rw-r--r--gcc/tree-vectorizer.h1
4 files changed, 131 insertions, 72 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a387c80..dab3e07 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2015-05-26 Richard Biener <rguenther@suse.de>
+
+ * tree-vect-loop.c (vect_update_vf_for_slp): Split out from ...
+ (vect_analyze_loop_operations): ... here. Remove slp parameter,
+ detect whether we apply SLP. Remove call to
+ vect_update_slp_costs_according_to_vf.
+ (vect_analyze_loop_2): Call vect_update_vf_for_slp and
+ vect_update_slp_costs_according_to_vf from here. Dispatch
+ to vect_slp_analyze_operations to analyze SLP stmts.
+ * tree-vect-slp.c (vect_slp_analyze_node_operations): Drop
+ unused bb_vec_info parameter, adjust assert.
+ (vect_slp_analyze_operations): Pass in the slp instance tree
+ instead of bb_vec_info.
+ (vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations.
+ * tree-vectorizer.h (vect_slp_analyze_operations): Declare.
+
2015-05-25 Alexander Monakov <amonakov@ispras.ru>
* config/i386/i386.h (enum reg_class): Move CLOBBERED_REGS prior to
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 2c983b8..89202c4 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1355,25 +1355,85 @@ vect_analyze_loop_form (struct loop *loop)
return loop_vinfo;
}
+/* Scan the loop stmts and dependent on whether there are any (non-)SLP
+ statements update the vectorization factor. */
+
+static void
+vect_update_vf_for_slp (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+ unsigned int vectorization_factor;
+ int i;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_update_vf_for_slp ===\n");
+
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ gcc_assert (vectorization_factor != 0);
+
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by
+ the SLP instances. If that unrolling factor is 1, we say, that we
+ perform pure SLP on loop - cross iteration parallelism is not
+ exploited. */
+ bool only_slp_in_loop = true;
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+ gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && STMT_VINFO_RELATED_STMT (stmt_info))
+ {
+ stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ stmt_info = vinfo_for_stmt (stmt);
+ }
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor
+ = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Updating vectorization factor to %d\n",
+ vectorization_factor);
+}
/* Function vect_analyze_loop_operations.
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- unsigned int vectorization_factor = 0;
+ unsigned int vectorization_factor;
int i;
stmt_vec_info stmt_info;
bool need_to_vectorize = false;
int min_profitable_iters;
int min_scalar_loop_bound;
unsigned int th;
- bool only_slp_in_loop = true, ok;
+ bool ok;
HOST_WIDE_INT max_niter;
HOST_WIDE_INT estimated_niter;
int min_profitable_estimate;
@@ -1382,50 +1442,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_analyze_loop_operations ===\n");
- gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
- vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- if (slp)
- {
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by
- the SLP instances. If that unrolling factor is 1, we say, that we
- perform pure SLP on loop - cross iteration parallelism is not
- exploited. */
- for (i = 0; i < nbbs; i++)
- {
- basic_block bb = bbs[i];
- for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
- gsi_next (&si))
- {
- gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- if (STMT_VINFO_IN_PATTERN_P (stmt_info)
- && STMT_VINFO_RELATED_STMT (stmt_info))
- {
- stmt = STMT_VINFO_RELATED_STMT (stmt_info);
- stmt_info = vinfo_for_stmt (stmt);
- }
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
- }
- }
-
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to %d\n",
- vectorization_factor);
- }
-
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@@ -1540,6 +1556,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
+ if (STMT_SLP_TYPE (vinfo_for_stmt (stmt)))
+ {
+ need_to_vectorize = true;
+ continue;
+ }
if (!gimple_clobber_p (stmt)
&& !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
@@ -1563,6 +1584,9 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
return false;
}
+ vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ gcc_assert (vectorization_factor != 0);
+
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorization_factor = %d, niters = "
@@ -1586,10 +1610,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
/* Analyze cost. Decide if worth while to vectorize. */
- /* Once VF is set, SLP costs should be updated since the number of created
- vector stmts depends on VF. */
- vect_update_slp_costs_according_to_vf (loop_vinfo);
-
vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
&min_profitable_estimate);
LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
@@ -1664,7 +1684,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, slp = false;
+ bool ok;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
unsigned int th;
@@ -1790,19 +1810,34 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
if (ok)
{
- /* Decide which possible SLP instances to SLP. */
- slp = vect_make_slp_decision (loop_vinfo);
-
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
+ /* If there are any SLP instances mark them as pure_slp. */
+ if (vect_make_slp_decision (loop_vinfo))
+ {
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+
+ /* Update the vectorization factor based on the SLP decision. */
+ vect_update_vf_for_slp (loop_vinfo);
+
+ /* Once VF is set, SLP costs should be updated since the number of
+ created vector stmts depends on VF. */
+ vect_update_slp_costs_according_to_vf (loop_vinfo);
+
+ /* Analyze operations in the SLP instances. Note this may
+ remove unsupported SLP instances which makes the above
+ SLP kind detection invalid. */
+ unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+ vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+ if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+ return false;
+ }
}
else
return false;
- /* Scan all the operations in the loop and make sure they are
- vectorizable. */
-
- ok = vect_analyze_loop_operations (loop_vinfo, slp);
+ /* Scan all the remaining operations in the loop that are not subject
+ to SLP and make sure they are vectorizable. */
+ ok = vect_analyze_loop_operations (loop_vinfo);
if (!ok)
{
if (dump_enabled_p ())
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 1c51990..1e68020 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2191,7 +2191,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo)
the subtree. Return TRUE if the operations are supported. */
static bool
-vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+vect_slp_analyze_node_operations (slp_tree node)
{
bool dummy;
int i;
@@ -2202,17 +2202,17 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
return true;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (!vect_slp_analyze_node_operations (bb_vinfo, child))
+ if (!vect_slp_analyze_node_operations (child))
return false;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
- gcc_assert (PURE_SLP_STMT (stmt_info));
+ gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
if (!vect_analyze_stmt (stmt, &dummy, node))
- return false;
+ return false;
}
return true;
@@ -2222,19 +2222,26 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
/* Analyze statements in SLP instances of the basic block. Return TRUE if the
operations are supported. */
-static bool
-vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+bool
+vect_slp_analyze_operations (vec<slp_instance> slp_instances)
{
- vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
int i;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "=== vect_slp_analyze_operations ===\n");
+
for (i = 0; slp_instances.iterate (i, &instance); )
{
- if (!vect_slp_analyze_node_operations (bb_vinfo,
- SLP_INSTANCE_TREE (instance)))
+ if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
{
- vect_free_slp_instance (instance);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "removing SLP instance operations starting from: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
+ SLP_TREE_SCALAR_STMTS
+ (SLP_INSTANCE_TREE (instance))[0], 0);
+ vect_free_slp_instance (instance);
slp_instances.ordered_remove (i);
}
else
@@ -2498,7 +2505,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
return NULL;
}
- if (!vect_slp_analyze_operations (bb_vinfo))
+ if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index adde2fb..ae795a9 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1114,6 +1114,7 @@ extern void vect_free_slp_instance (slp_instance);
extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
gimple_stmt_iterator *, int,
slp_instance, bool);
+extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);