aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-transform.c
diff options
context:
space:
mode:
authorDorit Nuzman <dorit@il.ibm.com>2007-08-19 09:39:50 +0000
committerDorit Nuzman <dorit@gcc.gnu.org>2007-08-19 09:39:50 +0000
commitd29de1bf2836e4f06f32f7271192d7f92ba9235c (patch)
tree1440005827d5c910ba6597f144fa3292c95f2032 /gcc/tree-vect-transform.c
parent66d229b83597da5a73035cd2e13b7d5dd3a1d3d2 (diff)
downloadgcc-d29de1bf2836e4f06f32f7271192d7f92ba9235c.zip
gcc-d29de1bf2836e4f06f32f7271192d7f92ba9235c.tar.gz
gcc-d29de1bf2836e4f06f32f7271192d7f92ba9235c.tar.bz2
tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info as argument instead of struct loop.
* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info as argument instead of struct loop. (nested_in_vect_loop_p): New function. (vect_relevant): Add enum values vect_used_in_outer_by_reduction and vect_used_in_outer. (is_loop_header_bb_p): New. Used to differentiate loop-header phis from other phis in the loop. (destroy_loop_vec_info): Add additional argument to declaration. * tree-vectorizer.c (supportable_widening_operation): Also check if nested_in_vect_loop_p (don't allow changing the order in this case). (vect_is_simple_reduction): Takes a loop_vec_info as argument instead of struct loop. Call nested_in_vect_loop_p and don't require flag_unsafe_math_optimizations if it returns true. (new_stmt_vec_info): When setting def_type for phis differentiate loop-header phis from other phis. (bb_in_loop_p): New function. (new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just update their loop_vinfo. Order of BB traversal now matters - call dfs_enumerate_from with bb_in_loop_p. (destroy_loop_vec_info): Takes additional argument to control whether stmt_vinfo of the loop stmts should be destroyed as well. (vect_is_simple_reduction): Allow the "non-reduction" use of a reduction stmt to be defined by a non loop-header phi. (vectorize_loops): Call destroy_loop_vec_info with additional argument. * tree-vect-transform.c (vectorizable_reduction): Call nested_in_vect_loop_p. Check for multitypes in the inner-loop. (vectorizable_call): Likewise. (vectorizable_conversion): Likewise. (vectorizable_operation): Likewise. (vectorizable_type_promotion): Likewise. (vectorizable_type_demotion): Likewise. (vectorizable_store): Likewise. (vectorizable_live_operation): Likewise. (vectorizable_reduction): Likewise. Also pass loop_info to vect_is_simple_reduction instead of loop. (vect_init_vector): Call nested_in_vect_loop_p. (get_initial_def_for_reduction): Likewise. (vect_create_epilog_for_reduction): Likewise. 
(vect_init_vector): Check which loop to work with, in case there's an inner-loop. (get_initial_def_for_induction): Extend to handle outer-loop vectorization. Fix indentation. (vect_get_vec_def_for_operand): Support phis in the case vect_loop_def. In the case vect_induction_def get the vector def from the induction phi node, instead of calling get_initial_def_for_induction. (get_initial_def_for_reduction): Extend to handle outer-loop vectorization. (vect_create_epilog_for_reduction): Extend to handle outer-loop vectorization. (vect_transform_loop): Change assert to just skip this case. Add a dump printout. (vect_finish_stmt_generation): Add a couple asserts. (vect_estimate_min_profitable_iters): Multiply cost of inner-loop stmts (in outer-loop vectorization) by estimated inner-loop bound. (vect_model_reduction_cost): Don't add reduction epilogue cost in case this is an inner-loop reduction in outer-loop vectorization. * tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function. Same code as what used to be vect_analyze_scalar_cycles, only with additional argument loop, and loop_info passed to vect_is_simple_reduction instead of loop. (vect_analyze_scalar_cycles): Code factored out into vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest. Updated documentation. (analyze_operations): Check for inner-loop loop-closed exit-phis during outer-loop vectorization that are live or not used in the outer-loop, because this requires special handling. (vect_enhance_data_refs_alignment): Don't consider versioning for nested-loops. (vect_analyze_data_refs): Check that there are no datarefs in the inner-loop. (vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer and vect_used_in_outer_by_reduction cases. (process_use): Also consider the case of outer-loop stmt defining an inner-loop stmt and vice versa. (vect_analyze_loop_1): New function. (vect_analyze_loop_form): Extend, to allow a restricted form of nested loops. Call vect_analyze_loop_1. 
(vect_analyze_loop): Skip (inner-)loops within outer-loops that have been vectorized. Call destroy_loop_vec_info with additional argument. * tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow in the inner-loop when doing outer-loop vectorization. Add documentation and printout. (vect_recog_dot_prod_pattern): Likewise. Also add check for GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop). From-SVN: r127623
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--gcc/tree-vect-transform.c436
1 files changed, 356 insertions, 80 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 16beffc..6e88fa9 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
int byte_misalign;
+ int innerloop_iters, factor;
/* Cost model disabled. */
if (!flag_vect_cost_model)
@@ -152,11 +153,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
TODO: Consider assigning different costs to different scalar
statements. */
+ /* FORNOW. */
+ if (loop->inner)
+ innerloop_iters = 50; /* FIXME */
+
for (i = 0; i < nbbs; i++)
{
block_stmt_iterator si;
basic_block bb = bbs[i];
+ if (bb->loop_father == loop->inner)
+ factor = innerloop_iters;
+ else
+ factor = 1;
+
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
tree stmt = bsi_stmt (si);
@@ -164,8 +174,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
continue;
- scalar_single_iter_cost += cost_for_stmt (stmt);
- vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
+ scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
+ vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
+ /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
+ some of the "outside" costs are generated inside the outer-loop. */
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
}
}
@@ -1071,6 +1083,9 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)
tree new_temp;
basic_block new_bb;
+ if (nested_in_vect_loop_p (loop, stmt))
+ loop = loop->inner;
+
new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
add_referenced_var (new_var);
@@ -1096,6 +1111,7 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)
/* Function get_initial_def_for_induction
Input:
+ STMT - a stmt that performs an induction operation in the loop.
IV_PHI - the initial value of the induction variable
Output:
@@ -1114,8 +1130,8 @@ get_initial_def_for_induction (tree iv_phi)
tree vectype = get_vectype_for_scalar_type (scalar_type);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
edge pe = loop_preheader_edge (loop);
+ struct loop *iv_loop;
basic_block new_bb;
- block_stmt_iterator bsi;
tree vec, vec_init, vec_step, t;
tree access_fn;
tree new_var;
@@ -1129,8 +1145,13 @@ get_initial_def_for_induction (tree iv_phi)
int ncopies = vf / nunits;
tree expr;
stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
+ bool nested_in_vect_loop = false;
tree stmts;
- tree stmt = NULL_TREE;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ tree exit_phi;
+ edge latch_e;
+ tree loop_arg;
block_stmt_iterator si;
basic_block bb = bb_for_stmt (iv_phi);
@@ -1139,65 +1160,107 @@ get_initial_def_for_induction (tree iv_phi)
/* Find the first insertion point in the BB. */
si = bsi_after_labels (bb);
- stmt = bsi_stmt (si);
- access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
+ if (INTEGRAL_TYPE_P (scalar_type))
+ step_expr = build_int_cst (scalar_type, 0);
+ else
+ step_expr = build_real (scalar_type, dconst0);
+
+ /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
+ if (nested_in_vect_loop_p (loop, iv_phi))
+ {
+ nested_in_vect_loop = true;
+ iv_loop = loop->inner;
+ }
+ else
+ iv_loop = loop;
+ gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
+
+ latch_e = loop_latch_edge (iv_loop);
+ loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
+
+ access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
gcc_assert (access_fn);
- ok = vect_is_simple_iv_evolution (loop->num, access_fn,
- &init_expr, &step_expr);
+ ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
+ &init_expr, &step_expr);
gcc_assert (ok);
+ pe = loop_preheader_edge (iv_loop);
/* Create the vector that holds the initial_value of the induction. */
- new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
- add_referenced_var (new_var);
-
- new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
- if (stmts)
+ if (nested_in_vect_loop)
{
- new_bb = bsi_insert_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
+ /* iv_loop is nested in the loop to be vectorized. init_expr had already
+ been created during vectorization of previous stmts; We obtain it from
+ the STMT_VINFO_VEC_STMT of the defining stmt. */
+ tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
+ vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
}
-
- t = NULL_TREE;
- t = tree_cons (NULL_TREE, new_name, t);
- for (i = 1; i < nunits; i++)
+ else
{
- tree tmp;
+ /* iv_loop is the loop to be vectorized. Create:
+ vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
+ new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
+ add_referenced_var (new_var);
- /* Create: new_name = new_name + step_expr */
- tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
- init_stmt = build_gimple_modify_stmt (new_var, tmp);
- new_name = make_ssa_name (new_var, init_stmt);
- GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
+ new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
+ if (stmts)
+ {
+ new_bb = bsi_insert_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
- new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
- gcc_assert (!new_bb);
+ t = NULL_TREE;
+ t = tree_cons (NULL_TREE, init_expr, t);
+ for (i = 1; i < nunits; i++)
+ {
+ tree tmp;
- if (vect_print_dump_info (REPORT_DETAILS))
- {
- fprintf (vect_dump, "created new init_stmt: ");
- print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
- }
- t = tree_cons (NULL_TREE, new_name, t);
+ /* Create: new_name_i = new_name + step_expr */
+ tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
+ init_stmt = build_gimple_modify_stmt (new_var, tmp);
+ new_name = make_ssa_name (new_var, init_stmt);
+ GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
+
+ new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
+ gcc_assert (!new_bb);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "created new init_stmt: ");
+ print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
+ }
+ t = tree_cons (NULL_TREE, new_name, t);
+ }
+ /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
+ vec = build_constructor_from_list (vectype, nreverse (t));
+ vec_init = vect_init_vector (iv_phi, vec, vectype);
}
- vec = build_constructor_from_list (vectype, nreverse (t));
- vec_init = vect_init_vector (stmt, vec, vectype);
/* Create the vector that holds the step of the induction. */
- expr = build_int_cst (scalar_type, vf);
- new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
+ if (nested_in_vect_loop)
+ /* iv_loop is nested in the loop to be vectorized. Generate:
+ vec_step = [S, S, S, S] */
+ new_name = step_expr;
+ else
+ {
+ /* iv_loop is the loop to be vectorized. Generate:
+ vec_step = [VF*S, VF*S, VF*S, VF*S] */
+ expr = build_int_cst (scalar_type, vf);
+ new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
+ }
+
t = NULL_TREE;
for (i = 0; i < nunits; i++)
t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
vec = build_constructor_from_list (vectype, t);
- vec_step = vect_init_vector (stmt, vec, vectype);
+ vec_step = vect_init_vector (iv_phi, vec, vectype);
/* Create the following def-use cycle:
loop prolog:
- vec_init = [X, X+S, X+2*S, X+3*S]
- vec_step = [VF*S, VF*S, VF*S, VF*S]
+ vec_init = ...
+ vec_step = ...
loop:
vec_iv = PHI <vec_init, vec_loop>
...
@@ -1208,7 +1271,7 @@ get_initial_def_for_induction (tree iv_phi)
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
add_referenced_var (vec_dest);
- induction_phi = create_phi_node (vec_dest, loop->header);
+ induction_phi = create_phi_node (vec_dest, iv_loop->header);
set_stmt_info (get_stmt_ann (induction_phi),
new_stmt_vec_info (induction_phi, loop_vinfo));
induc_def = PHI_RESULT (induction_phi);
@@ -1219,15 +1282,16 @@ get_initial_def_for_induction (tree iv_phi)
induc_def, vec_step));
vec_def = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
- bsi = bsi_for_stmt (stmt);
- vect_finish_stmt_generation (stmt, new_stmt, &bsi);
+ bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
+ set_stmt_info (get_stmt_ann (new_stmt),
+ new_stmt_vec_info (new_stmt, loop_vinfo));
/* Set the arguments of the phi node: */
- add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
+ add_phi_arg (induction_phi, vec_init, pe);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
- /* In case the vectorization factor (VF) is bigger than the number
+ /* In case that vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. For more details see documentation
@@ -1236,6 +1300,8 @@ get_initial_def_for_induction (tree iv_phi)
if (ncopies > 1)
{
stmt_vec_info prev_stmt_vinfo;
+ /* FORNOW. This restriction should be relaxed. */
+ gcc_assert (!nested_in_vect_loop);
/* Create the vector that holds the step of the induction. */
expr = build_int_cst (scalar_type, nunits);
@@ -1244,7 +1310,7 @@ get_initial_def_for_induction (tree iv_phi)
for (i = 0; i < nunits; i++)
t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
vec = build_constructor_from_list (vectype, t);
- vec_step = vect_init_vector (stmt, vec, vectype);
+ vec_step = vect_init_vector (iv_phi, vec, vectype);
vec_def = induc_def;
prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
@@ -1252,19 +1318,50 @@ get_initial_def_for_induction (tree iv_phi)
{
tree tmp;
- /* vec_i = vec_prev + vec_{step*nunits} */
+ /* vec_i = vec_prev + vec_step */
tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
vec_def = make_ssa_name (vec_dest, new_stmt);
GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
- bsi = bsi_for_stmt (stmt);
- vect_finish_stmt_generation (stmt, new_stmt, &bsi);
-
+ bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
+ set_stmt_info (get_stmt_ann (new_stmt),
+ new_stmt_vec_info (new_stmt, loop_vinfo));
STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
}
}
+ if (nested_in_vect_loop)
+ {
+ /* Find the loop-closed exit-phi of the induction, and record
+ the final vector of induction results: */
+ exit_phi = NULL;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
+ {
+ if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
+ {
+ exit_phi = USE_STMT (use_p);
+ break;
+ }
+ }
+ if (exit_phi)
+ {
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
+ /* FORNOW. Currently not supporting the case that an inner-loop induction
+ is not used in the outer-loop (i.e. only outside the outer-loop). */
+ gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
+ && !STMT_VINFO_LIVE_P (stmt_vinfo));
+
+ STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "vector of inductions after inner-loop:");
+ print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
+ }
+ }
+ }
+
+
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "transform induction: created def-use cycle:");
@@ -1300,7 +1397,6 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vec_inv;
tree vec_cst;
tree t = NULL_TREE;
@@ -1386,14 +1482,20 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
def_stmt_info = vinfo_for_stmt (def_stmt);
vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
gcc_assert (vec_stmt);
- vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
+ if (TREE_CODE (vec_stmt) == PHI_NODE)
+ vec_oprnd = PHI_RESULT (vec_stmt);
+ else
+ vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
return vec_oprnd;
}
/* Case 4: operand is defined by a loop header phi - reduction */
case vect_reduction_def:
{
+ struct loop *loop;
+
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+ loop = (bb_for_stmt (def_stmt))->loop_father;
/* Get the def before the loop */
op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
@@ -1405,8 +1507,12 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
- /* Get the def before the loop */
- return get_initial_def_for_induction (def_stmt);
+ /* Get the def from the vectorized stmt. */
+ def_stmt_info = vinfo_for_stmt (def_stmt);
+ vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
+ gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
+ vec_oprnd = PHI_RESULT (vec_stmt);
+ return vec_oprnd;
}
default:
@@ -1487,7 +1593,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
gcc_assert (vec_stmt_for_operand);
vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
-
return vec_oprnd;
}
@@ -1503,7 +1608,11 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt,
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ gcc_assert (stmt == bsi_stmt (*bsi));
+ gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
+
bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+
set_stmt_info (get_stmt_ann (vec_stmt),
new_stmt_vec_info (vec_stmt, loop_vinfo));
@@ -1571,6 +1680,8 @@ static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
@@ -1581,8 +1692,14 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
tree t = NULL_TREE;
int i;
tree vector_type;
+ bool nested_in_vect_loop = false;
gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
+ if (nested_in_vect_loop_p (loop, stmt))
+ nested_in_vect_loop = true;
+ else
+ gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
+
vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
switch (code)
@@ -1590,7 +1707,10 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
case PLUS_EXPR:
- *adjustment_def = init_val;
+ if (nested_in_vect_loop)
+ *adjustment_def = vecdef;
+ else
+ *adjustment_def = init_val;
/* Create a vector of zeros for init_def. */
if (INTEGRAL_TYPE_P (type))
def_for_init = build_int_cst (type, 0);
@@ -1679,24 +1799,31 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
tree new_phi;
block_stmt_iterator exit_bsi;
tree vec_dest;
- tree new_temp;
+ tree new_temp = NULL_TREE;
tree new_name;
- tree epilog_stmt;
- tree new_scalar_dest, exit_phi;
+ tree epilog_stmt = NULL_TREE;
+ tree new_scalar_dest, exit_phi, new_dest;
tree bitsize, bitpos, bytesize;
enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
- tree scalar_initial_def;
+ tree adjustment_def;
tree vec_initial_def;
tree orig_name;
imm_use_iterator imm_iter;
use_operand_p use_p;
- bool extract_scalar_result;
- tree reduction_op;
+ bool extract_scalar_result = false;
+ tree reduction_op, expr;
tree orig_stmt;
tree use_stmt;
tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
+ bool nested_in_vect_loop = false;
int op_type;
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ loop = loop->inner;
+ nested_in_vect_loop = true;
+ }
+
op_type = TREE_OPERAND_LENGTH (operation);
reduction_op = TREE_OPERAND (operation, op_type-1);
vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
@@ -1709,7 +1836,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
the scalar def before the loop, that defines the initial value
of the reduction variable. */
vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
- &scalar_initial_def);
+ &adjustment_def);
add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
/* 1.2 set the loop-latch arg for the reduction-phi: */
@@ -1788,6 +1915,15 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type);
+
+ /* In case this is a reduction in an inner-loop while vectorizing an outer
+ loop - we don't need to extract a single scalar result at the end of the
+ inner-loop. The final vector of partial results will be used in the
+ vectorized outer-loop, or reduced to a scalar result at the end of the
+ outer-loop. */
+ if (nested_in_vect_loop)
+ goto vect_finalize_reduction;
+
/* 2.3 Create the reduction code, using one of the three schemes described
above. */
@@ -1934,6 +2070,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
{
tree rhs;
+ gcc_assert (!nested_in_vect_loop);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "extract scalar result");
@@ -1952,25 +2089,42 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
}
- /* 2.4 Adjust the final result by the initial value of the reduction
+vect_finalize_reduction:
+
+ /* 2.5 Adjust the final result by the initial value of the reduction
variable. (When such adjustment is not needed, then
- 'scalar_initial_def' is zero).
+ 'adjustment_def' is zero). For example, if code is PLUS we create:
+ new_temp = loop_exit_def + adjustment_def */
- Create:
- s_out4 = scalar_expr <s_out3, scalar_initial_def> */
-
- if (scalar_initial_def)
+ if (adjustment_def)
{
- tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
- epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
- new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ if (nested_in_vect_loop)
+ {
+ gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
+ expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
+ new_dest = vect_create_destination_var (scalar_dest, vectype);
+ }
+ else
+ {
+ gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
+ expr = build2 (code, scalar_type, new_temp, adjustment_def);
+ new_dest = vect_create_destination_var (scalar_dest, scalar_type);
+ }
+ epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
+ new_temp = make_ssa_name (new_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
+#if 0
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+#else
bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
+#endif
}
- /* 2.6 Replace uses of s_out0 with uses of s_out3 */
- /* Find the loop-closed-use at the loop exit of the original scalar result.
+ /* 2.6 Handle the loop-exit phi */
+
+ /* Replace uses of s_out0 with uses of s_out3:
+ Find the loop-closed-use at the loop exit of the original scalar result.
(The reduction result is expected to have two immediate uses - one at the
latch block, and one at the loop exit). */
exit_phi = NULL;
@@ -1984,6 +2138,29 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,
}
/* We expect to have found an exit_phi because of loop-closed-ssa form. */
gcc_assert (exit_phi);
+
+ if (nested_in_vect_loop)
+ {
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
+
+ /* FORNOW. Currently not supporting the case that an inner-loop reduction
+ is not used in the outer-loop (but only outside the outer-loop). */
+ gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
+ && !STMT_VINFO_LIVE_P (stmt_vinfo));
+
+ epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
+ STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
+ set_stmt_info (get_stmt_ann (epilog_stmt),
+ new_stmt_vec_info (epilog_stmt, loop_vinfo));
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "vector of partial results after inner-loop:");
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+ return;
+ }
+
/* Replace the uses: */
orig_name = PHI_RESULT (exit_phi);
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
@@ -2065,15 +2242,30 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree new_stmt = NULL_TREE;
int j;
+ if (nested_in_vect_loop_p (loop, stmt))
+ {
+ loop = loop->inner;
+ /* FORNOW. This restriction should be relaxed. */
+ if (ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+ }
+
gcc_assert (ncopies >= 1);
/* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop. */
- if (STMT_VINFO_RELEVANT_P (stmt_info))
+ if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
return false;
- if (!STMT_VINFO_LIVE_P (stmt_info))
+ /* Reductions that are not used even in an enclosing outer-loop,
+ are expected to be "live" (used out of the loop). */
+ if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
+ && !STMT_VINFO_LIVE_P (stmt_info))
return false;
/* Make sure it was already recognized as a reduction computation. */
@@ -2130,9 +2322,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (dt == vect_reduction_def);
gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
if (orig_stmt)
- gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
+ gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
else
- gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
+ gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
return false;
@@ -2357,6 +2549,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
int nunits_in;
int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
tree new_stmt;
@@ -2466,6 +2659,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
needs to be generated. */
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@@ -2480,6 +2681,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+
/* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
@@ -2671,6 +2880,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp;
@@ -2752,6 +2962,14 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
needs to be generated. */
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
{
@@ -3093,6 +3311,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code;
enum machine_mode vec_mode;
tree new_temp;
@@ -3111,6 +3330,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
int j;
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -3373,6 +3599,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK;
tree new_temp;
tree def, def_stmt;
@@ -3425,6 +3652,13 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0)))
@@ -3522,6 +3756,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
int op_type;
@@ -3575,6 +3810,13 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0)))
@@ -3867,6 +4109,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum machine_mode vec_mode;
tree dummy;
enum dr_alignment_support alignment_support_cheme;
@@ -3882,6 +4125,13 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
unsigned int group_size, i;
VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -4517,6 +4767,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
bool strided_load = false;
tree first_stmt;
+ gcc_assert (ncopies >= 1);
+ /* FORNOW. This restriction should be relaxed. */
+ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in nested loop.");
+ return false;
+ }
+
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -4812,6 +5071,7 @@ vectorizable_live_operation (tree stmt,
tree operation;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i;
int op_type;
tree op;
@@ -4829,6 +5089,10 @@ vectorizable_live_operation (tree stmt,
if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
return false;
+ /* FORNOW. CHECKME. */
+ if (nested_in_vect_loop_p (loop, stmt))
+ return false;
+
operation = GIMPLE_STMT_OPERAND (stmt, 1);
op_type = TREE_OPERAND_LENGTH (operation);
@@ -6124,8 +6388,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
fprintf (vect_dump, "------>vectorizing statement: ");
print_generic_expr (vect_dump, stmt, TDF_SLIM);
}
+
stmt_info = vinfo_for_stmt (stmt);
- gcc_assert (stmt_info);
+
+ /* vector stmts created in the outer-loop during vectorization of
+ stmts in an inner-loop may not have a stmt_info, and do not
+ need to be vectorized. */
+ if (!stmt_info)
+ {
+ bsi_next (&si);
+ continue;
+ }
+
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
{
@@ -6197,4 +6471,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
fprintf (vect_dump, "LOOP VECTORIZED.");
+ if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
+ fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}