author     Dorit Nuzman <dorit@il.ibm.com>    2007-08-19 09:39:50 +0000
committer  Dorit Nuzman <dorit@gcc.gnu.org>   2007-08-19 09:39:50 +0000
commit     d29de1bf2836e4f06f32f7271192d7f92ba9235c (patch)
tree       1440005827d5c910ba6597f144fa3292c95f2032 /gcc/tree-vect-transform.c
parent     66d229b83597da5a73035cd2e13b7d5dd3a1d3d2 (diff)
tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info as argument instead of struct loop.
* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info
as argument instead of struct loop.
(nested_in_vect_loop_p): New function.
(vect_relevant): Add enum values vect_used_in_outer_by_reduction and
vect_used_in_outer.
(is_loop_header_bb_p): New. Used to differentiate loop-header phis
from other phis in the loop.
(destroy_loop_vec_info): Add additional argument to declaration.
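The two new predicates declared above (nested_in_vect_loop_p and is_loop_header_bb_p) are small helpers used throughout the rest of the patch. A minimal sketch of what they plausibly look like; the exact bodies are an assumption, not quoted from the patch:

    /* Sketch (assumed body): STMT is "nested in the vectorized loop" when
       LOOP has an inner loop and STMT's basic block belongs to it.  */
    static inline bool
    nested_in_vect_loop_p (struct loop *loop, tree stmt)
    {
      return (loop->inner
              && (loop->inner == (bb_for_stmt (stmt))->loop_father));
    }

    /* Sketch (assumed body): true iff BB is the header of its loop; used to
       tell loop-header phis apart from other phis in the loop.  */
    static inline bool
    is_loop_header_bb_p (basic_block bb)
    {
      return (bb == bb->loop_father->header);
    }
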
* tree-vectorizer.c (supportable_widening_operation): Also check whether
nested_in_vect_loop_p holds (don't allow changing the order in this case).
(vect_is_simple_reduction): Takes a loop_vec_info as argument instead
of struct loop. Call nested_in_vect_loop_p and don't require
flag_unsafe_math_optimizations if it returns true.
(new_stmt_vec_info): When setting def_type for phis differentiate
loop-header phis from other phis.
(bb_in_loop_p): New function.
(new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
update their loop_vinfo. Order of BB traversal now matters - call
dfs_enumerate_from with bb_in_loop_p.
(destroy_loop_vec_info): Takes additional argument to control whether
stmt_vinfo of the loop stmts should be destroyed as well.
(vect_is_simple_reduction): Allow the "non-reduction" use of a
reduction stmt to be defined by a non-loop-header phi.
(vectorize_loops): Call destroy_loop_vec_info with additional argument.
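The traversal change above relies on bb_in_loop_p as the predicate passed to dfs_enumerate_from. A rough sketch of the idea, assuming it simply tests loop membership; the exact signature and body are an assumption, not quoted from the patch:

    /* Sketch (assumed): used as the dfs_enumerate_from predicate so that
       new_loop_vec_info visits exactly the blocks of LOOP, including the
       blocks of an inner loop, in a well-defined order.  */
    static bool
    bb_in_loop_p (basic_block bb, void *data)
    {
      struct loop *loop = (struct loop *) data;
      return flow_bb_inside_loop_p (loop, bb);
    }
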
* tree-vect-transform.c (vectorizable_reduction): Call
nested_in_vect_loop_p. Check for multitypes in the inner-loop.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_type_promotion): Likewise.
(vectorizable_type_demotion): Likewise.
(vectorizable_store): Likewise.
(vectorizable_live_operation): Likewise.
(vectorizable_reduction): Likewise. Also pass loop_info to
vect_is_simple_reduction instead of loop.
(vect_init_vector): Call nested_in_vect_loop_p.
(get_initial_def_for_reduction): Likewise.
(vect_create_epilog_for_reduction): Likewise.
(vect_init_vector): Check which loop to work with, in case there's an
inner-loop.
(get_initial_def_for_induction): Extend to handle outer-loop
vectorization. Fix indentation.
(vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
In the case vect_induction_def, get the vector def from the induction
phi node, instead of calling get_initial_def_for_induction.
(get_initial_def_for_reduction): Extend to handle outer-loop
vectorization.
(vect_create_epilog_for_reduction): Extend to handle outer-loop
vectorization.
(vect_transform_loop): Change assert to just skip this case. Add a
dump printout.
(vect_finish_stmt_generation): Add a couple of asserts.
(vect_estimate_min_profitable_iters): Multiply
cost of inner-loop stmts (in outer-loop vectorization) by estimated
inner-loop bound.
(vect_model_reduction_cost): Don't add reduction epilogue cost in case
this is an inner-loop reduction in outer-loop vectorization.
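The repeated "check for multitypes in the inner-loop" entries above all install the same early-out: when a stmt nested in the loop being vectorized would need more than one vector copy (ncopies > 1), vectorization is given up for now. The guard, essentially as it appears in the hunks below:

    /* FORNOW. This restriction should be relaxed. */
    if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "multiple types in nested loop.");
        return false;
      }
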
* tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function.
Same code as what used to be vect_analyze_scalar_cycles, only with
additional argument loop, and loop_info passed to
vect_is_simple_reduction instead of loop.
(vect_analyze_scalar_cycles): Code factored out into
vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
Updated documentation.
(analyze_operations): Check for inner-loop loop-closed exit-phis during
outer-loop vectorization that are live or not used in the outer-loop,
because these require special handling.
(vect_enhance_data_refs_alignment): Don't consider versioning for
nested-loops.
(vect_analyze_data_refs): Check that there are no datarefs in the
inner-loop.
(vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer
and vect_used_in_outer_by_reduction cases.
(process_use): Also consider the case of outer-loop stmt defining an
inner-loop stmt and vice versa.
(vect_analyze_loop_1): New function.
(vect_analyze_loop_form): Extend, to allow a restricted form of nested
loops. Call vect_analyze_loop_1.
(vect_analyze_loop): Skip (inner-)loops within outer-loops that have
been vectorized. Call destroy_loop_vec_info with additional argument.
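The "restricted form of nested loops" that vect_analyze_loop_form now accepts is the classic outer-loop vectorization shape. An illustrative, hypothetical source-level example (not taken from the patch):

    /* Outer-loop vectorization: the i-loop is vectorized, the j-loop stays
       scalar inside the vector loop; the inner-loop sum is the kind of
       nested reduction the rest of the patch handles.  */
    for (i = 0; i < N; i++)          /* outer loop: vectorized */
      {
        int s = 0;
        for (j = 0; j < M; j++)      /* inner loop: kept inside the vector loop */
          s += a[i][j];
        b[i] = s;
      }
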
* tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow
this pattern in the inner-loop when doing outer-loop vectorization. Add
documentation and printout.
(vect_recog_dot_prod_pattern): Likewise. Also add check for
GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).
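For reference, the two patterns being restricted here match source loops roughly of the following shape (illustrative example, not taken from the patch); the change above refuses to recognize them in the inner-loop when the enclosing outer loop is the one being vectorized:

    short a[N], b[N];
    int widen_sum = 0, dot = 0;
    for (i = 0; i < N; i++)
      {
        widen_sum += a[i];        /* widen-sum: narrow data summed into a wider accumulator */
        dot += a[i] * b[i];       /* dot-product: widening multiply accumulated into an int */
      }
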
From-SVN: r127623
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--  gcc/tree-vect-transform.c | 436
1 file changed, 356 insertions, 80 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 16beffc..6e88fa9 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; int byte_misalign; + int innerloop_iters, factor; /* Cost model disabled. */ if (!flag_vect_cost_model) @@ -152,11 +153,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) TODO: Consider assigning different costs to different scalar statements. */ + /* FORNOW. */ + if (loop->inner) + innerloop_iters = 50; /* FIXME */ + for (i = 0; i < nbbs; i++) { block_stmt_iterator si; basic_block bb = bbs[i]; + if (bb->loop_father == loop->inner) + factor = innerloop_iters; + else + factor = 1; + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { tree stmt = bsi_stmt (si); @@ -164,8 +174,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) continue; - scalar_single_iter_cost += cost_for_stmt (stmt); - vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info); + scalar_single_iter_cost += cost_for_stmt (stmt) * factor; + vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor; + /* FIXME: for stmts in the inner-loop in outer-loop vectorization, + some of the "outside" costs are generated inside the outer-loop. */ vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info); } } @@ -1071,6 +1083,9 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type) tree new_temp; basic_block new_bb; + if (nested_in_vect_loop_p (loop, stmt)) + loop = loop->inner; + new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_"); add_referenced_var (new_var); @@ -1096,6 +1111,7 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type) /* Function get_initial_def_for_induction Input: + STMT - a stmt that performs an induction operation in the loop. IV_PHI - the initial value of the induction variable Output: @@ -1114,8 +1130,8 @@ get_initial_def_for_induction (tree iv_phi) tree vectype = get_vectype_for_scalar_type (scalar_type); int nunits = TYPE_VECTOR_SUBPARTS (vectype); edge pe = loop_preheader_edge (loop); + struct loop *iv_loop; basic_block new_bb; - block_stmt_iterator bsi; tree vec, vec_init, vec_step, t; tree access_fn; tree new_var; @@ -1129,8 +1145,13 @@ get_initial_def_for_induction (tree iv_phi) int ncopies = vf / nunits; tree expr; stmt_vec_info phi_info = vinfo_for_stmt (iv_phi); + bool nested_in_vect_loop = false; tree stmts; - tree stmt = NULL_TREE; + imm_use_iterator imm_iter; + use_operand_p use_p; + tree exit_phi; + edge latch_e; + tree loop_arg; block_stmt_iterator si; basic_block bb = bb_for_stmt (iv_phi); @@ -1139,65 +1160,107 @@ get_initial_def_for_induction (tree iv_phi) /* Find the first insertion point in the BB. */ si = bsi_after_labels (bb); - stmt = bsi_stmt (si); - access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi)); + if (INTEGRAL_TYPE_P (scalar_type)) + step_expr = build_int_cst (scalar_type, 0); + else + step_expr = build_real (scalar_type, dconst0); + + /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? 
*/ + if (nested_in_vect_loop_p (loop, iv_phi)) + { + nested_in_vect_loop = true; + iv_loop = loop->inner; + } + else + iv_loop = loop; + gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father); + + latch_e = loop_latch_edge (iv_loop); + loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e); + + access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi)); gcc_assert (access_fn); - ok = vect_is_simple_iv_evolution (loop->num, access_fn, - &init_expr, &step_expr); + ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn, + &init_expr, &step_expr); gcc_assert (ok); + pe = loop_preheader_edge (iv_loop); /* Create the vector that holds the initial_value of the induction. */ - new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_"); - add_referenced_var (new_var); - - new_name = force_gimple_operand (init_expr, &stmts, false, new_var); - if (stmts) + if (nested_in_vect_loop) { - new_bb = bsi_insert_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + /* iv_loop is nested in the loop to be vectorized. init_expr had already + been created during vectorization of previous stmts; We obtain it from + the STMT_VINFO_VEC_STMT of the defining stmt. */ + tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop)); + vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL); } - - t = NULL_TREE; - t = tree_cons (NULL_TREE, new_name, t); - for (i = 1; i < nunits; i++) + else { - tree tmp; + /* iv_loop is the loop to be vectorized. Create: + vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ + new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_"); + add_referenced_var (new_var); - /* Create: new_name = new_name + step_expr */ - tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr); - init_stmt = build_gimple_modify_stmt (new_var, tmp); - new_name = make_ssa_name (new_var, init_stmt); - GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name; + new_name = force_gimple_operand (init_expr, &stmts, false, new_var); + if (stmts) + { + new_bb = bsi_insert_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } - new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); - gcc_assert (!new_bb); + t = NULL_TREE; + t = tree_cons (NULL_TREE, init_expr, t); + for (i = 1; i < nunits; i++) + { + tree tmp; - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "created new init_stmt: "); - print_generic_expr (vect_dump, init_stmt, TDF_SLIM); - } - t = tree_cons (NULL_TREE, new_name, t); + /* Create: new_name_i = new_name + step_expr */ + tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr); + init_stmt = build_gimple_modify_stmt (new_var, tmp); + new_name = make_ssa_name (new_var, init_stmt); + GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name; + + new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); + gcc_assert (!new_bb); + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "created new init_stmt: "); + print_generic_expr (vect_dump, init_stmt, TDF_SLIM); + } + t = tree_cons (NULL_TREE, new_name, t); + } + /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */ + vec = build_constructor_from_list (vectype, nreverse (t)); + vec_init = vect_init_vector (iv_phi, vec, vectype); } - vec = build_constructor_from_list (vectype, nreverse (t)); - vec_init = vect_init_vector (stmt, vec, vectype); /* Create the vector that holds the step of the induction. 
*/ - expr = build_int_cst (scalar_type, vf); - new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr); + if (nested_in_vect_loop) + /* iv_loop is nested in the loop to be vectorized. Generate: + vec_step = [S, S, S, S] */ + new_name = step_expr; + else + { + /* iv_loop is the loop to be vectorized. Generate: + vec_step = [VF*S, VF*S, VF*S, VF*S] */ + expr = build_int_cst (scalar_type, vf); + new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr); + } + t = NULL_TREE; for (i = 0; i < nunits; i++) t = tree_cons (NULL_TREE, unshare_expr (new_name), t); vec = build_constructor_from_list (vectype, t); - vec_step = vect_init_vector (stmt, vec, vectype); + vec_step = vect_init_vector (iv_phi, vec, vectype); /* Create the following def-use cycle: loop prolog: - vec_init = [X, X+S, X+2*S, X+3*S] - vec_step = [VF*S, VF*S, VF*S, VF*S] + vec_init = ... + vec_step = ... loop: vec_iv = PHI <vec_init, vec_loop> ... @@ -1208,7 +1271,7 @@ get_initial_def_for_induction (tree iv_phi) /* Create the induction-phi that defines the induction-operand. */ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); add_referenced_var (vec_dest); - induction_phi = create_phi_node (vec_dest, loop->header); + induction_phi = create_phi_node (vec_dest, iv_loop->header); set_stmt_info (get_stmt_ann (induction_phi), new_stmt_vec_info (induction_phi, loop_vinfo)); induc_def = PHI_RESULT (induction_phi); @@ -1219,15 +1282,16 @@ get_initial_def_for_induction (tree iv_phi) induc_def, vec_step)); vec_def = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; - bsi = bsi_for_stmt (stmt); - vect_finish_stmt_generation (stmt, new_stmt, &bsi); + bsi_insert_before (&si, new_stmt, BSI_SAME_STMT); + set_stmt_info (get_stmt_ann (new_stmt), + new_stmt_vec_info (new_stmt, loop_vinfo)); /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop)); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop)); + add_phi_arg (induction_phi, vec_init, pe); + add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop)); - /* In case the vectorization factor (VF) is bigger than the number + /* In case that vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate more than one vector stmt - i.e - we need to "unroll" the vector stmt by a factor VF/nunits. For more details see documentation @@ -1236,6 +1300,8 @@ get_initial_def_for_induction (tree iv_phi) if (ncopies > 1) { stmt_vec_info prev_stmt_vinfo; + /* FORNOW. This restriction should be relaxed. */ + gcc_assert (!nested_in_vect_loop); /* Create the vector that holds the step of the induction. 
*/ expr = build_int_cst (scalar_type, nunits); @@ -1244,7 +1310,7 @@ get_initial_def_for_induction (tree iv_phi) for (i = 0; i < nunits; i++) t = tree_cons (NULL_TREE, unshare_expr (new_name), t); vec = build_constructor_from_list (vectype, t); - vec_step = vect_init_vector (stmt, vec, vectype); + vec_step = vect_init_vector (iv_phi, vec, vectype); vec_def = induc_def; prev_stmt_vinfo = vinfo_for_stmt (induction_phi); @@ -1252,19 +1318,50 @@ get_initial_def_for_induction (tree iv_phi) { tree tmp; - /* vec_i = vec_prev + vec_{step*nunits} */ + /* vec_i = vec_prev + vec_step */ tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step); new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp); vec_def = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; - bsi = bsi_for_stmt (stmt); - vect_finish_stmt_generation (stmt, new_stmt, &bsi); - + bsi_insert_before (&si, new_stmt, BSI_SAME_STMT); + set_stmt_info (get_stmt_ann (new_stmt), + new_stmt_vec_info (new_stmt, loop_vinfo)); STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt; prev_stmt_vinfo = vinfo_for_stmt (new_stmt); } } + if (nested_in_vect_loop) + { + /* Find the loop-closed exit-phi of the induction, and record + the final vector of induction results: */ + exit_phi = NULL; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg) + { + if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p)))) + { + exit_phi = USE_STMT (use_p); + break; + } + } + if (exit_phi) + { + stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); + /* FORNOW. Currently not supporting the case that an inner-loop induction + is not used in the outer-loop (i.e. only outside the outer-loop). */ + gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) + && !STMT_VINFO_LIVE_P (stmt_vinfo)); + + STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt; + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "vector of inductions after inner-loop:"); + print_generic_expr (vect_dump, new_stmt, TDF_SLIM); + } + } + } + + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "transform induction: created def-use cycle:"); @@ -1300,7 +1397,6 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = TYPE_VECTOR_SUBPARTS (vectype); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree vec_inv; tree vec_cst; tree t = NULL_TREE; @@ -1386,14 +1482,20 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) def_stmt_info = vinfo_for_stmt (def_stmt); vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); gcc_assert (vec_stmt); - vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0); + if (TREE_CODE (vec_stmt) == PHI_NODE) + vec_oprnd = PHI_RESULT (vec_stmt); + else + vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0); return vec_oprnd; } /* Case 4: operand is defined by a loop header phi - reduction */ case vect_reduction_def: { + struct loop *loop; + gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); + loop = (bb_for_stmt (def_stmt))->loop_father; /* Get the def before the loop */ op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); @@ -1405,8 +1507,12 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) { gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); - /* Get the def before the loop */ - return get_initial_def_for_induction (def_stmt); + /* Get the def from the vectorized stmt. 
*/ + def_stmt_info = vinfo_for_stmt (def_stmt); + vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); + gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE)); + vec_oprnd = PHI_RESULT (vec_stmt); + return vec_oprnd; } default: @@ -1487,7 +1593,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); gcc_assert (vec_stmt_for_operand); vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0); - return vec_oprnd; } @@ -1503,7 +1608,11 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + gcc_assert (stmt == bsi_stmt (*bsi)); + gcc_assert (TREE_CODE (stmt) != LABEL_EXPR); + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + set_stmt_info (get_stmt_ann (vec_stmt), new_stmt_vec_info (vec_stmt, loop_vinfo)); @@ -1571,6 +1680,8 @@ static tree get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = TYPE_VECTOR_SUBPARTS (vectype); enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); @@ -1581,8 +1692,14 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) tree t = NULL_TREE; int i; tree vector_type; + bool nested_in_vect_loop = false; gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); + if (nested_in_vect_loop_p (loop, stmt)) + nested_in_vect_loop = true; + else + gcc_assert (loop == (bb_for_stmt (stmt))->loop_father); + vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL); switch (code) @@ -1590,7 +1707,10 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def) case WIDEN_SUM_EXPR: case DOT_PROD_EXPR: case PLUS_EXPR: - *adjustment_def = init_val; + if (nested_in_vect_loop) + *adjustment_def = vecdef; + else + *adjustment_def = init_val; /* Create a vector of zeros for init_def. */ if (INTEGRAL_TYPE_P (type)) def_for_init = build_int_cst (type, 0); @@ -1679,24 +1799,31 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree new_phi; block_stmt_iterator exit_bsi; tree vec_dest; - tree new_temp; + tree new_temp = NULL_TREE; tree new_name; - tree epilog_stmt; - tree new_scalar_dest, exit_phi; + tree epilog_stmt = NULL_TREE; + tree new_scalar_dest, exit_phi, new_dest; tree bitsize, bitpos, bytesize; enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); - tree scalar_initial_def; + tree adjustment_def; tree vec_initial_def; tree orig_name; imm_use_iterator imm_iter; use_operand_p use_p; - bool extract_scalar_result; - tree reduction_op; + bool extract_scalar_result = false; + tree reduction_op, expr; tree orig_stmt; tree use_stmt; tree operation = GIMPLE_STMT_OPERAND (stmt, 1); + bool nested_in_vect_loop = false; int op_type; + if (nested_in_vect_loop_p (loop, stmt)) + { + loop = loop->inner; + nested_in_vect_loop = true; + } + op_type = TREE_OPERAND_LENGTH (operation); reduction_op = TREE_OPERAND (operation, op_type-1); vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); @@ -1709,7 +1836,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, the scalar def before the loop, that defines the initial value of the reduction variable. 
*/ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, - &scalar_initial_def); + &adjustment_def); add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); /* 1.2 set the loop-latch arg for the reduction-phi: */ @@ -1788,6 +1915,15 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, bitsize = TYPE_SIZE (scalar_type); bytesize = TYPE_SIZE_UNIT (scalar_type); + + /* In case this is a reduction in an inner-loop while vectorizing an outer + loop - we don't need to extract a single scalar result at the end of the + inner-loop. The final vector of partial results will be used in the + vectorized outer-loop, or reduced to a scalar result at the end of the + outer-loop. */ + if (nested_in_vect_loop) + goto vect_finalize_reduction; + /* 2.3 Create the reduction code, using one of the three schemes described above. */ @@ -1934,6 +2070,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, { tree rhs; + gcc_assert (!nested_in_vect_loop); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "extract scalar result"); @@ -1952,25 +2089,42 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT); } - /* 2.4 Adjust the final result by the initial value of the reduction +vect_finalize_reduction: + + /* 2.5 Adjust the final result by the initial value of the reduction variable. (When such adjustment is not needed, then - 'scalar_initial_def' is zero). + 'adjustment_def' is zero). For example, if code is PLUS we create: + new_temp = loop_exit_def + adjustment_def */ - Create: - s_out4 = scalar_expr <s_out3, scalar_initial_def> */ - - if (scalar_initial_def) + if (adjustment_def) { - tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def); - epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp); - new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + if (nested_in_vect_loop) + { + gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); + expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); + new_dest = vect_create_destination_var (scalar_dest, vectype); + } + else + { + gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE); + expr = build2 (code, scalar_type, new_temp, adjustment_def); + new_dest = vect_create_destination_var (scalar_dest, scalar_type); + } + epilog_stmt = build_gimple_modify_stmt (new_dest, expr); + new_temp = make_ssa_name (new_dest, epilog_stmt); GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp; +#if 0 + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); +#else bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT); +#endif } - /* 2.6 Replace uses of s_out0 with uses of s_out3 */ - /* Find the loop-closed-use at the loop exit of the original scalar result. + /* 2.6 Handle the loop-exit phi */ + + /* Replace uses of s_out0 with uses of s_out3: + Find the loop-closed-use at the loop exit of the original scalar result. (The reduction result is expected to have two immediate uses - one at the latch block, and one at the loop exit). */ exit_phi = NULL; @@ -1984,6 +2138,29 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, } /* We expect to have found an exit_phi because of loop-closed-ssa form. */ gcc_assert (exit_phi); + + if (nested_in_vect_loop) + { + stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); + + /* FORNOW. Currently not supporting the case that an inner-loop reduction + is not used in the outer-loop (but only outside the outer-loop). 
*/ + gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) + && !STMT_VINFO_LIVE_P (stmt_vinfo)); + + epilog_stmt = adjustment_def ? epilog_stmt : new_phi; + STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; + set_stmt_info (get_stmt_ann (epilog_stmt), + new_stmt_vec_info (epilog_stmt, loop_vinfo)); + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "vector of partial results after inner-loop:"); + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + } + return; + } + /* Replace the uses: */ orig_name = PHI_RESULT (exit_phi); FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) @@ -2065,15 +2242,30 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) tree new_stmt = NULL_TREE; int j; + if (nested_in_vect_loop_p (loop, stmt)) + { + loop = loop->inner; + /* FORNOW. This restriction should be relaxed. */ + if (ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } + } + gcc_assert (ncopies >= 1); /* 1. Is vectorizable reduction? */ /* Not supportable if the reduction variable is used in the loop. */ - if (STMT_VINFO_RELEVANT_P (stmt_info)) + if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer) return false; - if (!STMT_VINFO_LIVE_P (stmt_info)) + /* Reductions that are not used even in an enclosing outer-loop, + are expected to be "live" (used out of the loop). */ + if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop + && !STMT_VINFO_LIVE_P (stmt_info)) return false; /* Make sure it was already recognized as a reduction computation. */ @@ -2130,9 +2322,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) gcc_assert (dt == vect_reduction_def); gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); if (orig_stmt) - gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt)); + gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt)); else - gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt)); + gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt)); if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) return false; @@ -2357,6 +2549,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) int nunits_in; int nunits_out; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; @@ -2466,6 +2659,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) needs to be generated. */ gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; @@ -2480,6 +2681,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform operation."); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } + /* Handle def. 
*/ scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -2671,6 +2880,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree new_temp; @@ -2752,6 +2962,14 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, needs to be generated. */ gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } + /* Check the operands of the operation. */ if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { @@ -3093,6 +3311,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code; enum machine_mode vec_mode; tree new_temp; @@ -3111,6 +3330,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) int j; gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -3373,6 +3599,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, tree vec_oprnd0=NULL, vec_oprnd1=NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK; tree new_temp; tree def, def_stmt; @@ -3425,6 +3652,13 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) @@ -3522,6 +3756,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, tree vec_oprnd0=NULL, vec_oprnd1=NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; tree decl1 = NULL_TREE, decl2 = NULL_TREE; int op_type; @@ -3575,6 +3810,13 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } if (! 
((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) @@ -3867,6 +4109,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL; tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum machine_mode vec_mode; tree dummy; enum dr_alignment_support alignment_support_cheme; @@ -3882,6 +4125,13 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) unsigned int group_size, i; VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -4517,6 +4767,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) bool strided_load = false; tree first_stmt; + gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } + if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -4812,6 +5071,7 @@ vectorizable_live_operation (tree stmt, tree operation; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); int i; int op_type; tree op; @@ -4829,6 +5089,10 @@ vectorizable_live_operation (tree stmt, if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME) return false; + /* FORNOW. CHECKME. */ + if (nested_in_vect_loop_p (loop, stmt)) + return false; + operation = GIMPLE_STMT_OPERAND (stmt, 1); op_type = TREE_OPERAND_LENGTH (operation); @@ -6124,8 +6388,18 @@ vect_transform_loop (loop_vec_info loop_vinfo) fprintf (vect_dump, "------>vectorizing statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } + stmt_info = vinfo_for_stmt (stmt); - gcc_assert (stmt_info); + + /* vector stmts created in the outer-loop during vectorization of + stmts in an inner-loop may not have a stmt_info, and do not + need to be vectorized. */ + if (!stmt_info) + { + bsi_next (&si); + continue; + } + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) { @@ -6197,4 +6471,6 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) fprintf (vect_dump, "LOOP VECTORIZED."); + if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) + fprintf (vect_dump, "OUTER LOOP VECTORIZED."); } |