diff options
author | Dorit Nuzman <dorit@il.ibm.com> | 2005-06-18 13:18:52 +0000 |
---|---|---|
committer | Dorit Nuzman <dorit@gcc.gnu.org> | 2005-06-18 13:18:52 +0000 |
commit | 61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c (patch) | |
tree | 76522a399271e0448291fb8b63bbbf3f898dc165 /gcc/tree-vect-transform.c | |
parent | 6d409ca872d76ffce2b945153486992787e74250 (diff) | |
download | gcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.zip gcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.tar.gz gcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.tar.bz2 |
tree.def (REDUC_MAX_EXPR, [...]): New tree-codes.
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New
tree-codes.
* optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin,
OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction.
(reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab,
reduc_plus_optab): New optabs for reduction.
* expr.c (expand_expr_real_1): Handle new tree-codes.
* tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
* tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle
new tree-codes.
* optabs.c (optab_for_tree_code): Handle new tree-codes.
(init_optabs): Initialize new optabs.
* genopinit.c (optabs): Define handlers for new optabs.
* tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in
case of a phi that is marked as relevant. Call vectorizable_reduction.
(vect_mark_relevant): Phis may be marked as relevant.
(vect_mark_stmts_to_be_vectorized): The use corresponding to the
reduction variable in a reduction stmt does not mark its defining phi
as relevant. Update documentation accordingly.
(vect_can_advance_ivs_p): Skip reduction phis.
* tree-vect-transform.c (vect_get_vec_def_for_operand): Takes
additional argument. Handle reduction.
(vect_create_destination_var): Update call to vect_get_new_vect_var.
Handle non-vector argument.
(get_initial_def_for_reduction): New function.
(vect_create_epilog_for_reduction): New function.
(vectorizable_reduction): New function.
(vect_get_new_vect_var): Handle new vect_var_kind.
(vectorizable_assignment, vectorizable_operation, vectorizable_store,
vectorizable_condition): Update call to vect_get_new_vect_var.
(vect_transform_stmt): Call vectorizable_reduction.
(vect_update_ivs_after_vectorizer): Skip reduction phis.
(vect_transform_loop): Skip if stmt is both not relevant and not live.
* tree-vectorizer.c (reduction_code_for_scalar_code): New function.
(vect_is_simple_reduction): Was empty - added implementation.
* tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value.
(reduc_vec_info_type): New enum vect_def_type value.
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4sf, reduc_smin_v4sf,
reduc_plus_v4si, reduc_plus_v4sf): New define_expands.
* tree-vect-analyze.c (vect_determine_vectorization_factor): Remove
ENABLE_CHECKING around gcc_assert.
* tree-vect-transform.c (vect_do_peeling_for_loop_bound,
(vect_do_peeling_for_alignment, vect_transform_loop,
vect_get_vec_def_for_operand): Likewise.
From-SVN: r101155
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r-- | gcc/tree-vect-transform.c | 566 |
1 files changed, 534 insertions, 32 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 4695e54..2b4d1d7 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -42,6 +42,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "langhooks.h" #include "tree-pass.h" #include "toplev.h" +#include "real.h" /* Utility functions for the code transformation. */ static bool vect_transform_stmt (tree, block_stmt_iterator *); @@ -52,12 +53,13 @@ static tree vect_create_data_ref_ptr static tree vect_create_index_for_vector_ref (loop_vec_info); static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); -static tree vect_get_vec_def_for_operand (tree, tree); +static tree vect_get_vec_def_for_operand (tree, tree, tree *); static tree vect_init_vector (tree, tree); static void vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); static bool vect_is_simple_cond (tree, loop_vec_info); static void update_vuses_to_preheader (tree, struct loop*); +static tree get_initial_def_for_reduction (tree, tree, tree *); /* Utility function dealing with loop peeling (not peeling itself). */ static void vect_generate_tmps_on_preheader @@ -85,10 +87,20 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) const char *prefix; tree new_vect_var; - if (var_kind == vect_simple_var) - prefix = "vect_"; - else + switch (var_kind) + { + case vect_simple_var: + prefix = "vect_"; + break; + case vect_scalar_var: + prefix = "stmp_"; + break; + case vect_pointer_var: prefix = "vect_p"; + break; + default: + gcc_unreachable (); + } if (name) new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); @@ -435,13 +447,18 @@ vect_create_destination_var (tree scalar_dest, tree vectype) { tree vec_dest; const char *new_name; + tree type; + enum vect_var_kind kind; + + kind = vectype ? vect_simple_var : vect_scalar_var; + type = vectype ? vectype : TREE_TYPE (scalar_dest); gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); new_name = get_name (scalar_dest); if (!new_name) new_name = "var_"; - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); + vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name); add_referenced_tmp_var (vec_dest); return vec_dest; @@ -502,7 +519,7 @@ vect_init_vector (tree stmt, tree vector_var) needs to be introduced. */ static tree -vect_get_vec_def_for_operand (tree op, tree stmt) +vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) { tree vec_oprnd; tree vec_stmt; @@ -512,6 +529,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt) tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = TYPE_VECTOR_SUBPARTS (vectype); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree vec_inv; tree vec_cst; tree t = NULL_TREE; @@ -542,14 +560,14 @@ vect_get_vec_def_for_operand (tree op, tree stmt) } } - /* FORNOW */ - gcc_assert (dt != vect_reduction_def); - switch (dt) { /* Case 1: operand is a constant. */ case vect_constant_def: { + if (scalar_def) + *scalar_def = op; + /* Create 'vect_cst_ = {cst,cst,...,cst}' */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); @@ -565,6 +583,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt) /* Case 2: operand is defined outside the loop - loop invariant. */ case vect_invariant_def: { + if (scalar_def) + *scalar_def = def; + /* Create 'vec_inv = {inv,inv,..,inv}' */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "Create vector_inv."); @@ -581,6 +602,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt) /* Case 3: operand is defined inside the loop. */ case vect_loop_def: { + if (scalar_def) + *scalar_def = def_stmt; + /* Get the def from the vectorized stmt. */ def_stmt_info = vinfo_for_stmt (def_stmt); vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); @@ -589,7 +613,17 @@ vect_get_vec_def_for_operand (tree op, tree stmt) return vec_oprnd; } - /* Case 4: operand is defined by loop-header phi - induction. */ + /* Case 4: operand is defined by a loop header phi - reduction */ + case vect_reduction_def: + { + gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); + + /* Get the def before the loop */ + op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); + return get_initial_def_for_reduction (stmt, op, scalar_def); + } + + /* Case 5: operand is defined by loop-header phi - induction. */ case vect_induction_def: { if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) @@ -618,10 +652,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); } -#ifdef ENABLE_CHECKING /* Make sure bsi points to the stmt that is being vectorized. */ gcc_assert (stmt == bsi_stmt (*bsi)); -#endif #ifdef USE_MAPPED_LOCATION SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); @@ -631,6 +663,458 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) } +#define ADJUST_IN_EPILOG 1 + +/* Function get_initial_def_for_reduction + + Input: + STMT - a stmt that performs a reduction operation in the loop. + INIT_VAL - the initial value of the reduction variable + + Output: + SCALAR_DEF - a tree that holds a value to be added to the final result + of the reduction (used for "ADJUST_IN_EPILOG" - see below). + Return a vector variable, initialized according to the operation that STMT + performs. This vector will be used as the initial value of the + vector of partial results. + + Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows: + add: [0,0,...,0,0] + mult: [1,1,...,1,1] + min/max: [init_val,init_val,..,init_val,init_val] + bit and/or: [init_val,init_val,..,init_val,init_val] + and when necessary (e.g. add/mult case) let the caller know + that it needs to adjust the result by init_val. + + Option2: Initialize the vector as follows: + add: [0,0,...,0,init_val] + mult: [1,1,...,1,init_val] + min/max: [init_val,init_val,...,init_val] + bit and/or: [init_val,init_val,...,init_val] + and no adjustments are needed. + + For example, for the following code: + + s = init_val; + for (i=0;i<n;i++) + s = s + a[i]; + + STMT is 's = s + a[i]', and the reduction variable is 's'. + For a vector of 4 units, we want to return either [0,0,0,init_val], + or [0,0,0,0] and let the caller know that it needs to adjust + the result at the end by 'init_val'. + + FORNOW: We use the "ADJUST_IN_EPILOG" scheme. + TODO: Use some cost-model to estimate which scheme is more profitable. +*/ + +static tree +get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def) +{ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); + int nelements; + enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); + tree type = TREE_TYPE (init_val); + tree def; + tree vec, t = NULL_TREE; + bool need_epilog_adjust; + int i; + + gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); + + switch (code) + { + case PLUS_EXPR: + def = INTEGRAL_TYPE_P (type) ? integer_zero_node : + build_real (type, dconst0); +#ifdef ADJUST_IN_EPILOG + /* All the 'nunits' elements are set to 0. The final result will be + adjusted by 'init_val' at the loop epilog. */ + nelements = nunits; + need_epilog_adjust = true; +#else + /* 'nunits - 1' elements are set to 0; The last element is set to + 'init_val'. No further adjustments at the epilog are needed. */ + nelements = nunits - 1; + need_epilog_adjust = false; +#endif + break; + + case MIN_EXPR: + case MAX_EXPR: + def = init_val; + nelements = nunits; + need_epilog_adjust = false; + break; + + default: + gcc_unreachable (); + } + + for (i = nelements - 1; i >= 0; --i) + { + t = tree_cons (NULL_TREE, def, t); + } + + if (nelements == nunits - 1) + { + /* Set the last element of the vector. */ + t = tree_cons (NULL_TREE, init_val, t); + nelements += 1; + } + gcc_assert (nelements == nunits); + + if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST) + vec = build_vector (vectype, t); + else + vec = build_constructor (vectype, t); + + if (need_epilog_adjust) + *scalar_def = init_val; + else + *scalar_def = INTEGRAL_TYPE_P (type) ? integer_zero_node + : build_real (type, dconst0); + return vect_init_vector (stmt, vec); +} + + +/* Function vect_create_epilog_for_reduction: + + Create code at the loop-epilog to finalize the result of a reduction + computation. + + LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it + into a single result, by applying the operation REDUC_CODE on the + partial-results-vector. For this, we need to create a new phi node at the + loop exit to preserve loop-closed form, as illustrated below. + + STMT is the original scalar reduction stmt that is being vectorized. + REDUCTION_OP is the scalar reduction-variable. + REDUCTION_PHI is the phi-node that carries the reduction computation. + This function also sets the arguments for the REDUCTION_PHI: + The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP. + The loop-latch argument is VECT_DEF - the vector of partial sums. + + This function transforms this: + + loop: + vec_def = phi <null, null> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + + use <s_out0> + use <s_out0> + + Into: + + loop: + vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI + + v_out2 = reduc_expr <v_out1> + s_out3 = extract_field <v_out2, 0> + + use <s_out3> + use <s_out3> +*/ + +static void +vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op, + enum tree_code reduc_code, tree reduction_phi) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block exit_bb; + tree scalar_dest = TREE_OPERAND (stmt, 0); + tree scalar_type = TREE_TYPE (scalar_dest); + tree new_phi; + block_stmt_iterator exit_bsi; + tree vec_dest; + tree new_temp; + tree epilog_stmt; + tree new_scalar_dest, exit_phi; + tree bitsize, bitpos; + enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); + tree scalar_initial_def; + tree vec_initial_def; + tree orig_name; + imm_use_iterator imm_iter; + use_operand_p use_p; + + /*** 1. Create the reduction def-use cycle ***/ + + /* 1.1 set the loop-entry arg of the reduction-phi: */ + /* For the case of reduction, vect_get_vec_def_for_operand returns + the scalar def before the loop, that defines the initial value + of the reduction variable. */ + vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, + &scalar_initial_def); + add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); + + + /* 1.2 set the loop-latch arg for the reduction-phi: */ + add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created def-use cycle:"); + print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); + } + + + /*** 2. Create epilog code ***/ + + /* 2.1 Create new loop-exit-phi to preserve loop-closed form: + v_out1 = phi <v_loop> */ + + exit_bb = loop->single_exit->dest; + new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); + SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); + + exit_bsi = bsi_start (exit_bb); + + + /* 2.2 Create: + v_out2 = reduc_expr <v_out1> + s_out3 = extract_field <v_out2, 0> */ + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build1 (reduc_code, vectype, PHI_RESULT (new_phi))); + new_temp = make_ssa_name (vec_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created epilog code:"); + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + } + + new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); + bitsize = TYPE_SIZE (scalar_type); + + /* The result is in the low order bits. */ + if (BITS_BIG_ENDIAN) + bitpos = size_binop (MULT_EXPR, + bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), + TYPE_SIZE (scalar_type)); + else + bitpos = bitsize_zero_node; + + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build3 (BIT_FIELD_REF, scalar_type, + new_temp, bitsize, bitpos)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + + + /* 2.3 Adjust the final result by the initial value of the reduction + variable. (when such adjustment is not needed, then + 'scalar_initial_def' is zero). + + Create: + s_out = scalar_expr <s_out, scalar_initial_def> */ + + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build2 (code, scalar_type, new_temp, scalar_initial_def)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + + + /* 2.4 Replace uses of s_out0 with uses of s_out3 */ + + /* Find the loop-closed-use at the loop exit of the original + scalar result. (The reduction result is expected to have + two immediate uses - one at the latch block, and one at the + loop exit). */ + exit_phi = NULL; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) + { + if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p)))) + { + exit_phi = USE_STMT (use_p); + break; + } + } + + orig_name = PHI_RESULT (exit_phi); + + FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name) + SET_USE (use_p, new_temp); +} + + +/* Function vectorizable_reduction. + + Check if STMT performs a reduction operation that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + +bool +vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree vec_dest; + tree scalar_dest; + tree op0, op1; + tree loop_vec_def; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree operation; + enum tree_code code, reduc_code = 0; + enum machine_mode vec_mode; + int op_type; + optab optab, reduc_optab; + tree new_temp; + tree def0, def1, def_stmt0, def_stmt1; + enum vect_def_type dt0, dt1; + tree new_phi; + tree scalar_type; + bool is_simple_use0; + bool is_simple_use1; + + /* Is vectorizable reduction? */ + + /* Not supportable if the reduction variable is used in the loop. */ + if (STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + if (!STMT_VINFO_LIVE_P (stmt_info)) + return false; + + /* Make sure it was already recognized as a reduction pattern. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) + return false; + + gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); + + operation = TREE_OPERAND (stmt, 1); + code = TREE_CODE (operation); + op_type = TREE_CODE_LENGTH (code); + + if (op_type != binary_op) + return false; + + op0 = TREE_OPERAND (operation, 0); + op1 = TREE_OPERAND (operation, 1); + scalar_dest = TREE_OPERAND (stmt, 0); + scalar_type = TREE_TYPE (scalar_dest); + + /* Check the first operand. It is expected to be defined inside the loop. */ + is_simple_use0 = + vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); + is_simple_use1 = + vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); + + gcc_assert (is_simple_use0); + gcc_assert (is_simple_use1); + gcc_assert (dt0 == vect_loop_def); + gcc_assert (dt1 == vect_reduction_def); + gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE); + gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1)); + + if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1))) + return false; + + /* Supportable by target? */ + + /* check support for the operation in the loop */ + optab = optab_for_tree_code (code, vectype); + if (!optab) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no optab."); + return false; + } + vec_mode = TYPE_MODE (vectype); + if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "op not supported by target."); + return false; + } + + /* check support for the epilog operation */ + if (!reduction_code_for_scalar_code (code, &reduc_code)) + return false; + reduc_optab = optab_for_tree_code (reduc_code, vectype); + if (!reduc_optab) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no optab for reduction."); + return false; + } + if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "op not supported by target."); + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; + } + + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform reduction."); + + /* Create the destination vector */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + + + /* Create the reduction-phi that defines the reduction-operand. */ + new_phi = create_phi_node (vec_dest, loop->header); + + + /* Prepare the operand that is defined inside the loop body */ + loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); + + + /* Create the vectorized operation that computes the partial results */ + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi))); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + TREE_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + + + /* Finalize the reduction-phi (set it's arguments) and create the + epilog reduction code. */ + vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi); + return true; +} + + /* Function vectorizable_assignment. Check if STMT performs an assignment (copy) that can be vectorized. @@ -688,7 +1172,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle use. */ op = TREE_OPERAND (stmt, 1); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt); + vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); /* Arguments are ready. create the new vector stmt. */ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); @@ -846,12 +1330,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle uses. */ op0 = TREE_OPERAND (operation, 0); - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); - vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); + vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); } /* Arguments are ready. create the new vector stmt. */ @@ -940,7 +1424,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ /* Handle use - get the vectorized def from the defining stmt. */ - vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); + vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL); /* Handle def. */ /* FORNOW: make sure the data reference is aligned. */ @@ -1387,11 +1871,11 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle cond expr. */ vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt); + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); vec_cond_rhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt); - vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt); - vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt); + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); + vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); + vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); /* Arguments are ready. create the new vector stmt. */ vec_compare = build2 (TREE_CODE (cond_expr), vectype, @@ -1460,8 +1944,23 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) if (STMT_VINFO_LIVE_P (stmt_info)) { - done = vectorizable_live_operation (stmt, bsi, &vec_stmt); - gcc_assert (done); + switch (STMT_VINFO_TYPE (stmt_info)) + { + case reduc_vec_info_type: + done = vectorizable_reduction (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + + default: + done = vectorizable_live_operation (stmt, bsi, &vec_stmt); + gcc_assert (done); + } + + if (vec_stmt) + { + gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info)); + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + } } return is_store; @@ -1717,6 +2216,14 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, continue; } + /* Skip reduction phis. */ + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduc phi. skip."); + continue; + } + access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); gcc_assert (access_fn); evolution_part = @@ -1770,9 +2277,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, struct loop *new_loop; edge update_e; basic_block preheader; -#ifdef ENABLE_CHECKING int loop_num; -#endif if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); @@ -1787,14 +2292,12 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf_name, ratio); -#ifdef ENABLE_CHECKING loop_num = loop->num; -#endif new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit, ratio_mult_vf_name, ni_name, false); -#ifdef ENABLE_CHECKING gcc_assert (new_loop); gcc_assert (loop_num == loop->num); +#ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (loop, new_loop); #endif @@ -2010,8 +2513,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), niters_of_prolog_loop, ni_name, true); -#ifdef ENABLE_CHECKING gcc_assert (new_loop); +#ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif @@ -2051,7 +2554,6 @@ vect_transform_loop (loop_vec_info loop_vinfo, if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "=== vec_transform_loop ==="); - /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ @@ -2104,18 +2606,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, } stmt_info = vinfo_for_stmt (stmt); gcc_assert (stmt_info); - if (!STMT_VINFO_RELEVANT_P (stmt_info)) + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) { bsi_next (&si); continue; } -#ifdef ENABLE_CHECKING /* FORNOW: Verify that all stmts operate on the same number of units and no inner unrolling is necessary. */ gcc_assert (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) == vectorization_factor); -#endif + /* -------- vectorize statement ------------ */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "transform statement."); |