aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-transform.c
diff options
context:
space:
mode:
authorDorit Nuzman <dorit@il.ibm.com>2005-06-18 13:18:52 +0000
committerDorit Nuzman <dorit@gcc.gnu.org>2005-06-18 13:18:52 +0000
commit61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c (patch)
tree76522a399271e0448291fb8b63bbbf3f898dc165 /gcc/tree-vect-transform.c
parent6d409ca872d76ffce2b945153486992787e74250 (diff)
downloadgcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.zip
gcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.tar.gz
gcc-61d3cdbb12e87dd8bfec8ce082c0ac1d5776474c.tar.bz2
tree.def (REDUC_MAX_EXPR, [...]): New tree-codes.
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New tree-codes. * optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin, OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction. (reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab, reduc_plus_optab): New optabs for reduction. * expr.c (expand_expr_real_1): Handle new tree-codes. * tree-inline.c (estimate_num_insns_1): Handle new tree-codes. * tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle new tree-codes. * optabs.c (optab_for_tree_code): Handle new tree-codes. (init_optabs): Initialize new optabs. * genopinit.c (optabs): Define handlers for new optabs. * tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in case of a phi that is marked as relevant. Call vectorizable_reduction. (vect_mark_relevant): Phis may be marked as relevant. (vect_mark_stmts_to_be_vectorized): The use corresponding to the reduction variable in a reduction stmt does not mark its defining phi as relevant. Update documentation accordingly. (vect_can_advance_ivs_p): Skip reduction phis. * tree-vect-transform.c (vect_get_vec_def_for_operand): Takes additional argument. Handle reduction. (vect_create_destination_var): Update call to vect_get_new_vect_var. Handle non-vector argument. (get_initial_def_for_reduction): New function. (vect_create_epilog_for_reduction): New function. (vectorizable_reduction): New function. (vect_get_new_vect_var): Handle new vect_var_kind. (vectorizable_assignment, vectorizable_operation, vectorizable_store, vectorizable_condition): Update call to vect_get_new_vect_var. (vect_transform_stmt): Call vectorizable_reduction. (vect_update_ivs_after_vectorizer): Skip reduction phis. (vect_transform_loop): Skip if stmt is both not relevant and not live. * tree-vectorizer.c (reduction_code_for_scalar_code): New function. (vect_is_simple_reduction): Was empty - added implementation. * tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value. (reduc_vec_info_type): New enum vect_def_type value. * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf, reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4sf, reduc_smin_v4sf, reduc_plus_v4si, reduc_plus_v4sf): New define_expands. * tree-vect-analyze.c (vect_determine_vectorization_factor): Remove ENABLE_CHECKING around gcc_assert. * tree-vect-transform.c (vect_do_peeling_for_loop_bound, (vect_do_peeling_for_alignment, vect_transform_loop, vect_get_vec_def_for_operand): Likewise. From-SVN: r101155
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--gcc/tree-vect-transform.c566
1 files changed, 534 insertions, 32 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 4695e54..2b4d1d7 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -42,6 +42,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
+#include "real.h"
/* Utility functions for the code transformation. */
static bool vect_transform_stmt (tree, block_stmt_iterator *);
@@ -52,12 +53,13 @@ static tree vect_create_data_ref_ptr
static tree vect_create_index_for_vector_ref (loop_vec_info);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
-static tree vect_get_vec_def_for_operand (tree, tree);
+static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree);
static void vect_finish_stmt_generation
(tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
+static tree get_initial_def_for_reduction (tree, tree, tree *);
/* Utility function dealing with loop peeling (not peeling itself). */
static void vect_generate_tmps_on_preheader
@@ -85,10 +87,20 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
const char *prefix;
tree new_vect_var;
- if (var_kind == vect_simple_var)
- prefix = "vect_";
- else
+ switch (var_kind)
+ {
+ case vect_simple_var:
+ prefix = "vect_";
+ break;
+ case vect_scalar_var:
+ prefix = "stmp_";
+ break;
+ case vect_pointer_var:
prefix = "vect_p";
+ break;
+ default:
+ gcc_unreachable ();
+ }
if (name)
new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
@@ -435,13 +447,18 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
{
tree vec_dest;
const char *new_name;
+ tree type;
+ enum vect_var_kind kind;
+
+ kind = vectype ? vect_simple_var : vect_scalar_var;
+ type = vectype ? vectype : TREE_TYPE (scalar_dest);
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
new_name = get_name (scalar_dest);
if (!new_name)
new_name = "var_";
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
+ vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
add_referenced_tmp_var (vec_dest);
return vec_dest;
@@ -502,7 +519,7 @@ vect_init_vector (tree stmt, tree vector_var)
needs to be introduced. */
static tree
-vect_get_vec_def_for_operand (tree op, tree stmt)
+vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
tree vec_oprnd;
tree vec_stmt;
@@ -512,6 +529,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vec_inv;
tree vec_cst;
tree t = NULL_TREE;
@@ -542,14 +560,14 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
}
}
- /* FORNOW */
- gcc_assert (dt != vect_reduction_def);
-
switch (dt)
{
/* Case 1: operand is a constant. */
case vect_constant_def:
{
+ if (scalar_def)
+ *scalar_def = op;
+
/* Create 'vect_cst_ = {cst,cst,...,cst}' */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
@@ -565,6 +583,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
/* Case 2: operand is defined outside the loop - loop invariant. */
case vect_invariant_def:
{
+ if (scalar_def)
+ *scalar_def = def;
+
/* Create 'vec_inv = {inv,inv,..,inv}' */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Create vector_inv.");
@@ -581,6 +602,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
/* Case 3: operand is defined inside the loop. */
case vect_loop_def:
{
+ if (scalar_def)
+ *scalar_def = def_stmt;
+
/* Get the def from the vectorized stmt. */
def_stmt_info = vinfo_for_stmt (def_stmt);
vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
@@ -589,7 +613,17 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
return vec_oprnd;
}
- /* Case 4: operand is defined by loop-header phi - induction. */
+ /* Case 4: operand is defined by a loop header phi - reduction */
+ case vect_reduction_def:
+ {
+ gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+
+ /* Get the def before the loop */
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+ return get_initial_def_for_reduction (stmt, op, scalar_def);
+ }
+
+ /* Case 5: operand is defined by loop-header phi - induction. */
case vect_induction_def:
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
@@ -618,10 +652,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
}
-#ifdef ENABLE_CHECKING
/* Make sure bsi points to the stmt that is being vectorized. */
gcc_assert (stmt == bsi_stmt (*bsi));
-#endif
#ifdef USE_MAPPED_LOCATION
SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
@@ -631,6 +663,458 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
}
+#define ADJUST_IN_EPILOG 1
+
+/* Function get_initial_def_for_reduction
+
+ Input:
+ STMT - a stmt that performs a reduction operation in the loop.
+ INIT_VAL - the initial value of the reduction variable
+
+ Output:
+ SCALAR_DEF - a tree that holds a value to be added to the final result
+ of the reduction (used for "ADJUST_IN_EPILOG" - see below).
+ Return a vector variable, initialized according to the operation that STMT
+ performs. This vector will be used as the initial value of the
+ vector of partial results.
+
+ Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
+ add: [0,0,...,0,0]
+ mult: [1,1,...,1,1]
+ min/max: [init_val,init_val,..,init_val,init_val]
+ bit and/or: [init_val,init_val,..,init_val,init_val]
+ and when necessary (e.g. add/mult case) let the caller know
+ that it needs to adjust the result by init_val.
+
+ Option2: Initialize the vector as follows:
+ add: [0,0,...,0,init_val]
+ mult: [1,1,...,1,init_val]
+ min/max: [init_val,init_val,...,init_val]
+ bit and/or: [init_val,init_val,...,init_val]
+ and no adjustments are needed.
+
+ For example, for the following code:
+
+ s = init_val;
+ for (i=0;i<n;i++)
+ s = s + a[i];
+
+ STMT is 's = s + a[i]', and the reduction variable is 's'.
+ For a vector of 4 units, we want to return either [0,0,0,init_val],
+ or [0,0,0,0] and let the caller know that it needs to adjust
+ the result at the end by 'init_val'.
+
+ FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
+ TODO: Use some cost-model to estimate which scheme is more profitable.
+*/
+
+static tree
+get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
+{
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+ int nelements;
+ enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+ tree type = TREE_TYPE (init_val);
+ tree def;
+ tree vec, t = NULL_TREE;
+ bool need_epilog_adjust;
+ int i;
+
+ gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
+
+ switch (code)
+ {
+ case PLUS_EXPR:
+ def = INTEGRAL_TYPE_P (type) ? integer_zero_node :
+ build_real (type, dconst0);
+#ifdef ADJUST_IN_EPILOG
+ /* All the 'nunits' elements are set to 0. The final result will be
+ adjusted by 'init_val' at the loop epilog. */
+ nelements = nunits;
+ need_epilog_adjust = true;
+#else
+ /* 'nunits - 1' elements are set to 0; The last element is set to
+ 'init_val'. No further adjustments at the epilog are needed. */
+ nelements = nunits - 1;
+ need_epilog_adjust = false;
+#endif
+ break;
+
+ case MIN_EXPR:
+ case MAX_EXPR:
+ def = init_val;
+ nelements = nunits;
+ need_epilog_adjust = false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = nelements - 1; i >= 0; --i)
+ {
+ t = tree_cons (NULL_TREE, def, t);
+ }
+
+ if (nelements == nunits - 1)
+ {
+ /* Set the last element of the vector. */
+ t = tree_cons (NULL_TREE, init_val, t);
+ nelements += 1;
+ }
+ gcc_assert (nelements == nunits);
+
+ if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
+ vec = build_vector (vectype, t);
+ else
+ vec = build_constructor (vectype, t);
+
+ if (need_epilog_adjust)
+ *scalar_def = init_val;
+ else
+ *scalar_def = INTEGRAL_TYPE_P (type) ? integer_zero_node
+ : build_real (type, dconst0);
+ return vect_init_vector (stmt, vec);
+}
+
+
+/* Function vect_create_epilog_for_reduction:
+
+ Create code at the loop-epilog to finalize the result of a reduction
+ computation.
+
+ LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
+ into a single result, by applying the operation REDUC_CODE on the
+ partial-results-vector. For this, we need to create a new phi node at the
+ loop exit to preserve loop-closed form, as illustrated below.
+
+ STMT is the original scalar reduction stmt that is being vectorized.
+ REDUCTION_OP is the scalar reduction-variable.
+ REDUCTION_PHI is the phi-node that carries the reduction computation.
+ This function also sets the arguments for the REDUCTION_PHI:
+ The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
+ The loop-latch argument is VECT_DEF - the vector of partial sums.
+
+ This function transforms this:
+
+ loop:
+ vec_def = phi <null, null> # REDUCTION_PHI
+ ....
+ VECT_DEF = ...
+
+ loop_exit:
+ s_out0 = phi <s_loop> # EXIT_PHI
+
+ use <s_out0>
+ use <s_out0>
+
+ Into:
+
+ loop:
+ vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
+ ....
+ VECT_DEF = ...
+
+ loop_exit:
+ s_out0 = phi <s_loop> # EXIT_PHI
+ v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
+
+ v_out2 = reduc_expr <v_out1>
+ s_out3 = extract_field <v_out2, 0>
+
+ use <s_out3>
+ use <s_out3>
+*/
+
+static void
+vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
+ enum tree_code reduc_code, tree reduction_phi)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block exit_bb;
+ tree scalar_dest = TREE_OPERAND (stmt, 0);
+ tree scalar_type = TREE_TYPE (scalar_dest);
+ tree new_phi;
+ block_stmt_iterator exit_bsi;
+ tree vec_dest;
+ tree new_temp;
+ tree epilog_stmt;
+ tree new_scalar_dest, exit_phi;
+ tree bitsize, bitpos;
+ enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+ tree scalar_initial_def;
+ tree vec_initial_def;
+ tree orig_name;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+
+ /*** 1. Create the reduction def-use cycle ***/
+
+ /* 1.1 set the loop-entry arg of the reduction-phi: */
+ /* For the case of reduction, vect_get_vec_def_for_operand returns
+ the scalar def before the loop, that defines the initial value
+ of the reduction variable. */
+ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
+ &scalar_initial_def);
+ add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
+
+
+ /* 1.2 set the loop-latch arg for the reduction-phi: */
+ add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "transform reduction: created def-use cycle:");
+ print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
+ fprintf (vect_dump, "\n");
+ print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
+ }
+
+
+ /*** 2. Create epilog code ***/
+
+ /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
+ v_out1 = phi <v_loop> */
+
+ exit_bb = loop->single_exit->dest;
+ new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
+ SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
+
+ exit_bsi = bsi_start (exit_bb);
+
+
+ /* 2.2 Create:
+ v_out2 = reduc_expr <v_out1>
+ s_out3 = extract_field <v_out2, 0> */
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
+ new_temp = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "transform reduction: created epilog code:");
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+
+ new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
+ bitsize = TYPE_SIZE (scalar_type);
+
+ /* The result is in the low order bits. */
+ if (BITS_BIG_ENDIAN)
+ bitpos = size_binop (MULT_EXPR,
+ bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
+ TYPE_SIZE (scalar_type));
+ else
+ bitpos = bitsize_zero_node;
+
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build3 (BIT_FIELD_REF, scalar_type,
+ new_temp, bitsize, bitpos));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+ /* 2.3 Adjust the final result by the initial value of the reduction
+ variable. (when such adjustment is not needed, then
+ 'scalar_initial_def' is zero).
+
+ Create:
+ s_out = scalar_expr <s_out, scalar_initial_def> */
+
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build2 (code, scalar_type, new_temp, scalar_initial_def));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+ /* 2.4 Replace uses of s_out0 with uses of s_out3 */
+
+ /* Find the loop-closed-use at the loop exit of the original
+ scalar result. (The reduction result is expected to have
+ two immediate uses - one at the latch block, and one at the
+ loop exit). */
+ exit_phi = NULL;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
+ {
+ if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
+ {
+ exit_phi = USE_STMT (use_p);
+ break;
+ }
+ }
+
+ orig_name = PHI_RESULT (exit_phi);
+
+ FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
+ SET_USE (use_p, new_temp);
+}
+
+
+/* Function vectorizable_reduction.
+
+ Check if STMT performs a reduction operation that can be vectorized.
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+ stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+ Return FALSE if not a vectorizable STMT, TRUE otherwise. */
+
+bool
+vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+{
+ tree vec_dest;
+ tree scalar_dest;
+ tree op0, op1;
+ tree loop_vec_def;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ tree operation;
+ enum tree_code code, reduc_code = 0;
+ enum machine_mode vec_mode;
+ int op_type;
+ optab optab, reduc_optab;
+ tree new_temp;
+ tree def0, def1, def_stmt0, def_stmt1;
+ enum vect_def_type dt0, dt1;
+ tree new_phi;
+ tree scalar_type;
+ bool is_simple_use0;
+ bool is_simple_use1;
+
+ /* Is vectorizable reduction? */
+
+ /* Not supportable if the reduction variable is used in the loop. */
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
+ return false;
+
+ if (!STMT_VINFO_LIVE_P (stmt_info))
+ return false;
+
+ /* Make sure it was already recognized as a reduction pattern. */
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+ return false;
+
+ gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
+
+ operation = TREE_OPERAND (stmt, 1);
+ code = TREE_CODE (operation);
+ op_type = TREE_CODE_LENGTH (code);
+
+ if (op_type != binary_op)
+ return false;
+
+ op0 = TREE_OPERAND (operation, 0);
+ op1 = TREE_OPERAND (operation, 1);
+ scalar_dest = TREE_OPERAND (stmt, 0);
+ scalar_type = TREE_TYPE (scalar_dest);
+
+ /* Check the first operand. It is expected to be defined inside the loop. */
+ is_simple_use0 =
+ vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
+ is_simple_use1 =
+ vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
+
+ gcc_assert (is_simple_use0);
+ gcc_assert (is_simple_use1);
+ gcc_assert (dt0 == vect_loop_def);
+ gcc_assert (dt1 == vect_reduction_def);
+ gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
+ gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
+
+ if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
+ return false;
+
+ /* Supportable by target? */
+
+ /* check support for the operation in the loop */
+ optab = optab_for_tree_code (code, vectype);
+ if (!optab)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "no optab.");
+ return false;
+ }
+ vec_mode = TYPE_MODE (vectype);
+ if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "op not supported by target.");
+ return false;
+ }
+
+ /* check support for the epilog operation */
+ if (!reduction_code_for_scalar_code (code, &reduc_code))
+ return false;
+ reduc_optab = optab_for_tree_code (reduc_code, vectype);
+ if (!reduc_optab)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "no optab for reduction.");
+ return false;
+ }
+ if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "op not supported by target.");
+ return false;
+ }
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ return true;
+ }
+
+ /** Transform. **/
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "transform reduction.");
+
+ /* Create the destination vector */
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+
+ /* Create the reduction-phi that defines the reduction-operand. */
+ new_phi = create_phi_node (vec_dest, loop->header);
+
+
+ /* Prepare the operand that is defined inside the loop body */
+ loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
+
+
+ /* Create the vectorized operation that computes the partial results */
+ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
+ new_temp = make_ssa_name (vec_dest, *vec_stmt);
+ TREE_OPERAND (*vec_stmt, 0) = new_temp;
+ vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+
+
+ /* Finalize the reduction-phi (set it's arguments) and create the
+ epilog reduction code. */
+ vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
+ return true;
+}
+
+
/* Function vectorizable_assignment.
Check if STMT performs an assignment (copy) that can be vectorized.
@@ -688,7 +1172,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle use. */
op = TREE_OPERAND (stmt, 1);
- vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
+ vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
/* Arguments are ready. create the new vector stmt. */
*vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
@@ -846,12 +1330,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle uses. */
op0 = TREE_OPERAND (operation, 0);
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
+ vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
if (op_type == binary_op)
{
op1 = TREE_OPERAND (operation, 1);
- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
}
/* Arguments are ready. create the new vector stmt. */
@@ -940,7 +1424,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
/* Handle use - get the vectorized def from the defining stmt. */
- vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
+ vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* Handle def. */
/* FORNOW: make sure the data reference is aligned. */
@@ -1387,11 +1871,11 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle cond expr. */
vec_cond_lhs =
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
vec_cond_rhs =
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt);
- vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt);
- vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt);
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
+ vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
+ vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
/* Arguments are ready. create the new vector stmt. */
vec_compare = build2 (TREE_CODE (cond_expr), vectype,
@@ -1460,8 +1944,23 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
if (STMT_VINFO_LIVE_P (stmt_info))
{
- done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
- gcc_assert (done);
+ switch (STMT_VINFO_TYPE (stmt_info))
+ {
+ case reduc_vec_info_type:
+ done = vectorizable_reduction (stmt, bsi, &vec_stmt);
+ gcc_assert (done);
+ break;
+
+ default:
+ done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
+ gcc_assert (done);
+ }
+
+ if (vec_stmt)
+ {
+ gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+ }
}
return is_store;
@@ -1717,6 +2216,14 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
continue;
}
+ /* Skip reduction phis. */
+ if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "reduc phi. skip.");
+ continue;
+ }
+
access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
gcc_assert (access_fn);
evolution_part =
@@ -1770,9 +2277,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
struct loop *new_loop;
edge update_e;
basic_block preheader;
-#ifdef ENABLE_CHECKING
int loop_num;
-#endif
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
@@ -1787,14 +2292,12 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
&ratio_mult_vf_name, ratio);
-#ifdef ENABLE_CHECKING
loop_num = loop->num;
-#endif
new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
ratio_mult_vf_name, ni_name, false);
-#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
+#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif
@@ -2010,8 +2513,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
niters_of_prolog_loop, ni_name, true);
-#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
+#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
@@ -2051,7 +2554,6 @@ vect_transform_loop (loop_vec_info loop_vinfo,
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "=== vec_transform_loop ===");
-
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
@@ -2104,18 +2606,18 @@ vect_transform_loop (loop_vec_info loop_vinfo,
}
stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
{
bsi_next (&si);
continue;
}
-#ifdef ENABLE_CHECKING
/* FORNOW: Verify that all stmts operate on the same number of
units and no inner unrolling is necessary. */
gcc_assert
(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
== vectorization_factor);
-#endif
+
/* -------- vectorize statement ------------ */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "transform statement.");