aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-transform.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--gcc/tree-vect-transform.c422
1 files changed, 294 insertions, 128 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 42090f7..db0573c 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -1,5 +1,5 @@
/* Transformation Utilities for Loop Vectorization.
- Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
+ Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
This file is part of GCC.
@@ -59,6 +59,7 @@ static void vect_finish_stmt_generation
(tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
+static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);
/* Utility function dealing with loop peeling (not peeling itself). */
@@ -656,6 +657,8 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
switch (code)
{
+ case WIDEN_SUM_EXPR:
+ case DOT_PROD_EXPR:
case PLUS_EXPR:
if (INTEGRAL_TYPE_P (type))
def = build_int_cst (type, 0);
@@ -711,66 +714,66 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
}
-/* Function vect_create_epilog_for_reduction:
+/* Function vect_create_epilog_for_reduction
Create code at the loop-epilog to finalize the result of a reduction
- computation.
+ computation.
- LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
- into a single result, by applying the operation REDUC_CODE on the
- partial-results-vector. For this, we need to create a new phi node at the
- loop exit to preserve loop-closed form, as illustrated below.
-
- STMT is the original scalar reduction stmt that is being vectorized.
- REDUCTION_OP is the scalar reduction-variable.
+ VECT_DEF is a vector of partial results.
+ REDUC_CODE is the tree-code for the epilog reduction.
+ STMT is the scalar reduction stmt that is being vectorized.
REDUCTION_PHI is the phi-node that carries the reduction computation.
- This function also sets the arguments for the REDUCTION_PHI:
- The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
- The loop-latch argument is VECT_DEF - the vector of partial sums.
- This function transforms this:
+ This function:
+ 1. Creates the reduction def-use cycle: sets the the arguments for
+ REDUCTION_PHI:
+ The loop-entry argument is the vectorized initial-value of the reduction.
+ The loop-latch argument is VECT_DEF - the vector of partial sums.
+ 2. "Reduces" the vector of partial results VECT_DEF into a single result,
+ by applying the operation specified by REDUC_CODE if available, or by
+ other means (whole-vector shifts or a scalar loop).
+ The function also creates a new phi node at the loop exit to preserve
+ loop-closed form, as illustrated below.
+
+ The flow at the entry to this function:
loop:
- vec_def = phi <null, null> # REDUCTION_PHI
- ....
- VECT_DEF = ...
-
+ vec_def = phi <null, null> # REDUCTION_PHI
+ VECT_DEF = vector_stmt # vectorized form of STMT
+ s_loop = scalar_stmt # (scalar) STMT
loop_exit:
- s_out0 = phi <s_loop> # EXIT_PHI
-
+ s_out0 = phi <s_loop> # (scalar) EXIT_PHI
use <s_out0>
use <s_out0>
- Into:
+ The above is transformed by this function into:
loop:
- vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
- ....
- VECT_DEF = ...
-
+ vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
+ VECT_DEF = vector_stmt # vectorized form of STMT
+ s_loop = scalar_stmt # (scalar) STMT
loop_exit:
- s_out0 = phi <s_loop> # EXIT_PHI
- v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
-
- v_out2 = reduc_expr <v_out1>
+ s_out0 = phi <s_loop> # (scalar) EXIT_PHI
+ v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
+ v_out2 = reduce <v_out1>
s_out3 = extract_field <v_out2, 0>
-
- use <s_out3>
- use <s_out3>
+ s_out4 = adjust_result <s_out3>
+ use <s_out4>
+ use <s_out4>
*/
static void
-vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
+vect_create_epilog_for_reduction (tree vect_def, tree stmt,
enum tree_code reduc_code, tree reduction_phi)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- enum machine_mode mode = TYPE_MODE (vectype);
+ tree vectype;
+ enum machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block exit_bb;
- tree scalar_dest = TREE_OPERAND (stmt, 0);
- tree scalar_type = TREE_TYPE (scalar_dest);
+ tree scalar_dest;
+ tree scalar_type;
tree new_phi;
block_stmt_iterator exit_bsi;
tree vec_dest;
@@ -786,7 +789,16 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
imm_use_iterator imm_iter;
use_operand_p use_p;
bool extract_scalar_result;
+ tree reduction_op;
+ tree orig_stmt;
+ tree operation = TREE_OPERAND (stmt, 1);
+ int op_type;
+ op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
+ reduction_op = TREE_OPERAND (operation, op_type-1);
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+ mode = TYPE_MODE (vectype);
+
/*** 1. Create the reduction def-use cycle ***/
/* 1.1 set the loop-entry arg of the reduction-phi: */
@@ -797,7 +809,6 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
&scalar_initial_def);
add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
-
/* 1.2 set the loop-latch arg for the reduction-phi: */
add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
@@ -810,7 +821,32 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
}
- /*** 2. Create epilog code ***/
+ /*** 2. Create epilog code
+ The reduction epilog code operates across the elements of the vector
+ of partial results computed by the vectorized loop.
+ The reduction epilog code consists of:
+ step 1: compute the scalar result in a vector (v_out2)
+ step 2: extract the scalar result (s_out3) from the vector (v_out2)
+ step 3: adjust the scalar result (s_out3) if needed.
+
+ Step 1 can be accomplished using one the following three schemes:
+ (scheme 1) using reduc_code, if available.
+ (scheme 2) using whole-vector shifts, if available.
+ (scheme 3) using a scalar loop. In this case steps 1+2 above are
+ combined.
+
+ The overall epilog code looks like this:
+
+ s_out0 = phi <s_loop> # original EXIT_PHI
+ v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
+ v_out2 = reduce <v_out1> # step 1
+ s_out3 = extract_field <v_out2, 0> # step 2
+ s_out4 = adjust_result <s_out3> # step 3
+
+ (step 3 is optional, and step2 1 and 2 may be combined).
+ Lastly, the uses of s_out0 are replaced by s_out4.
+
+ ***/
/* 2.1 Create new loop-exit-phi to preserve loop-closed form:
v_out1 = phi <v_loop> */
@@ -818,15 +854,39 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
exit_bb = loop->single_exit->dest;
new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
-
exit_bsi = bsi_start (exit_bb);
-
+ /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
+ (i.e. when reduc_code is not available) and in the final adjusment code
+ (if needed). Also get the original scalar reduction variable as
+ defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
+ represents a reduction pattern), the tree-code and scalar-def are
+ taken from the original stmt that the pattern-stmt (STMT) replaces.
+ Otherwise (it is a regular reduction) - the tree-code and scalar-def
+ are taken from STMT. */
+
+ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ if (!orig_stmt)
+ {
+ /* Regular reduction */
+ orig_stmt = stmt;
+ }
+ else
+ {
+ /* Reduction pattern */
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
+ gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
+ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
+ }
+ code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
+ scalar_dest = TREE_OPERAND (orig_stmt, 0);
+ scalar_type = TREE_TYPE (scalar_dest);
new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type);
- /* 2.2 Create the reduction code. */
+ /* 2.3 Create the reduction code, using one of the three schemes described
+ above. */
if (reduc_code < NUM_TREE_CODES)
{
@@ -849,16 +909,11 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
{
enum tree_code shift_code = 0;
bool have_whole_vector_shift = true;
- enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
int bit_offset;
int element_bitsize = tree_low_cst (bitsize, 1);
int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
tree vec_temp;
- /* The result of the reduction is expected to be at the least
- significant bits of the vector. This is merely convention,
- as it's the extraction later that really matters, and that
- is also under our control. */
if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
shift_code = VEC_RSHIFT_EXPR;
else
@@ -881,7 +936,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
if (have_whole_vector_shift)
{
- /*** Case 2:
+ /*** Case 2: Create:
for (offset = VS/2; offset >= element_size; offset/=2)
{
Create: va' = vec_shift <va, offset>
@@ -905,17 +960,12 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
new_name = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_name;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
-
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
build2 (code, vectype, new_name, new_temp));
new_temp = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
extract_scalar_result = true;
@@ -924,10 +974,11 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
{
tree rhs;
- /*** Case 3:
- Create:
+ /*** Case 3: Create:
s = extract_field <v_out2, 0>
- for (offset=element_size; offset<vector_size; offset+=element_size;)
+ for (offset = element_size;
+ offset < vector_size;
+ offset += element_size;)
{
Create: s' = extract_field <v_out2, offset>
Create: s = op <s, s'>
@@ -938,18 +989,13 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
vec_temp = PHI_RESULT (new_phi);
vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
-
rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitsize_zero_node);
-
BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
- epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
- rhs);
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
for (bit_offset = element_bitsize;
bit_offset < vec_size_in_bits;
@@ -965,25 +1011,19 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_name;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
-
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build2 (code, scalar_type, new_name, new_temp));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
extract_scalar_result = false;
}
}
-
- /* 2.3 Extract the final scalar result. Create:
+ /* 2.4 Extract the final scalar result. Create:
s_out3 = extract_field <v_out2, bitpos> */
if (extract_scalar_result)
@@ -993,7 +1033,6 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "extract scalar result");
- /* The result is in the low order bits. */
if (BYTES_BIG_ENDIAN)
bitpos = size_binop (MULT_EXPR,
bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
@@ -1007,17 +1046,14 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
-
/* 2.4 Adjust the final result by the initial value of the reduction
- variable. (when such adjustment is not needed, then
+ variable. (When such adjustment is not needed, then
'scalar_initial_def' is zero).
Create:
- s_out = scalar_expr <s_out, scalar_initial_def> */
+ s_out4 = scalar_expr <s_out3, scalar_initial_def> */
if (scalar_initial_def)
{
@@ -1026,18 +1062,13 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
-
- if (vect_print_dump_info (REPORT_DETAILS))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
+ /* 2.6 Replace uses of s_out0 with uses of s_out3 */
- /* 2.5 Replace uses of s_out0 with uses of s_out3 */
-
- /* Find the loop-closed-use at the loop exit of the original
- scalar result. (The reduction result is expected to have
- two immediate uses - one at the latch block, and one at the
- loop exit). */
+ /* Find the loop-closed-use at the loop exit of the original scalar result.
+ (The reduction result is expected to have two immediate uses - one at the
+ latch block, and one at the loop exit). */
exit_phi = NULL;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
{
@@ -1047,9 +1078,10 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
break;
}
}
-
+ /* We expect to have found an exit_phi because of loop-closed-ssa form. */
+ gcc_assert (exit_phi);
+ /* Replace the uses: */
orig_name = PHI_RESULT (exit_phi);
-
FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
SET_USE (use_p, new_temp);
}
@@ -1060,33 +1092,69 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
Check if STMT performs a reduction operation that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
- Return FALSE if not a vectorizable STMT, TRUE otherwise. */
+ Return FALSE if not a vectorizable STMT, TRUE otherwise.
+
+ This function also handles reduction idioms (patterns) that have been
+ recognized in advance during vect_pattern_recog. In this case, STMT may be
+ of this form:
+ X = pattern_expr (arg0, arg1, ..., X)
+ and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
+ sequence that had been detected and replaced by the pattern-stmt (STMT).
+
+ In some cases of reduction patterns, the type of the reduction variable X is
+ different than the type of the other arguments of STMT.
+ In such cases, the vectype that is used when transforming STMT into a vector
+ stmt is different than the vectype that is used to determine the
+ vectorization factor, because it consists of a different number of elements
+ than the actual number of elements that are being operated upon in parallel.
+
+ For example, consider an accumulation of shorts into an int accumulator.
+ On some targets it's possible to vectorize this pattern operating on 8
+ shorts at a time (hence, the vectype for purposes of determining the
+ vectorization factor should be V8HI); on the other hand, the vectype that
+ is used to create the vector form is actually V4SI (the type of the result).
+
+ Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
+ indicates what is the actual level of parallelism (V8HI in the example), so
+ that the right vectorization factor would be derived. This vectype
+ corresponds to the type of arguments to the reduction stmt, and should *NOT*
+ be used to create the vectorized stmt. The right vectype for the vectorized
+ stmt is obtained from the type of the result X:
+ get_vectype_for_scalar_type (TREE_TYPE (X))
+
+ This means that, contrary to "regular" reductions (or "regular" stmts in
+ general), the following equation:
+ STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
+ does *NOT* necessarily hold for reduction patterns. */
bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
tree vec_dest;
tree scalar_dest;
- tree op0, op1;
- tree loop_vec_def;
+ tree op;
+ tree loop_vec_def0, loop_vec_def1;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree operation;
- enum tree_code code, reduc_code = 0;
+ enum tree_code code, orig_code, epilog_reduc_code = 0;
enum machine_mode vec_mode;
int op_type;
optab optab, reduc_optab;
tree new_temp;
- tree def0, def1, def_stmt0, def_stmt1;
- enum vect_def_type dt0, dt1;
+ tree def, def_stmt;
+ enum vect_def_type dt;
tree new_phi;
tree scalar_type;
- bool is_simple_use0;
- bool is_simple_use1;
+ bool is_simple_use;
+ tree orig_stmt;
+ stmt_vec_info orig_stmt_info;
+ tree expr = NULL_TREE;
+ int i;
- /* Is vectorizable reduction? */
+ /* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop. */
if (STMT_VINFO_RELEVANT_P (stmt_info))
@@ -1095,43 +1163,68 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!STMT_VINFO_LIVE_P (stmt_info))
return false;
- /* Make sure it was already recognized as a reduction pattern. */
+ /* Make sure it was already recognized as a reduction computation. */
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
return false;
+ /* 2. Has this been recognized as a reduction pattern?
+
+ Check if STMT represents a pattern that has been recognized
+ in earlier analysis stages. For stmts that represent a pattern,
+ the STMT_VINFO_RELATED_STMT field records the last stmt in
+ the original sequence that constitutes the pattern. */
+
+ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ if (orig_stmt)
+ {
+ orig_stmt_info = vinfo_for_stmt (orig_stmt);
+ gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
+ gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
+ gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
+ }
+
+ /* 3. Check the operands of the operation. The first operands are defined
+ inside the loop body. The last operand is the reduction variable,
+ which is defined by the loop-header-phi. */
+
gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
operation = TREE_OPERAND (stmt, 1);
code = TREE_CODE (operation);
op_type = TREE_CODE_LENGTH (code);
- if (op_type != binary_op)
+ if (op_type != binary_op && op_type != ternary_op)
return false;
-
- op0 = TREE_OPERAND (operation, 0);
- op1 = TREE_OPERAND (operation, 1);
scalar_dest = TREE_OPERAND (stmt, 0);
scalar_type = TREE_TYPE (scalar_dest);
- /* Check the first operand. It is expected to be defined inside the loop. */
- is_simple_use0 =
- vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
- is_simple_use1 =
- vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
-
- gcc_assert (is_simple_use0);
- gcc_assert (is_simple_use1);
- gcc_assert (dt0 == vect_loop_def);
- gcc_assert (dt1 == vect_reduction_def);
- gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
- gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
+ /* All uses but the last are expected to be defined in the loop.
+ The last use is the reduction variable. */
+ for (i = 0; i < op_type-1; i++)
+ {
+ op = TREE_OPERAND (operation, i);
+ is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+ gcc_assert (is_simple_use);
+ gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
+ dt == vect_constant_def);
+ }
- if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
- return false;
+ op = TREE_OPERAND (operation, i);
+ is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
+ gcc_assert (is_simple_use);
+ gcc_assert (dt == vect_reduction_def);
+ gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+ if (orig_stmt)
+ gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
+ else
+ gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
+
+ if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
+ return false;
- /* Supportable by target? */
+ /* 4. Supportable by target? */
- /* check support for the operation in the loop */
+ /* 4.1. check support for the operation in the loop */
optab = optab_for_tree_code (code, vectype);
if (!optab)
{
@@ -1162,21 +1255,69 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
return false;
}
- /* check support for the epilog operation */
- if (!reduction_code_for_scalar_code (code, &reduc_code))
+ /* 4.2. Check support for the epilog operation.
+
+ If STMT represents a reduction pattern, then the type of the
+ reduction variable may be different than the type of the rest
+ of the arguments. For example, consider the case of accumulation
+ of shorts into an int accumulator; The original code:
+ S1: int_a = (int) short_a;
+ orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
+
+ was replaced with:
+ STMT: int_acc = widen_sum <short_a, int_acc>
+
+ This means that:
+ 1. The tree-code that is used to create the vector operation in the
+ epilog code (that reduces the partial results) is not the
+ tree-code of STMT, but is rather the tree-code of the original
+ stmt from the pattern that STMT is replacing. I.e, in the example
+ above we want to use 'widen_sum' in the loop, but 'plus' in the
+ epilog.
+ 2. The type (mode) we use to check available target support
+ for the vector operation to be created in the *epilog*, is
+ determined by the type of the reduction variable (in the example
+ above we'd check this: plus_optab[vect_int_mode]).
+ However the type (mode) we use to check available target support
+ for the vector operation to be created *inside the loop*, is
+ determined by the type of the other arguments to STMT (in the
+ example we'd check this: widen_sum_optab[vect_short_mode]).
+
+ This is contrary to "regular" reductions, in which the types of all
+ the arguments are the same as the type of the reduction variable.
+ For "regular" reductions we can therefore use the same vector type
+ (and also the same tree-code) when generating the epilog code and
+ when generating the code inside the loop. */
+
+ if (orig_stmt)
+ {
+ /* This is a reduction pattern: get the vectype from the type of the
+ reduction variable, and get the tree-code from orig_stmt. */
+ orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
+ vec_mode = TYPE_MODE (vectype);
+ }
+ else
+ {
+ /* Regular reduction: use the same vectype and tree-code as used for
+ the vector code inside the loop can be used for the epilog code. */
+ orig_code = code;
+ }
+
+ if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
return false;
- reduc_optab = optab_for_tree_code (reduc_code, vectype);
+ reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
if (!reduc_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab for reduction.");
- reduc_code = NUM_TREE_CODES;
+ epilog_reduc_code = NUM_TREE_CODES;
}
if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc op not supported by target.");
- reduc_code = NUM_TREE_CODES;
+ epilog_reduc_code = NUM_TREE_CODES;
}
if (!vec_stmt) /* transformation not required. */
@@ -1193,25 +1334,31 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
-
/* Create the reduction-phi that defines the reduction-operand. */
new_phi = create_phi_node (vec_dest, loop->header);
-
/* Prepare the operand that is defined inside the loop body */
- loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
+ op = TREE_OPERAND (operation, 0);
+ loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
+ if (op_type == binary_op)
+ expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
+ else if (op_type == ternary_op)
+ {
+ op = TREE_OPERAND (operation, 1);
+ loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
+ expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
+ PHI_RESULT (new_phi));
+ }
/* Create the vectorized operation that computes the partial results */
- *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
- build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
+ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
new_temp = make_ssa_name (vec_dest, *vec_stmt);
TREE_OPERAND (*vec_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
-
/* Finalize the reduction-phi (set it's arguments) and create the
epilog reduction code. */
- vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
+ vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
return true;
}
@@ -2040,6 +2187,7 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
bool is_store = false;
tree vec_stmt = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree orig_stmt_in_pattern;
bool done;
if (STMT_VINFO_RELEVANT_P (stmt_info))
@@ -2078,7 +2226,25 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
gcc_unreachable ();
}
+ gcc_assert (vec_stmt);
STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+ orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
+ if (orig_stmt_in_pattern)
+ {
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+ {
+ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
+
+ /* STMT was inserted by the vectorizer to replace a computation
+ idiom. ORIG_STMT_IN_PATTERN is a stmt in the original
+ sequence that computed this idiom. We need to record a pointer
+ to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more
+ detail in the documentation of vect_pattern_recog. */
+
+ STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
+ }
+ }
}
if (STMT_VINFO_LIVE_P (stmt_info))