aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2017-01-09 21:10:23 +0100
committerJakub Jelinek <jakub@gcc.gnu.org>2017-01-09 21:10:23 +0100
commitcb330ba582c9b175bb0c2debaba075a8af8d0b95 (patch)
tree6dd4df32ad5eb0ca2c76ee47cb1d2ae5a80687ef /gcc/tree-vectorizer.c
parent47d5beb478d39937b8068410101241ae806adc25 (diff)
downloadgcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.zip
gcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.tar.gz
gcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.tar.bz2
re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion is completely deleted after r242520.)
PR tree-optimization/78899 * tree-if-conv.c (version_loop_for_if_conversion): Instead of returning bool return struct loop *, NULL for failure and the new loop on success. (versionable_outer_loop_p): Don't version outer loop if it has dont_vectorize bit set. (tree_if_conversion): When versioning outer loop, ensure tree_if_conversion is performed also on the inner loop of the non-vectorizable outer loop copy. * tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold LOOP_VECTORIZED in inner loop of the scalar outer loop and prevent vectorization of it. (vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure the outer loop vectorization of the non-scalar version is attempted before vectorization of the inner loop in scalar version. If outer LOOP_VECTORIZED guarded loop is not vectorized, prevent vectorization of its inner loop. * tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop has 2 inner loops, rename also on edges from bb whose single pred is outer_loop->header. Fix typo in function comment. * gcc.target/i386/pr78899.c: New test. * gcc.dg/pr71077.c: New test. From-SVN: r244238
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--gcc/tree-vectorizer.c83
1 files changed, 74 insertions, 9 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 895794e..a4cead7 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
update_stmt (use_stmt);
}
}
+
/* Set the uids of all the statements in basic blocks inside loop
represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
call guarding the loop which has been if converted. */
@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
- gcc_checking_assert (vect_loop_vectorized_call
- (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+ gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
== loop_vectorized_call);
+ /* If we are going to vectorize outer loop, prevent vectorization
+ of the inner loop in the scalar loop - either the scalar loop is
+ thrown away, so it is a wasted work, or is used only for
+ a few iterations. */
+ if (scalar_loop->inner)
+ {
+ gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
+ if (g)
+ {
+ arg = gimple_call_arg (g, 0);
+ get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
+ fold_loop_vectorized_call (g, boolean_false_node);
+ }
+ }
bbs = get_loop_body (scalar_loop);
for (i = 0; i < scalar_loop->num_nodes; i++)
{
@@ -534,14 +548,59 @@ vectorize_loops (void)
only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (loop, 0)
if (loop->dont_vectorize)
- any_ifcvt_loops = true;
- else if ((flag_tree_loop_vectorize
- && optimize_loop_nest_for_speed_p (loop))
- || loop->force_vectorize)
{
- loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
- gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
-vectorize_epilogue:
+ any_ifcvt_loops = true;
+ /* If-conversion sometimes versions both the outer loop
+ (for the case when outer loop vectorization might be
+ desirable) as well as the inner loop in the scalar version
+ of the loop. So we have:
+ if (LOOP_VECTORIZED (1, 3))
+ {
+ loop1
+ loop2
+ }
+ else
+ loop3 (copy of loop1)
+ if (LOOP_VECTORIZED (4, 5))
+ loop4 (copy of loop2)
+ else
+ loop5 (copy of loop4)
+ If FOR_EACH_LOOP gives us loop3 first (which has
+ dont_vectorize set), make sure to process loop1 before loop4;
+ so that we can prevent vectorization of loop4 if loop1
+ is successfully vectorized. */
+ if (loop->inner)
+ {
+ gimple *loop_vectorized_call
+ = vect_loop_vectorized_call (loop);
+ if (loop_vectorized_call
+ && vect_loop_vectorized_call (loop->inner))
+ {
+ tree arg = gimple_call_arg (loop_vectorized_call, 0);
+ struct loop *vector_loop
+ = get_loop (cfun, tree_to_shwi (arg));
+ if (vector_loop && vector_loop != loop)
+ {
+ loop = vector_loop;
+ /* Make sure we don't vectorize it twice. */
+ loop->dont_vectorize = true;
+ goto try_vectorize;
+ }
+ }
+ }
+ }
+ else
+ {
+ loop_vec_info loop_vinfo, orig_loop_vinfo;
+ gimple *loop_vectorized_call;
+ try_vectorize:
+ if (!((flag_tree_loop_vectorize
+ && optimize_loop_nest_for_speed_p (loop))
+ || loop->force_vectorize))
+ continue;
+ orig_loop_vinfo = NULL;
+ loop_vectorized_call = vect_loop_vectorized_call (loop);
+ vectorize_epilogue:
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ())
@@ -595,6 +654,12 @@ vectorize_epilogue:
ret |= TODO_cleanup_cfg;
}
}
+ /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
+ loop, don't vectorize its inner loop; we'll attempt to
+ vectorize LOOP_VECTORIZED guarded inner loop of the scalar
+ loop version. */
+ if (loop_vectorized_call && loop->inner)
+ loop->inner->dont_vectorize = true;
continue;
}