author     Jakub Jelinek <jakub@redhat.com>     2017-01-09 21:10:23 +0100
committer  Jakub Jelinek <jakub@gcc.gnu.org>    2017-01-09 21:10:23 +0100
commit     cb330ba582c9b175bb0c2debaba075a8af8d0b95 (patch)
tree       6dd4df32ad5eb0ca2c76ee47cb1d2ae5a80687ef /gcc/tree-vectorizer.c
parent     47d5beb478d39937b8068410101241ae806adc25 (diff)
re PR tree-optimization/78899 (Vectorized loop with optimized mask stores motion is completely deleted after r242520.)
PR tree-optimization/78899
* tree-if-conv.c (version_loop_for_if_conversion): Return struct
loop * instead of bool: NULL on failure, the new loop copy on
success.
(versionable_outer_loop_p): Don't version the outer loop if it has
its dont_vectorize bit set.
(tree_if_conversion): When versioning the outer loop, ensure that
tree_if_conversion is also performed on the inner loop of the
non-vectorizable outer loop copy.
* tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold
LOOP_VECTORIZED in inner loop of the scalar outer loop and
prevent vectorization of it.
(vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure that
outer loop vectorization of the non-scalar version is attempted
before vectorization of the inner loop in the scalar version. If
the outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
vectorization of its inner loop.
* tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
has 2 inner loops, rename also on edges from bb whose single pred
is outer_loop->header. Fix typo in function comment.
* gcc.target/i386/pr78899.c: New test.
* gcc.dg/pr71077.c: New test.
From-SVN: r244238
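
For context, the loop nests at issue look roughly like the sketch below. This is illustrative only, not the committed gcc.target/i386/pr78899.c; the function name, bounds, and threshold are made up. The conditional store in the inner loop is turned into a masked store by if-conversion, and the enclosing loop is a candidate for outer-loop vectorization, which is why if-conversion ends up versioning both levels of the nest:

/* Illustrative sketch, not the actual testcase.  The inner conditional
   store is the masked-store candidate; the outer loop is the outer-loop
   vectorization candidate.  */
void
foo (int *a, int n, int m, int t)
{
  for (int i = 0; i < n; i++)
    for (int j = 0; j < m; j++)
      if (a[i * m + j] > t)	/* if-converted into a masked store */
	a[i * m + j] = 0;
}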
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--  gcc/tree-vectorizer.c | 83
1 file changed, 74 insertions(+), 9 deletions(-)
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 895794e..a4cead7 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
 	  update_stmt (use_stmt);
 	}
     }
+
 /* Set the uids of all the statements in basic blocks inside loop
    represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
    call guarding the loop which has been if converted.  */
@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
   struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
 
   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
-  gcc_checking_assert (vect_loop_vectorized_call
-		       (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
 		       == loop_vectorized_call);
+  /* If we are going to vectorize outer loop, prevent vectorization
+     of the inner loop in the scalar loop - either the scalar loop is
+     thrown away, so it is a wasted work, or is used only for
+     a few iterations.  */
+  if (scalar_loop->inner)
+    {
+      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
+      if (g)
+	{
+	  arg = gimple_call_arg (g, 0);
+	  get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
+	  fold_loop_vectorized_call (g, boolean_false_node);
+	}
+    }
   bbs = get_loop_body (scalar_loop);
   for (i = 0; i < scalar_loop->num_nodes; i++)
     {
@@ -534,14 +548,59 @@ vectorize_loops (void)
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (loop, 0)
     if (loop->dont_vectorize)
-      any_ifcvt_loops = true;
-    else if ((flag_tree_loop_vectorize
-	      && optimize_loop_nest_for_speed_p (loop))
-	     || loop->force_vectorize)
       {
-	loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
-	gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
-vectorize_epilogue:
+	any_ifcvt_loops = true;
+	/* If-conversion sometimes versions both the outer loop
+	   (for the case when outer loop vectorization might be
+	   desirable) as well as the inner loop in the scalar version
+	   of the loop.  So we have:
+	   if (LOOP_VECTORIZED (1, 3))
+	     {
+	       loop1
+		 loop2
+	     }
+	   else
+	     loop3 (copy of loop1)
+	       if (LOOP_VECTORIZED (4, 5))
+		 loop4 (copy of loop2)
+	       else
+		 loop5 (copy of loop4)
+	   If FOR_EACH_LOOP gives us loop3 first (which has
+	   dont_vectorize set), make sure to process loop1 before loop4;
+	   so that we can prevent vectorization of loop4 if loop1
+	   is successfully vectorized.  */
+	if (loop->inner)
+	  {
+	    gimple *loop_vectorized_call
+	      = vect_loop_vectorized_call (loop);
+	    if (loop_vectorized_call
+		&& vect_loop_vectorized_call (loop->inner))
+	      {
+		tree arg = gimple_call_arg (loop_vectorized_call, 0);
+		struct loop *vector_loop
+		  = get_loop (cfun, tree_to_shwi (arg));
+		if (vector_loop && vector_loop != loop)
+		  {
+		    loop = vector_loop;
+		    /* Make sure we don't vectorize it twice.  */
+		    loop->dont_vectorize = true;
+		    goto try_vectorize;
+		  }
+	      }
+	  }
+      }
+    else
+      {
+	loop_vec_info loop_vinfo, orig_loop_vinfo;
+	gimple *loop_vectorized_call;
+      try_vectorize:
+	if (!((flag_tree_loop_vectorize
+	       && optimize_loop_nest_for_speed_p (loop))
+	      || loop->force_vectorize))
+	  continue;
+	orig_loop_vinfo = NULL;
+	loop_vectorized_call = vect_loop_vectorized_call (loop);
+vectorize_epilogue:
 	vect_location = find_loop_location (loop);
 	if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
 	    && dump_enabled_p ())
@@ -595,6 +654,12 @@ vectorize_epilogue:
 	    ret |= TODO_cleanup_cfg;
 	  }
       }
+      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
+	 loop, don't vectorize its inner loop; we'll attempt to
+	 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
+	 loop version.  */
+      if (loop_vectorized_call && loop->inner)
+	loop->inner->dont_vectorize = true;
       continue;
     }
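
To make the loop1..loop5 comment in the big hunk concrete, here is a hedged source-level model of the versioned nest after if-conversion. Everything in it is hypothetical scaffolding, not GCC output: the two extern flags stand in for the LOOP_VECTORIZED internal calls (really IFN_LOOP_VECTORIZED calls that get folded to a constant once a vectorization decision is made), and the loop bodies are placeholders.

/* Source-level model of the loop1..loop5 structure from the comment
   above.  The flags are hypothetical stand-ins for the
   LOOP_VECTORIZED (i, j) internal calls.  */
extern int outer_version_picked;	/* models LOOP_VECTORIZED (1, 3) */
extern int inner_version_picked;	/* models LOOP_VECTORIZED (4, 5) */

void
versioned_nest (int *a, int n, int m)
{
  if (outer_version_picked)
    {
      /* loop1 + loop2: the copy kept for outer loop vectorization.  */
      for (int i = 0; i < n; i++)
	for (int j = 0; j < m; j++)
	  if (a[i * m + j] > 0)
	    a[i * m + j] = 0;
    }
  else
    {
      /* loop3: scalar copy of loop1, its inner loop versioned again.  */
      for (int i = 0; i < n; i++)
	{
	  if (inner_version_picked)
	    {
	      /* loop4: if-converted copy of loop2.  */
	      for (int j = 0; j < m; j++)
		if (a[i * m + j] > 0)
		  a[i * m + j] = 0;
	    }
	  else
	    {
	      /* loop5: scalar copy of loop4.  */
	      for (int j = 0; j < m; j++)
		if (a[i * m + j] > 0)
		  a[i * m + j] = 0;
	    }
	}
    }
}

In these terms, the patch makes sure loop1 is tried first: if it vectorizes, set_uid_loop_bbs folds the inner guard to false and marks loop4 with dont_vectorize; if it does not, loop1's inner loop gets dont_vectorize set instead, and the vectorizer goes on to attempt the guarded loop4 in the scalar path.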