aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2017-01-09 21:10:23 +0100
committerJakub Jelinek <jakub@gcc.gnu.org>2017-01-09 21:10:23 +0100
commitcb330ba582c9b175bb0c2debaba075a8af8d0b95 (patch)
tree6dd4df32ad5eb0ca2c76ee47cb1d2ae5a80687ef /gcc
parent47d5beb478d39937b8068410101241ae806adc25 (diff)
downloadgcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.zip
gcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.tar.gz
gcc-cb330ba582c9b175bb0c2debaba075a8af8d0b95.tar.bz2
re PR tree-optimization/78899 (Vestorized loop with optmized mask stores motion is completely deleted after r242520.)
PR tree-optimization/78899 * tree-if-conv.c (version_loop_for_if_conversion): Instead of returning bool return struct loop *, NULL for failure and the new loop on success. (versionable_outer_loop_p): Don't version outer loop if it has dont_vectorized bit set. (tree_if_conversion): When versioning outer loop, ensure tree_if_conversion is performed also on the inner loop of the non-vectorizable outer loop copy. * tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold LOOP_VECTORIZED in inner loop of the scalar outer loop and prevent vectorization of it. (vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure the outer loop vectorization of the non-scalar version is attempted before vectorization of the inner loop in scalar version. If outer LOOP_VECTORIZED guarded loop is not vectorized, prevent vectorization of its inner loop. * tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop has 2 inner loops, rename also on edges from bb whose single pred is outer_loop->header. Fix typo in function comment. * gcc.target/i386/pr78899.c: New test. * gcc.dg/pr71077.c: New test. From-SVN: r244238
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog23
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/pr71077.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr78899.c27
-rw-r--r--gcc/tree-if-conv.c42
-rw-r--r--gcc/tree-vect-loop-manip.c24
-rw-r--r--gcc/tree-vectorizer.c83
7 files changed, 201 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 56de1fb..443a473 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2017-01-09 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/78899
+ * tree-if-conv.c (version_loop_for_if_conversion): Instead of
+ returning bool return struct loop *, NULL for failure and the new
+ loop on success.
+ (versionable_outer_loop_p): Don't version outer loop if it has
+ dont_vectorized bit set.
+ (tree_if_conversion): When versioning outer loop, ensure
+ tree_if_conversion is performed also on the inner loop of the
+ non-vectorizable outer loop copy.
+ * tree-vectorizer.c (set_uid_loop_bbs): Formatting fix. Fold
+ LOOP_VECTORIZED in inner loop of the scalar outer loop and
+ prevent vectorization of it.
+ (vectorize_loops): For outer + inner LOOP_VECTORIZED, ensure
+ the outer loop vectorization of the non-scalar version is attempted
+ before vectorization of the inner loop in scalar version. If
+ outer LOOP_VECTORIZED guarded loop is not vectorized, prevent
+ vectorization of its inner loop.
+ * tree-vect-loop-manip.c (rename_variables_in_bb): If outer_loop
+ has 2 inner loops, rename also on edges from bb whose single pred
+ is outer_loop->header. Fix typo in function comment.
+
2017-01-09 Martin Sebor <msebor@redhat.com>
PR bootstrap/79033
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 31b7a2c..6167ad9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-01-09 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/78899
+ * gcc.target/i386/pr78899.c: New test.
+ * gcc.dg/pr71077.c: New test.
+
2017-01-09 Martin Jambor <mjambor@suse.cz>
PR ipa/78365
diff --git a/gcc/testsuite/gcc.dg/pr71077.c b/gcc/testsuite/gcc.dg/pr71077.c
new file mode 100644
index 0000000..db83bc3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71077.c
@@ -0,0 +1,14 @@
+/* PR c++/71077 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */
+
+void
+foo (int *a, int n)
+{
+ int b, c;
+ for (b = 0; b < n; b++)
+ for (c = 0; c < 32; c++)
+ if ((b & 1U) << c)
+ a[b + c] = 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr78899.c b/gcc/testsuite/gcc.target/i386/pr78899.c
new file mode 100644
index 0000000..f22f31d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr78899.c
@@ -0,0 +1,27 @@
+/* PR tree-optimization/78899 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fopenmp-simd -mavx2 -mno-avx512f" } */
+
+#define N 1024
+#define M 4
+int p1[N], p2[N], p3[N], c[N];
+
+void
+foo (int n)
+{
+ int i, k;
+ for (k = 0; k < n / M; k++)
+ {
+ #pragma omp simd
+ for (i = 0; i < M; i++)
+ if (c[k * M + i])
+ {
+ p1[k * M + i] += 1;
+ p2[k * M + i] = p3[k * M + i] + 2;
+ }
+ }
+}
+
+/* Ensure the loop is vectorized. */
+/* { dg-final { scan-assembler "vpmaskmov" } } */
+/* { dg-final { scan-assembler "vpadd" } } */
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 032182c..c12987e 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -2535,7 +2535,7 @@ combine_blocks (struct loop *loop)
loop to execute. The vectorizer pass will fold this
internal call into either true or false. */
-static bool
+static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
basic_block cond_bb;
@@ -2566,7 +2566,7 @@ version_loop_for_if_conversion (struct loop *loop)
ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL)
- return false;
+ return NULL;
new_loop->dont_vectorize = true;
new_loop->force_vectorize = false;
@@ -2574,7 +2574,7 @@ version_loop_for_if_conversion (struct loop *loop)
gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
update_ssa (TODO_update_ssa);
- return true;
+ return new_loop;
}
/* Return true when LOOP satisfies the follow conditions that will
@@ -2594,6 +2594,7 @@ static bool
versionable_outer_loop_p (struct loop *loop)
{
if (!loop_outer (loop)
+ || loop->dont_vectorize
|| !loop->inner
|| loop->inner->next
|| !single_exit (loop)
@@ -2602,7 +2603,7 @@ versionable_outer_loop_p (struct loop *loop)
|| !single_pred_p (loop->latch)
|| !single_pred_p (loop->inner->latch))
return false;
-
+
basic_block outer_exit = single_pred (loop->latch);
basic_block inner_exit = single_pred (loop->inner->latch);
@@ -2789,7 +2790,10 @@ tree_if_conversion (struct loop *loop)
{
unsigned int todo = 0;
bool aggressive_if_conv;
+ struct loop *rloop;
+ again:
+ rloop = NULL;
ifc_bbs = NULL;
any_pred_load_store = false;
any_complicated_phi = false;
@@ -2829,8 +2833,31 @@ tree_if_conversion (struct loop *loop)
struct loop *vloop
= (versionable_outer_loop_p (loop_outer (loop))
? loop_outer (loop) : loop);
- if (!version_loop_for_if_conversion (vloop))
+ struct loop *nloop = version_loop_for_if_conversion (vloop);
+ if (nloop == NULL)
goto cleanup;
+ if (vloop != loop)
+ {
+ /* If versionable_outer_loop_p decided to version the
+ outer loop, version also the inner loop of the non-vectorized
+ loop copy. So we transform:
+ loop1
+ loop2
+ into:
+ if (LOOP_VECTORIZED (1, 3))
+ {
+ loop1
+ loop2
+ }
+ else
+ loop3 (copy of loop1)
+ if (LOOP_VECTORIZED (4, 5))
+ loop4 (copy of loop2)
+ else
+ loop5 (copy of loop4) */
+ gcc_assert (nloop->inner && nloop->inner->next == NULL);
+ rloop = nloop->inner;
+ }
}
/* Now all statements are if-convertible. Combine all the basic
@@ -2854,6 +2881,11 @@ tree_if_conversion (struct loop *loop)
free (ifc_bbs);
ifc_bbs = NULL;
}
+ if (rloop != NULL)
+ {
+ loop = rloop;
+ goto again;
+ }
return todo;
}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 51d52ac..935cd75 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -71,7 +71,7 @@ rename_use_op (use_operand_p op_p)
}
-/* Renames the variables in basic block BB. Allow renaming of PHI argumnets
+/* Renames the variables in basic block BB. Allow renaming of PHI arguments
on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
true. */
@@ -102,9 +102,25 @@ rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
FOR_EACH_EDGE (e, ei, bb->preds)
{
- if (!flow_bb_inside_loop_p (loop, e->src)
- && (!rename_from_outer_loop || e->src != outer_loop->header))
- continue;
+ if (!flow_bb_inside_loop_p (loop, e->src))
+ {
+ if (!rename_from_outer_loop)
+ continue;
+ if (e->src != outer_loop->header)
+ {
+ if (outer_loop->inner->next)
+ {
+ /* If outer_loop has 2 inner loops, allow there to
+ be an extra basic block which decides which of the
+ two loops to use using LOOP_VECTORIZED. */
+ if (!single_pred_p (e->src)
+ || single_pred (e->src) != outer_loop->header)
+ continue;
+ }
+ else
+ continue;
+ }
+ }
for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 895794e..a4cead7 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -465,6 +465,7 @@ fold_loop_vectorized_call (gimple *g, tree value)
update_stmt (use_stmt);
}
}
+
/* Set the uids of all the statements in basic blocks inside loop
represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
call guarding the loop which has been if converted. */
@@ -477,9 +478,22 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
- gcc_checking_assert (vect_loop_vectorized_call
- (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+ gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
== loop_vectorized_call);
+ /* If we are going to vectorize outer loop, prevent vectorization
+ of the inner loop in the scalar loop - either the scalar loop is
+ thrown away, so it is a wasted work, or is used only for
+ a few iterations. */
+ if (scalar_loop->inner)
+ {
+ gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
+ if (g)
+ {
+ arg = gimple_call_arg (g, 0);
+ get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
+ fold_loop_vectorized_call (g, boolean_false_node);
+ }
+ }
bbs = get_loop_body (scalar_loop);
for (i = 0; i < scalar_loop->num_nodes; i++)
{
@@ -534,14 +548,59 @@ vectorize_loops (void)
only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (loop, 0)
if (loop->dont_vectorize)
- any_ifcvt_loops = true;
- else if ((flag_tree_loop_vectorize
- && optimize_loop_nest_for_speed_p (loop))
- || loop->force_vectorize)
{
- loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
- gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
-vectorize_epilogue:
+ any_ifcvt_loops = true;
+ /* If-conversion sometimes versions both the outer loop
+ (for the case when outer loop vectorization might be
+ desirable) as well as the inner loop in the scalar version
+ of the loop. So we have:
+ if (LOOP_VECTORIZED (1, 3))
+ {
+ loop1
+ loop2
+ }
+ else
+ loop3 (copy of loop1)
+ if (LOOP_VECTORIZED (4, 5))
+ loop4 (copy of loop2)
+ else
+ loop5 (copy of loop4)
+ If FOR_EACH_LOOP gives us loop3 first (which has
+ dont_vectorize set), make sure to process loop1 before loop4;
+ so that we can prevent vectorization of loop4 if loop1
+ is successfully vectorized. */
+ if (loop->inner)
+ {
+ gimple *loop_vectorized_call
+ = vect_loop_vectorized_call (loop);
+ if (loop_vectorized_call
+ && vect_loop_vectorized_call (loop->inner))
+ {
+ tree arg = gimple_call_arg (loop_vectorized_call, 0);
+ struct loop *vector_loop
+ = get_loop (cfun, tree_to_shwi (arg));
+ if (vector_loop && vector_loop != loop)
+ {
+ loop = vector_loop;
+ /* Make sure we don't vectorize it twice. */
+ loop->dont_vectorize = true;
+ goto try_vectorize;
+ }
+ }
+ }
+ }
+ else
+ {
+ loop_vec_info loop_vinfo, orig_loop_vinfo;
+ gimple *loop_vectorized_call;
+ try_vectorize:
+ if (!((flag_tree_loop_vectorize
+ && optimize_loop_nest_for_speed_p (loop))
+ || loop->force_vectorize))
+ continue;
+ orig_loop_vinfo = NULL;
+ loop_vectorized_call = vect_loop_vectorized_call (loop);
+ vectorize_epilogue:
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ())
@@ -595,6 +654,12 @@ vectorize_epilogue:
ret |= TODO_cleanup_cfg;
}
}
+ /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
+ loop, don't vectorize its inner loop; we'll attempt to
+ vectorize LOOP_VECTORIZED guarded inner loop of the scalar
+ loop version. */
+ if (loop_vectorized_call && loop->inner)
+ loop->inner->dont_vectorize = true;
continue;
}