about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2016-11-08 08:06:42 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2016-11-08 08:06:42 +0000
commit fe73a33284d23c5a7d8d8eb5b13e37454401b6c4 (patch)
tree 3d6f2b801388eaa2f83d00e53e161e87ce062ae9 /gcc
parent ed053eb8240556e7953ff865c428b784b6546717 (diff)
download gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.zip
gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.tar.gz
gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.tar.bz2
re PR tree-optimization/78205 (BB vectorization confused by too large load groups)
2016-11-08 Richard Biener <rguenther@suse.de>

PR tree-optimization/78205
* tree-vect-stmts.c (vectorizable_load): Move check whether
we may run into gaps when BB vectorizing SLP permutations ...
* tree-vect-slp.c (vect_supported_load_permutation_p): ...
here where we can do a more precise check.

* gcc.dg/vect/bb-slp-pr78205.c: New testcase.

From-SVN: r241956
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c 25
-rw-r--r-- gcc/tree-vect-slp.c 19
-rw-r--r-- gcc/tree-vect-stmts.c 12
5 files changed, 57 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 64b0b93..59d5515 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2016-11-08 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/78205
+ * tree-vect-stmts.c (vectorizable_load): Move check whether
+ we may run into gaps when BB vectorizing SLP permutations ...
+ * tree-vect-slp.c (vect_supported_load_permutation_p): ...
+ here where we can do a more precise check.
+
+2016-11-08 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/78224
* tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds):
Split the fallthru edge in case its successor may have PHIs.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 45a09a5..acc09f3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
2016-11-08 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/78205
+ * gcc.dg/vect/bb-slp-pr78205.c: New testcase.
+
+2016-11-08 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/78224
* g++.dg/torture/pr78224.C: New testcase.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c
new file mode 100644
index 0000000..e02502a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+double x[2], a[4], b[4], c[5];
+
+void foo ()
+{
+ a[0] = c[0];
+ a[1] = c[1];
+ a[2] = c[0];
+ a[3] = c[1];
+ b[0] = c[2];
+ b[1] = c[3];
+ b[2] = c[2];
+ b[3] = c[3];
+ x[0] = c[4];
+ x[1] = c[4];
+}
+
+/* We may not vectorize the store to x[] as it accesses c out-of bounds
+ but we do want to vectorize the other two store groups. */
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 6694164..8d54768 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
SLP_TREE_LOAD_PERMUTATION (node).release ();
else
{
+ stmt_vec_info group_info
+ = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+ group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
+ unsigned nunits
+ = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
+ unsigned k, maxk = 0;
+ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
+ if (k > maxk)
+ maxk = k;
+ /* In BB vectorization we may not actually use a loaded vector
+ accessing elements in excess of GROUP_SIZE. */
+ if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "BB vectorization with gaps at the end of "
+ "a load is not supported\n");
+ return false;
+ }
+
/* Verify the permutation can be generated. */
vec<tree> tem;
unsigned n_perms;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ab01def..15aec21 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
slp_perm = true;
- /* ??? The following is overly pessimistic (as well as the loop
- case above) in the case we can statically determine the excess
- elements loaded are within the bounds of a decl that is accessed.
- Likewise for BB vectorizations using masked loads is a possibility. */
- if (bb_vinfo && slp_perm && group_size % nunits != 0)
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "BB vectorization with gaps at the end of a load "
- "is not supported\n");
- return false;
- }
-
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)