diff options
author | Richard Biener <rguenther@suse.de> | 2016-11-08 08:06:42 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2016-11-08 08:06:42 +0000 |
commit | fe73a33284d23c5a7d8d8eb5b13e37454401b6c4 (patch) | |
tree | 3d6f2b801388eaa2f83d00e53e161e87ce062ae9 /gcc | |
parent | ed053eb8240556e7953ff865c428b784b6546717 (diff) | |
download | gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.zip gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.tar.gz gcc-fe73a33284d23c5a7d8d8eb5b13e37454401b6c4.tar.bz2 |
re PR tree-optimization/78205 (BB vectorization confused by too large load groups)
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78205
* tree-vect-stmts.c (vectorizable_load): Move check whether
we may run into gaps when BB vectorizing SLP permutations ...
* tree-vect-slp.c (vect_supported_load_permutation_p): ...
here where we can do a more precise check.
* gcc.dg/vect/bb-slp-pr78205.c: New testcase.
From-SVN: r241956
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c | 25 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 19 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 12 |
5 files changed, 57 insertions, 12 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 64b0b93..59d5515 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2016-11-08 Richard Biener <rguenther@suse.de>
 
+        PR tree-optimization/78205
+        * tree-vect-stmts.c (vectorizable_load): Move check whether
+        we may run into gaps when BB vectorizing SLP permutations ...
+        * tree-vect-slp.c (vect_supported_load_permutation_p): ...
+        here where we can do a more precise check.
+
+2016-11-08 Richard Biener <rguenther@suse.de>
+
         PR tree-optimization/78224
         * tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds): Split
         the fallthru edge in case its successor may have PHIs.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 45a09a5..acc09f3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
 2016-11-08 Richard Biener <rguenther@suse.de>
 
+        PR tree-optimization/78205
+        * gcc.dg/vect/bb-slp-pr78205.c: New testcase.
+
+2016-11-08 Richard Biener <rguenther@suse.de>
+
         PR tree-optimization/78224
         * g++.dg/torture/pr78224.C: New testcase.
 
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c
new file mode 100644
index 0000000..e02502a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+double x[2], a[4], b[4], c[5];
+
+void foo ()
+{
+  a[0] = c[0];
+  a[1] = c[1];
+  a[2] = c[0];
+  a[3] = c[1];
+  b[0] = c[2];
+  b[1] = c[3];
+  b[2] = c[2];
+  b[3] = c[3];
+  x[0] = c[4];
+  x[1] = c[4];
+}
+
+/* We may not vectorize the store to x[] as it accesses c out-of bounds
+   but we do want to vectorize the other two store groups. */
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 6694164..8d54768 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
             SLP_TREE_LOAD_PERMUTATION (node).release ();
           else
             {
+              stmt_vec_info group_info
+                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
+              unsigned nunits
+                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
+              unsigned k, maxk = 0;
+              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
+                if (k > maxk)
+                  maxk = k;
+              /* In BB vectorization we may not actually use a loaded vector
+                 accessing elements in excess of GROUP_SIZE. */
+              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
+                {
+                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                   "BB vectorization with gaps at the end of "
+                                   "a load is not supported\n");
+                  return false;
+                }
+
               /* Verify the permutation can be generated. */
               vec<tree> tem;
               unsigned n_perms;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ab01def..15aec21 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
         slp_perm = true;
 
-      /* ??? The following is overly pessimistic (as well as the loop
-         case above) in the case we can statically determine the excess
-         elements loaded are within the bounds of a decl that is accessed.
-         Likewise for BB vectorizations using masked loads is a possibility. */
-      if (bb_vinfo && slp_perm && group_size % nunits != 0)
-        {
-          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                           "BB vectorization with gaps at the end of a load "
-                           "is not supported\n");
-          return false;
-        }
-
       /* Invalidate assumptions made by dependence analysis when vectorization
          on the unrolled body effectively re-orders stmts. */
       if (!PURE_SLP_STMT (stmt_info)
```