diff options
author | Richard Biener <rguenther@suse.de> | 2020-05-20 09:22:58 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2020-05-20 14:09:26 +0200 |
commit | b2f26af32b5b031fce761aa090de9476a53e6e5a (patch) | |
tree | d856e2b9fad2e5c97d99a1070047700b5b8f2d99 /gcc | |
parent | 130bb4c79295487c5fc203103d80e3b754640eb4 (diff) | |
download | gcc-b2f26af32b5b031fce761aa090de9476a53e6e5a.zip gcc-b2f26af32b5b031fce761aa090de9476a53e6e5a.tar.gz gcc-b2f26af32b5b031fce761aa090de9476a53e6e5a.tar.bz2 |
tree-optimization/95219 - improve IV selection for induction
This improves code generation with SSE2 for the testcase by
making sure to generate only a single IV when the group size
is a multiple of the vector size. It also adjusts the testcase,
which was passing before, so that it now expects the loop to be
vectorized with exactly one induction IV.
2020-05-20 Richard Biener <rguenther@suse.de>
PR tree-optimization/95219
* tree-vect-loop.c (vectorizable_induction): Reduce
group_size before computing the number of required IVs.
* gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c: Adjust.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c | 4 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 14 |
4 files changed, 28 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2eba6db..88b03be 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2020-05-20 Richard Biener <rguenther@suse.de> + PR tree-optimization/95219 + * tree-vect-loop.c (vectorizable_induction): Reduce + group_size before computing the number of required IVs. + +2020-05-20 Richard Biener <rguenther@suse.de> + PR middle-end/95231 * tree-inline.c (remap_gimple_stmt): Revert adjusting COND_EXPR and VEC_COND_EXPR for a -fnon-call-exception boundary. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3d63c57..9552d20 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,11 @@ 2020-05-20 Richard Biener <rguenther@suse.de> + PR tree-optimization/95219 + * tree-vect-loop.c (vectorizable_induction): Reduce + group_size before computing the number of required IVs. + +2020-05-20 Richard Biener <rguenther@suse.de> + PR middle-end/95231 * g++.dg/other/pr95231.C: New testcase. diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c index 257d098..9a75b98 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c @@ -20,4 +20,6 @@ void dacP98FillRGBMap (unsigned char *pBuffer) } } -/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" { target vect_interleave } } } */ +/* Even with SSE2 we should only generate one IV for the induction. 
*/ +/* { dg-final { scan-tree-dump-times "# vect_vec_iv" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f065acc..ecce348 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -7528,7 +7528,13 @@ vectorizable_induction (loop_vec_info loop_vinfo, unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); unsigned elts = const_nunits * nvects; - unsigned nivs = least_common_multiple (group_size, + /* Compute the number of distinct IVs we need. First reduce + group_size if it is a multiple of const_nunits so we get + one IV for a group_size of 4 but const_nunits 2. */ + unsigned group_sizep = group_size; + if (group_sizep % const_nunits == 0) + group_sizep = group_sizep / const_nunits; + unsigned nivs = least_common_multiple (group_sizep, const_nunits) / const_nunits; gcc_assert (elts % group_size == 0); tree elt = init_expr; @@ -7576,6 +7582,12 @@ vectorizable_induction (loop_vec_info loop_vinfo, SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi_info); } + /* Fill up to the number of vectors we need for the whole group. */ + nivs = least_common_multiple (group_size, + const_nunits) / const_nunits; + for (; ivn < nivs; ++ivn) + SLP_TREE_VEC_STMTS (slp_node) + .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]); /* Re-use IVs when we can. */ if (ivn < nvects) |