diff options
author | Richard Biener <rguenther@suse.de> | 2019-11-26 08:32:38 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2019-11-26 08:32:38 +0000 |
commit | f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444 (patch) | |
tree | 31c5b6b2f32b1d2e72dbecf908678ad3b98a9ce9 /gcc | |
parent | 59d37e97093b1b9f9498e61ee648557479e79cd4 (diff) | |
download | gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.zip gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.tar.gz gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.tar.bz2 |
re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-11-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/92645
* tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
do not build the operation from scalars if the operand is.
* gcc.target/i386/pr92645.c: New testcase.
From-SVN: r278719
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92645.c | 36 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 14 |
4 files changed, 55 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 186299a..6ea6e5b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-11-26 Richard Biener <rguenther@suse.de>
+
+	PR tree-optimization/92645
+	* tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
+	do not build the operation from scalars if the operand is.
+
 2019-11-25 Tobias Burnus <tobias@codesourcery.com>
 
 	* config/gcn/mkoffload.c (COMMENT_PREFIX, struct id_map,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0a284da..d105b60 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-26 Richard Biener <rguenther@suse.de>
+
+	PR tree-optimization/92645
+	* gcc.target/i386/pr92645.c: New testcase.
+
 2019-11-26 Jakub Jelinek <jakub@redhat.com>
 
 	* gfortran.dg/dec-comparison.f90: Change dg-do from run to compile.
diff --git a/gcc/testsuite/gcc.target/i386/pr92645.c b/gcc/testsuite/gcc.target/i386/pr92645.c
new file mode 100644
index 0000000..467ed53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized -msse2 -Wno-psabi" } */
+
+typedef unsigned short v8hi __attribute__((vector_size(16)));
+typedef unsigned int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *dst, v8hi * __restrict src)
+{
+  unsigned int tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v4si *)tem;
+  dst[1] = *(v4si *)&tem[4];
+}
+void foo (v4si *dst, v8hi src)
+{
+  unsigned int tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v4si *)tem;
+  dst[1] = *(v4si *)&tem[4];
+}
+
+/* { dg-final { scan-tree-dump-times "vec_unpack_" 4 "optimized" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index bedbe9a..48aca3b 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1410,10 +1410,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                         matches, npermutes,
                                         &this_tree_size, bst_map)) != NULL)
         {
-          /* If we have all children of child built up from scalars then just
-             throw that away and build it up this node from scalars. */
+          /* If we have all children of a non-unary child built up from
+             scalars then just throw that away and build it up this node
+             from scalars. */
           if (is_a <bb_vec_info> (vinfo)
-              && !SLP_TREE_CHILDREN (child).is_empty ()
+              && SLP_TREE_CHILDREN (child).length () > 1
               /* ??? Rejecting patterns this way doesn't work. We'd have to
                  do extra work to cancel the pattern so the uses see the
                  scalar version. */
@@ -1549,10 +1550,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                             tem, npermutes,
                                             &this_tree_size, bst_map)) != NULL)
             {
-              /* If we have all children of child built up from scalars then
-                 just throw that away and build it up this node from scalars. */
+              /* If we have all children of a non-unary child built up from
+                 scalars then just throw that away and build it up this node
+                 from scalars. */
               if (is_a <bb_vec_info> (vinfo)
-                  && !SLP_TREE_CHILDREN (child).is_empty ()
+                  && SLP_TREE_CHILDREN (child).length () > 1
                   /* ??? Rejecting patterns this way doesn't work. We'd have to
                      do extra work to cancel the pattern so the uses see the
                      scalar version. */