aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2019-11-26 08:32:38 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2019-11-26 08:32:38 +0000
commit: f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444 (patch)
tree: 31c5b6b2f32b1d2e72dbecf908678ad3b98a9ce9 /gcc
parent: 59d37e97093b1b9f9498e61ee648557479e79cd4 (diff)
download: gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.zip
          gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.tar.gz
          gcc-f4a74d2786ec812e40cfd0b3b7fa3cbeb2093444.tar.bz2
re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-11-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
	do not build the operation from scalars if the operand is.

	* gcc.target/i386/pr92645.c: New testcase.

From-SVN: r278719
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                             6
-rw-r--r--  gcc/testsuite/ChangeLog                   5
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr92645.c  36
-rw-r--r--  gcc/tree-vect-slp.c                      14
4 files changed, 55 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 186299a..6ea6e5b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-11-26 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92645
+ * tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
+ do not build the operation from scalars if the operand is.
+
2019-11-25 Tobias Burnus <tobias@codesourcery.com>
* config/gcn/mkoffload.c (COMMENT_PREFIX, struct id_map,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0a284da..d105b60 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-26 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92645
+ * gcc.target/i386/pr92645.c: New testcase.
+
2019-11-26 Jakub Jelinek <jakub@redhat.com>
* gfortran.dg/dec-comparison.f90: Change dg-do from run to compile.
diff --git a/gcc/testsuite/gcc.target/i386/pr92645.c b/gcc/testsuite/gcc.target/i386/pr92645.c
new file mode 100644
index 0000000..467ed53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized -msse2 -Wno-psabi" } */
+
+typedef unsigned short v8hi __attribute__((vector_size(16)));
+typedef unsigned int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *dst, v8hi * __restrict src)
+{
+ unsigned int tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v4si *)tem;
+ dst[1] = *(v4si *)&tem[4];
+}
+void foo (v4si *dst, v8hi src)
+{
+ unsigned int tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v4si *)tem;
+ dst[1] = *(v4si *)&tem[4];
+}
+
+/* { dg-final { scan-tree-dump-times "vec_unpack_" 4 "optimized" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index bedbe9a..48aca3b 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1410,10 +1410,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
matches, npermutes,
&this_tree_size, bst_map)) != NULL)
{
- /* If we have all children of child built up from scalars then just
- throw that away and build it up this node from scalars. */
+ /* If we have all children of a non-unary child built up from
+ scalars then just throw that away and build it up this node
+ from scalars. */
if (is_a <bb_vec_info> (vinfo)
- && !SLP_TREE_CHILDREN (child).is_empty ()
+ && SLP_TREE_CHILDREN (child).length () > 1
/* ??? Rejecting patterns this way doesn't work. We'd have to
do extra work to cancel the pattern so the uses see the
scalar version. */
@@ -1549,10 +1550,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
tem, npermutes,
&this_tree_size, bst_map)) != NULL)
{
- /* If we have all children of child built up from scalars then
- just throw that away and build it up this node from scalars. */
+ /* If we have all children of a non-unary child built up from
+ scalars then just throw that away and build it up this node
+ from scalars. */
if (is_a <bb_vec_info> (vinfo)
- && !SLP_TREE_CHILDREN (child).is_empty ()
+ && SLP_TREE_CHILDREN (child).length () > 1
/* ??? Rejecting patterns this way doesn't work. We'd have
to do extra work to cancel the pattern so the uses see the
scalar version. */