about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2019-12-06 07:53:15 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2019-12-06 07:53:15 +0000
commit: 2ef278569f60a2c1556f1752aeba39c586521371 (patch)
tree: 327828c5b3bda519cfed60d710e3a2d0e8e895c5
parent: 9961856c3acb8e7d9def11b58001db6af9f14253 (diff)
downloadgcc-2ef278569f60a2c1556f1752aeba39c586521371.zip
gcc-2ef278569f60a2c1556f1752aeba39c586521371.tar.gz
gcc-2ef278569f60a2c1556f1752aeba39c586521371.tar.bz2
re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor)
2019-12-06  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92819
	* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
	into the last lane.  For two-element vectors try inserting
	into the last lane when inserting into the first fails.

	* gcc.target/i386/pr92819-1.c: New testcase.
	* gcc.target/i386/pr92803.c: Adjust.

From-SVN: r279033
-rw-r--r--  gcc/ChangeLog  7
-rw-r--r--  gcc/match.pd  13
-rw-r--r--  gcc/testsuite/ChangeLog  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr92803.c  8
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr92819-1.c  20
5 files changed, 48 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b12636a..3e747a6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-06 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92819
+ * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
+ into the last lane. For two-element vectors try inserting
+ into the last lane when inserting into the first fails.
+
2019-12-06 Jakub Jelinek <jakub@redhat.com>
* common.opt (fprofile-partial-training): Terminate description with
diff --git a/gcc/match.pd b/gcc/match.pd
index 68027f6..e32d800 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR))
{
- if (sel.series_p (1, 1, nelts + 1, 1))
+ bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
+ if (insert_first_p)
{
/* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from
@@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
if ((ins = fold_read_from_vector (cop0, sel[0])))
op0 = op1;
}
- else
+ /* The above can fail for two-element vectors which always
+ appear to insert the first element, so try inserting
+ into the second lane as well. For more than two
+ elements that's wasted time. */
+ if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
{
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at))
break;
- if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
+ if (at < encoded_nelts
+ && (known_eq (at + 1, nelts)
+ || sel.series_p (at + 1, 1, at + 1, 1)))
{
if (known_lt (poly_uint64 (sel[at]), nelts))
ins = fold_read_from_vector (cop0, sel[at]);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 753aa39..bcc65f2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2019-12-06 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92819
+ * gcc.target/i386/pr92819-1.c: New testcase.
+ * gcc.target/i386/pr92803.c: Adjust.
+
2019-12-05 Martin Sebor <msebor@redhat.com>
PR testsuite/92829
diff --git a/gcc/testsuite/gcc.target/i386/pr92803.c b/gcc/testsuite/gcc.target/i386/pr92803.c
index fc8d64e..d533bae 100644
--- a/gcc/testsuite/gcc.target/i386/pr92803.c
+++ b/gcc/testsuite/gcc.target/i386/pr92803.c
@@ -31,8 +31,10 @@ barf (v8sf x)
return (v4sf) { x[4], x[5], 1.0f, 2.0f };
}
-/* We expect all CTORs to turn into permutes, the FP converting ones
+/* For bar we do two inserts: first zero, then convert, then insert *p.  */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
+/* We expect all other CTORs to turn into permutes, the FP converting ones
to two each with the one with constants possibly elided in the future
by converting 3.0f and 1.0f "back" to integers. */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92819-1.c b/gcc/testsuite/gcc.target/i386/pr92819-1.c
new file mode 100644
index 0000000..0ec0ca5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92819-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */
+
+typedef double v2df __attribute__((vector_size (16)));
+
+v2df
+foo (v2df x, double *p)
+{
+ return (v2df) { x[0], *p };
+}
+
+v2df
+bar (v2df x, double *p)
+{
+ return (v2df) { *p, x[1] };
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
+/* { dg-final { scan-assembler "movhpd" } } */
+/* { dg-final { scan-assembler "movlpd" } } */