aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2012-05-21 16:29:11 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org> 2012-05-21 16:29:11 +0200
commit 7772bae06caa4eb496d417efe23e9bb33693dc3c (patch)
tree 4fad66eaca5a76ae4d23737387633a4cb659fe0f
parent 017a202055c0bdd4529960e6d38992a910649966 (diff)
download gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.zip
gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.tar.gz
gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.tar.bz2
re PR tree-optimization/53366 (wrong code generation by tree vectorizer using AVX)
PR tree-optimization/53366
* tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
tests if complex_numbers == 2, but there are non-complex number loads
too.

* gcc.dg/torture/pr53366-1.c: New test.
* gcc.dg/torture/pr53366-2.c: New test.
* gcc.target/i386/pr53366-1.c: New test.
* gcc.target/i386/pr53366-2.c: New test.

From-SVN: r187717
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.dg/torture/pr53366-1.c 70
-rw-r--r-- gcc/testsuite/gcc.dg/torture/pr53366-2.c 43
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr53366-1.c 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr53366-2.c 5
-rw-r--r-- gcc/tree-vect-slp.c 3
7 files changed, 136 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 080975c..d7bdb99 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2012-05-21 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/53366
+ * tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
+ tests if complex_numbers == 2, but there are non-complex number loads
+ too.
+
PR tree-optimization/53409
* tree-vect-loop.c (vect_analyze_loop_operations): Don't check
vinfo_for_stmt (op_def_stmt) if op_def_stmt isn't inside loop.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a9ba222..1f1bcb4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2012-05-21 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/53366
+ * gcc.dg/torture/pr53366-1.c: New test.
+ * gcc.dg/torture/pr53366-2.c: New test.
+ * gcc.target/i386/pr53366-1.c: New test.
+ * gcc.target/i386/pr53366-2.c: New test.
+
PR tree-optimization/53409
* gcc.c-torture/compile/pr53409.c: New test.
diff --git a/gcc/testsuite/gcc.dg/torture/pr53366-1.c b/gcc/testsuite/gcc.dg/torture/pr53366-1.c
new file mode 100644
index 0000000..ca9997e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr53366-1.c
@@ -0,0 +1,70 @@
+/* PR tree-optimization/53366 */
+/* { dg-do run } */
+
+extern void abort (void);
+
+struct S { double v[3]; };
+struct T { struct S r, i; };
+struct U { struct T j[5]; };
+
+void
+foo (struct U *__restrict p1, struct U *__restrict p2,
+ struct S l1, struct S l2, struct S l3, struct S l4,
+ const double _Complex * __restrict x, int y, int z)
+{
+ int i, j;
+ while (y < z - 2)
+ {
+ for (j = 0; j < 5; ++j)
+ {
+ double a = __real__ x[5 * y + j];
+ double b = __imag__ x[5 * y + j];
+ double c = __real__ x[5 * (y + 2) + j];
+ double d = __imag__ x[5 * (y + 2) + j];
+ double e = __real__ x[5 * (y + 1) + j];
+ double f = __imag__ x[5 * (y + 1) + j];
+ double g = __real__ x[5 * (y + 3) + j];
+ double h = __imag__ x[5 * (y + 3) + j];
+ for (i = 0; i < 3; ++i)
+ {
+ p1->j[j].r.v[i] += l2.v[i] * a;
+ p1->j[j].r.v[i] += l4.v[i] * c;
+ p1->j[j].i.v[i] += l2.v[i] * b;
+ p1->j[j].i.v[i] += l4.v[i] * d;
+ p2->j[j].r.v[i] += l3.v[i] * e;
+ p2->j[j].r.v[i] += l1.v[i] * g;
+ p2->j[j].i.v[i] += l3.v[i] * f;
+ p2->j[j].i.v[i] += l1.v[i] * h;
+ }
+ }
+ y += 4;
+ }
+}
+
+_Complex double x[5005];
+struct U p1, p2;
+
+int
+main ()
+{
+ int i, j;
+ struct S l1, l2, l3, l4;
+ for (i = 0; i < 5005; ++i)
+ x[i] = i + 1.0iF * (2 * i);
+ for (i = 0; i < 3; ++i)
+ {
+ l1.v[i] = 1;
+ l2.v[i] = 2;
+ l3.v[i] = 3;
+ l4.v[i] = 4;
+ }
+ foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
+ for (j = 0; j < 5; ++j)
+ for (i = 0; i < 3; ++i)
+ if (p1.j[j].r.v[i] != 3752430 + j * 1494.0
+ || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
+ || p2.j[j].r.v[i] != 2502450 + j * 996.0
+ || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
+ abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr53366-2.c b/gcc/testsuite/gcc.dg/torture/pr53366-2.c
new file mode 100644
index 0000000..6be6a56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr53366-2.c
@@ -0,0 +1,43 @@
+/* PR tree-optimization/53366 */
+/* { dg-do run } */
+
+extern void abort (void);
+
+struct T { float r[3], i[3]; };
+struct U { struct T j[2]; };
+
+void __attribute__ ((noinline))
+foo (struct U *__restrict y, const float _Complex *__restrict x)
+{
+ int i, j;
+ for (j = 0; j < 2; ++j)
+ {
+ float a = __real__ x[j];
+ float b = __imag__ x[j];
+ float c = __real__ x[j + 2];
+ float d = __imag__ x[j + 2];
+ for (i = 0; i < 3; ++i)
+ {
+ y->j[j].r[i] = y->j[j].r[i] + a + c;
+ y->j[j].i[i] = y->j[j].i[i] + b + d;
+ }
+ }
+}
+
+_Complex float x[4];
+struct U y;
+
+int
+main ()
+{
+ int i, j;
+ for (i = 0; i < 4; ++i)
+ x[i] = i + 1.0iF * (2 * i);
+ foo (&y, x);
+ for (j = 0; j < 2; ++j)
+ for (i = 0; i < 3; ++i)
+ if (y.j[j].r[i] != __real__ (x[j] + x[j + 2])
+ || y.j[j].i[i] != __imag__ (x[j] + x[j + 2]))
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr53366-1.c b/gcc/testsuite/gcc.target/i386/pr53366-1.c
new file mode 100644
index 0000000..c24a594
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53366-1.c
@@ -0,0 +1,5 @@
+/* PR tree-optimization/53366 */
+/* { dg-do run { target avx_runtime } } */
+/* { dg-options "-O3 -mavx" } */
+
+#include "../../gcc.dg/torture/pr53366-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr53366-2.c b/gcc/testsuite/gcc.target/i386/pr53366-2.c
new file mode 100644
index 0000000..77270a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53366-2.c
@@ -0,0 +1,5 @@
+/* PR tree-optimization/53366 */
+/* { dg-do run { target avx_runtime } } */
+/* { dg-options "-O3 -mavx" } */
+
+#include "../../gcc.dg/torture/pr53366-2.c"
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index e189c50..4690a4e 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
/* We checked that this case ok, so there is no need to proceed with
permutation tests. */
- if (complex_numbers == 2)
+ if (complex_numbers == 2
+ && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
{
VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));