diff options
author | Jakub Jelinek <jakub@redhat.com> | 2012-05-21 16:29:11 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2012-05-21 16:29:11 +0200 |
commit | 7772bae06caa4eb496d417efe23e9bb33693dc3c (patch) | |
tree | 4fad66eaca5a76ae4d23737387633a4cb659fe0f | |
parent | 017a202055c0bdd4529960e6d38992a910649966 (diff) | |
download | gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.zip gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.tar.gz gcc-7772bae06caa4eb496d417efe23e9bb33693dc3c.tar.bz2 |
re PR tree-optimization/53366 (wrong code generation by tree vectorizer using AVX)
PR tree-optimization/53366
* tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
tests if complex_numbers == 2, but there are non-complex number loads
too.
* gcc.dg/torture/pr53366-1.c: New test.
* gcc.dg/torture/pr53366-2.c: New test.
* gcc.target/i386/pr53366-1.c: New test.
* gcc.target/i386/pr53366-2.c: New test.
From-SVN: r187717
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/pr53366-1.c | 70 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/pr53366-2.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr53366-1.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr53366-2.c | 5 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 3 |
7 files changed, 136 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 080975c..d7bdb99 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2012-05-21 Jakub Jelinek <jakub@redhat.com> + PR tree-optimization/53366 + * tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut + tests if complex_numbers == 2, but there are non-complex number loads + too. + PR tree-optimization/53409 * tree-vect-loop.c (vect_analyze_loop_operations): Don't check vinfo_for_stmt (op_def_stmt) if op_def_stmt isn't inside loop. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a9ba222..1f1bcb4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,11 @@ 2012-05-21 Jakub Jelinek <jakub@redhat.com> + PR tree-optimization/53366 + * gcc.dg/torture/pr53366-1.c: New test. + * gcc.dg/torture/pr53366-2.c: New test. + * gcc.target/i386/pr53366-1.c: New test. + * gcc.target/i386/pr53366-2.c: New test. + PR tree-optimization/53409 * gcc.c-torture/compile/pr53409.c: New test. diff --git a/gcc/testsuite/gcc.dg/torture/pr53366-1.c b/gcc/testsuite/gcc.dg/torture/pr53366-1.c new file mode 100644 index 0000000..ca9997e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr53366-1.c @@ -0,0 +1,70 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct S { double v[3]; }; +struct T { struct S r, i; }; +struct U { struct T j[5]; }; + +void +foo (struct U *__restrict p1, struct U *__restrict p2, + struct S l1, struct S l2, struct S l3, struct S l4, + const double _Complex * __restrict x, int y, int z) +{ + int i, j; + while (y < z - 2) + { + for (j = 0; j < 5; ++j) + { + double a = __real__ x[5 * y + j]; + double b = __imag__ x[5 * y + j]; + double c = __real__ x[5 * (y + 2) + j]; + double d = __imag__ x[5 * (y + 2) + j]; + double e = __real__ x[5 * (y + 1) + j]; + double f = __imag__ x[5 * (y + 1) + j]; + double g = __real__ x[5 * (y + 3) + j]; + double h = __imag__ x[5 * (y + 3) + j]; + for (i = 0; i < 3; ++i) + { + p1->j[j].r.v[i] += l2.v[i] * a; + p1->j[j].r.v[i] += l4.v[i] * c; + p1->j[j].i.v[i] += l2.v[i] * b; + p1->j[j].i.v[i] += l4.v[i] * d; + p2->j[j].r.v[i] += l3.v[i] * e; + p2->j[j].r.v[i] += l1.v[i] * g; + p2->j[j].i.v[i] += l3.v[i] * f; + p2->j[j].i.v[i] += l1.v[i] * h; + } + } + y += 4; + } +} + +_Complex double x[5005]; +struct U p1, p2; + +int +main () +{ + int i, j; + struct S l1, l2, l3, l4; + for (i = 0; i < 5005; ++i) + x[i] = i + 1.0iF * (2 * i); + for (i = 0; i < 3; ++i) + { + l1.v[i] = 1; + l2.v[i] = 2; + l3.v[i] = 3; + l4.v[i] = 4; + } + foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000); + for (j = 0; j < 5; ++j) + for (i = 0; i < 3; ++i) + if (p1.j[j].r.v[i] != 3752430 + j * 1494.0 + || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2 + || p2.j[j].r.v[i] != 2502450 + j * 996.0 + || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr53366-2.c b/gcc/testsuite/gcc.dg/torture/pr53366-2.c new file mode 100644 index 0000000..6be6a56 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr53366-2.c @@ -0,0 +1,43 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct T { float r[3], i[3]; }; +struct U { struct T j[2]; }; + +void __attribute__ ((noinline)) +foo (struct U *__restrict y, const float _Complex *__restrict x) +{ + int i, j; + for (j = 0; j < 2; ++j) + { + float a = __real__ x[j]; + float b = __imag__ x[j]; + float c = __real__ x[j + 2]; + float d = __imag__ x[j + 2]; + for (i = 0; i < 3; ++i) + { + y->j[j].r[i] = y->j[j].r[i] + a + c; + y->j[j].i[i] = y->j[j].i[i] + b + d; + } + } +} + +_Complex float x[4]; +struct U y; + +int +main () +{ + int i, j; + for (i = 0; i < 4; ++i) + x[i] = i + 1.0iF * (2 * i); + foo (&y, x); + for (j = 0; j < 2; ++j) + for (i = 0; i < 3; ++i) + if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) + || y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr53366-1.c b/gcc/testsuite/gcc.target/i386/pr53366-1.c new file mode 100644 index 0000000..c24a594 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr53366-1.c @@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr53366-2.c b/gcc/testsuite/gcc.target/i386/pr53366-2.c new file mode 100644 index 0000000..77270a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr53366-2.c @@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-2.c" diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e189c50..4690a4e 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, /* We checked that this case ok, so there is no need to proceed with permutation tests. */ - if (complex_numbers == 2) + if (complex_numbers == 2 + && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2) { VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn)); VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); |