diff options
author | Jakub Jelinek <jakub@redhat.com> | 2023-11-17 15:09:44 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2023-11-17 15:09:44 +0100 |
commit | 172a72da368146e0fe34194020eb7a6636db4438 (patch) | |
tree | bc653304a14c63356307aad107f3762e9c88e7cb | |
parent | 04abafe9831f6867af1211ecae853ff373235b2d (diff) | |
download | gcc-172a72da368146e0fe34194020eb7a6636db4438.zip gcc-172a72da368146e0fe34194020eb7a6636db4438.tar.gz gcc-172a72da368146e0fe34194020eb7a6636db4438.tar.bz2 |
vect: Fix check_reduction_path [PR112374]
As mentioned in the PR, the intent of the r14-5076 changes was to not
count one of the uses on the use_stmt. What actually got implemented,
however, performs this processing on any op_use_stmt, even if it is
not use_stmt. That means it can increase the count even on debug stmts
(causing -fcompare-debug failures), or, if there is some other use stmt
with 2+ uses, it could count that as a single use. Though, because it
fails whenever cnt != 1 and I believe use_stmt must be one of the uses,
it would probably fail in the latter case anyway.
The following patch fixes that by doing this extra processing only when
op_use_stmt is use_stmt, and using the normal processing otherwise
(i.e. ignore debug stmts, and increase the count for every use on the stmt).
2023-11-17 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/112374
* tree-vect-loop.cc (check_reduction_path): Perform the cond_fn_p
special case only if op_use_stmt == use_stmt, use as_a rather than
dyn_cast in that case.
* gcc.dg/pr112374-1.c: New test.
* gcc.dg/pr112374-2.c: New test.
* g++.dg/opt/pr112374.C: New test.
-rw-r--r-- | gcc/testsuite/g++.dg/opt/pr112374.C | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr112374-1.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr112374-2.c | 33 | ||||
-rw-r--r-- | gcc/tree-vect-loop.cc | 4 |
4 files changed, 79 insertions, 2 deletions
diff --git a/gcc/testsuite/g++.dg/opt/pr112374.C b/gcc/testsuite/g++.dg/opt/pr112374.C new file mode 100644 index 0000000..15d8a06 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr112374.C @@ -0,0 +1,24 @@ +// PR tree-optimization/112374 +// { dg-do compile { target c++11 } } +// { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } +// { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } +// { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } + +struct t +{ + long coef[1]; + t(const unsigned long &a) : coef{(long)a} {}; + t(const t &a); +}; +extern void gen_int_mode(t, int); +struct expand_vec_perm_d { + unsigned char perm[64]; + int vmode; + unsigned char nelt; +}; +void expand_vec_perm_blend(struct expand_vec_perm_d *d) { + unsigned long mask = 0; + for (unsigned i = 0; i < 4; ++i) + mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); + gen_int_mode(mask, 0); +} diff --git a/gcc/testsuite/gcc.dg/pr112374-1.c b/gcc/testsuite/gcc.dg/pr112374-1.c new file mode 100644 index 0000000..7fbd67a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112374-1.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/112374 */ +/* { dg-do compile } */ +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2 -w" } */ +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */ + +void foo (int, int); +struct S { char s[4]; }; +int a, b, c; + +void +bar () +{ + struct S d; + long e = 0; + for (c = 0; c < 4; ++c) + e |= (d.s[c] ? 
3 : 0) << c; + if (e) + foo (a, b); +} diff --git a/gcc/testsuite/gcc.dg/pr112374-2.c b/gcc/testsuite/gcc.dg/pr112374-2.c new file mode 100644 index 0000000..1598c49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112374-2.c @@ -0,0 +1,33 @@ +/* PR tree-optimization/112374 */ +/* { dg-do compile } */ +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } */ +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */ + +void foo (int, int); +struct S { char s[64]; } *p; +char a, b; +unsigned char c; +int d, e; + +void +bar (void) +{ + unsigned i; + long j = 0; + for (i = 0; i < b; ++i) + j |= (p->s[i] ? 3 : 0) << i; + if (p->s[i + 1]) + lab: + for (;;) + ; + for (i = 0; i < 4; ++i) + j |= p->s[i] << i; + for (; i; i += 2) + if (c + 1 != a) + goto lab; + for (; i < 8; ++i) + j |= p->s[i] >= 6; + if (j) + foo (d, e); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3f59139..e8b8be5 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4118,9 +4118,9 @@ pop: /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have op1 twice (once as definition, once as else) in the same operation. Allow this. */ - if (cond_fn_p) + if (cond_fn_p && op_use_stmt == use_stmt) { - gcall *call = dyn_cast<gcall *> (use_stmt); + gcall *call = as_a<gcall *> (use_stmt); unsigned else_pos = internal_fn_else_index (internal_fn (op.code)); |