aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2024-12-02 14:59:00 +0100
committer: Richard Biener <rguenth@gcc.gnu.org> 2024-12-03 07:53:28 +0100
commit: be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c (patch)
tree: 338928e2a8061dcec81a03398a4940afb4279b4f
parent: dfb9f6e0ed358706ade9a007f8723c4e9ef538fc (diff)
download: gcc-be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c.zip
gcc-be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c.tar.gz
gcc-be8d1a358e3abc50c14a1d7b1cfee82fe6f6aa3c.tar.bz2
tree-optimization/117874 - missed vectorization that's formerly hybrid
With SLP forced we fail to consider using single-lane SLP for a case that we still end up discovering as hybrid (in the PR in question this is because we run into the SLP discovery limit due to excessive association).

PR tree-optimization/117874

* tree-vect-loop.cc (vect_analyze_loop_2): When non-SLP analysis fails, try single-lane SLP.

* gcc.dg/vect/pr117874.c: New testcase.
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr117874.c 50
-rw-r--r-- gcc/tree-vect-loop.cc 7
2 files changed, 53 insertions, 4 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr117874.c b/gcc/testsuite/gcc.dg/vect/pr117874.c
new file mode 100644
index 0000000..27e5f8c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117874.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+typedef struct { /* Complex number stored as two doubles.  */
+ double real;
+ double imag;
+} complex;
+/* 3x3 array of complex entries, accessed as e[i][k].  */
+typedef struct { complex e[3][3]; } su3_matrix;
+
+void mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
+{ /* Sets c->e[i][j] to the sum over k of conj (a->e[k][i]) * b->e[k][j].  */
+ int j;
+ double a0r,a0i,a1r,a1i,a2r,a2i;
+ double b0r,b0i,b1r,b1i,b2r,b2i;
+ for(j=0;j<3;j++)
+ { /* First group: a->e[k][0] and b->e[k][j] for k = 0..2.  */
+ a0r=a->e[0][0].real; a0i=a->e[0][0].imag;
+ b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+ a1r=a->e[1][0].real; a1i=a->e[1][0].imag;
+ b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+ a2r=a->e[2][0].real; a2i=a->e[2][0].imag;
+ b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+ c->e[0][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+ c->e[0][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+ /* Second group: a->e[k][1]; the b->e[k][j] loads are repeated.  */
+ a0r=a->e[0][1].real; a0i=a->e[0][1].imag;
+ b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+ a1r=a->e[1][1].real; a1i=a->e[1][1].imag;
+ b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+ a2r=a->e[2][1].real; a2i=a->e[2][1].imag;
+ b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+ c->e[1][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+ c->e[1][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+ /* Third group: a->e[k][2]; the b->e[k][j] loads are repeated.  */
+ a0r=a->e[0][2].real; a0i=a->e[0][2].imag;
+ b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+ a1r=a->e[1][2].real; a1i=a->e[1][2].imag;
+ b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+ a2r=a->e[2][2].real; a2i=a->e[2][2].imag;
+ b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+ c->e[2][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+ c->e[2][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+ }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_hw_misalign } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5a24fb8..8520960 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3005,10 +3005,9 @@ start_over:
ok = vect_analyze_loop_operations (loop_vinfo);
if (!ok)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad operation or unsupported loop bound.\n");
- return ok;
+ ok = opt_result::failure_at (vect_location,
+ "bad operation or unsupported loop bound\n");
+ goto again;
}
/* For now, we don't expect to mix both masking and length approaches for one