Diffstat (limited to 'gcc')
-rw-r--r--  gcc/gimple.h                                  |    2
-rw-r--r--  gcc/testsuite/g++.dg/vect/simd-11.cc          |   61
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-54.c         |   23
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-55.c         |   18
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-56.c         |   17
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-57.c         |   38
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-58.c         |   23
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-59.c         |   25
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-60.c         |   18
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-61.c         |   26
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-62.c         |   21
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-63.c         |   21
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-64.c         |   11
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-65.c         |   15
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-66.c         |   32
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c  |   31
-rw-r--r--  gcc/testsuite/gfortran.dg/vect/O3-bb-slp-1.f  |   28
-rw-r--r--  gcc/testsuite/gfortran.dg/vect/O3-bb-slp-2.f  |   40
-rw-r--r--  gcc/tree-vect-loop-manip.c                    |   27
-rw-r--r--  gcc/tree-vect-loop.c                          |  112
-rw-r--r--  gcc/tree-vect-slp.c                           |  664
-rw-r--r--  gcc/tree-vect-stmts.c                         |    8
-rw-r--r--  gcc/tree-vectorizer.c                         |    3
-rw-r--r--  gcc/tree-vectorizer.h                         |    2
24 files changed, 1031 insertions, 235 deletions
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 3c9b996..87c90be 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -6598,6 +6598,8 @@ gimple_expr_type (const gimple *stmt)
}
else if (code == GIMPLE_COND)
return boolean_type_node;
+ else if (code == GIMPLE_PHI)
+ return TREE_TYPE (gimple_phi_result (stmt));
else
return void_type_node;
}
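
For reference, PHIs now flow through the generic SLP operand analysis, which uses gimple_expr_type; GIMPLE_PHI therefore has to report the type of its result instead of falling through to void_type_node. A minimal illustration (hand-written C with made-up SSA names, not actual dump output):

    /* After gimplification the merge below becomes
         # tem_1 = PHI <a_2(3), b_3(4)>
       and gimple_expr_type on that PHI now returns double.  */
    double f (int flag, double a, double b)
    {
      double tem = flag ? a : b;
      return tem;
    }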
diff --git a/gcc/testsuite/g++.dg/vect/simd-11.cc b/gcc/testsuite/g++.dg/vect/simd-11.cc
new file mode 100644
index 0000000..912d184
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/simd-11.cc
@@ -0,0 +1,61 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+// { dg-additional-options "-Ofast" }
+
+template <typename> class h {
+public:
+ ~h();
+};
+template <typename> struct l;
+template <typename c> struct l<h<c> > {
+ using d = c;
+ template <typename e> using f = h<e>;
+};
+template <typename g> struct o {
+ typedef l<g> j;
+ typedef typename j::d k;
+ template <typename c> struct p { typedef typename j::f<c> m; };
+};
+template <typename c, typename g> struct F {
+ typedef typename o<g>::p<c>::m q;
+ struct : q {
+ } r;
+};
+template <typename c, typename g = h<c> > class s : F<c, g> {
+public:
+ s(long);
+ typename o<typename F<c, g>::q>::k operator[](long);
+};
+template <int> class t {
+public:
+ int dimension;
+ t(const t &);
+ void operator+=(t);
+ double values[];
+};
+template <int dim> t<dim>::t(const t &p1) {
+ for (int i = 0; i < dim; ++i)
+ values[i] = p1.values[i];
+}
+template <int dim> class u : public t<dim> {
+public:
+ double m_fn1(const u &) const;
+};
+template <int dim> double u<dim>::m_fn1(const u &) const {
+ double a;
+ for (int i = 0; i < dim; ++i) {
+ double b = this->values[i];
+ a += b;
+ }
+ return a;
+}
+int m_fn2(const u<2> &p1, const u<2> &w) {
+ int n;
+ s<u<2> > c(n);
+ s<double> d(n);
+ double e = p1.m_fn1(w);
+ for (;;) {
+ c[0] += p1;
+ d = e;
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-54.c b/gcc/testsuite/gcc.dg/vect/bb-slp-54.c
new file mode 100644
index 0000000..d05ce33
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-54.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2], b[2], c[2];
+
+void foo(int flag)
+{
+ double tem1, tem2;
+ if (flag)
+ {
+ tem1 = a[0];
+ tem2 = a[1];
+ }
+ else
+ {
+ tem1 = b[0];
+ tem2 = b[1];
+ }
+ c[0] = tem1;
+ c[1] = tem2;
+}
+
+/* { dg-final { scan-tree-dump-times "transform load" 2 "slp2" } } */
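
Each arm of the diamond above loads two adjacent doubles, so the BB vectorizer can turn each arm into a single vector load and merge the arms with one vector PHI; the two "transform load" matches correspond to the two arms. A hand-written sketch of the expected shape, assuming a 2 x double vector mode (names made up, not actual dump output):

    /* if (flag) vect_tem_1 = MEM <vector(2) double> [(double *)&a];
       else      vect_tem_2 = MEM <vector(2) double> [(double *)&b];
       # vect_tem_3 = PHI <vect_tem_1(then), vect_tem_2(else)>
       MEM <vector(2) double> [(double *)&c] = vect_tem_3;  */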
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-55.c b/gcc/testsuite/gcc.dg/vect/bb-slp-55.c
new file mode 100644
index 0000000..57a042b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-55.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+typedef struct {
+ int a;
+ int b;
+ int c;
+ int d;
+} e;
+e *f;
+int g;
+void h() {
+ e *i;
+ if (g) {
+ i->c = f[g].b;
+ i->d = f[g].a;
+ } else
+ i->c = i->d = 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-56.c b/gcc/testsuite/gcc.dg/vect/bb-slp-56.c
new file mode 100644
index 0000000..90d1751
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-56.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+typedef struct {
+ double a, b;
+} c;
+int d, e;
+int i(void);
+void h(c, c);
+void f() {
+ c a, g;
+ do {
+ a.a = e ?: g.a;
+ a.b = g.b + d;
+ h(g, a);
+ g = a;
+ } while (i());
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-57.c b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c
new file mode 100644
index 0000000..6f13507
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-require-effective-target vect_float } */
+
+float *a;
+typedef struct {
+ int c;
+ float bbmax[3];
+} d;
+d e;
+int f[3];
+int g, h, i, j;
+float k, k;
+void l()
+{
+ for (unsigned z = 0; z < 2048; ++z) {
+ {
+ j = e.bbmax[1] > k ? e.bbmax[1] : k;
+ }
+ e.bbmax[1] = j;
+ { i = e.bbmax[2] > k ? e.bbmax[2] : k; }
+ e.bbmax[2] = i;
+ f[2] = a[2];
+ {
+ float b;
+ h = e.bbmax[1] > b ? e.bbmax[1] : b;
+ }
+ e.bbmax[1] = h;
+ {
+ float b;
+ g = e.bbmax[2] > b ? e.bbmax[2] : b;
+ }
+ e.bbmax[2] = g;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "transform load" 1 "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump "optimized: basic block" "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-58.c b/gcc/testsuite/gcc.dg/vect/bb-slp-58.c
new file mode 100644
index 0000000..11bf5c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-58.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+
+double x[1024];
+void bar (void);
+
+void foo (void)
+{
+ double tem1 = x[0];
+ double tem2 = x[1];
+ for (int i = 0; i < 511; ++i)
+ {
+ x[2*i] = tem1;
+ x[2*i+1] = tem2;
+ bar ();
+ tem1 = x[2*(i+1)];
+ tem2 = x[2*(i+1)+1];
+ }
+}
+
+/* We should be able to vectorize the cycle in one SLP attempt including
+ both load groups. */
+/* { dg-final { scan-tree-dump-times "transform load" 2 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1" } } */
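
The pair (tem1, tem2) is a two-lane value carried around the loop through PHIs. With backedge children represented in the SLP graph the whole cycle is discovered in one attempt, so the store group, the load group and the carrying PHI all vectorize together. A hand-written sketch of the vectorized body (names made up; the call to bar () keeps this a basic-block rather than a loop vectorization):

    /* # vect_tem_1 = PHI <vect_init_2(preheader), vect_next_3(latch)>
       MEM <vector(2) double> [&x[2*i]] = vect_tem_1;
       bar ();
       vect_next_3 = MEM <vector(2) double> [&x[2*(i+1)]];  */

bb-slp-59.c below is the same cycle with the lanes swapped and additionally checks that the swap costs exactly one VEC_PERM_EXPR.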
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-59.c b/gcc/testsuite/gcc.dg/vect/bb-slp-59.c
new file mode 100644
index 0000000..2e35725
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-59.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-loopdone" } */
+
+double x[1024];
+void bar (void);
+
+void foo (void)
+{
+ double tem1 = x[0];
+ double tem2 = x[1];
+ for (int i = 0; i < 511; ++i)
+ {
+ x[2*i] = tem2;
+ x[2*i+1] = tem1;
+ bar ();
+ tem1 = x[2*(i+1)];
+ tem2 = x[2*(i+1)+1];
+ }
+}
+
+/* We should be able to vectorize the cycle in one SLP attempt including
+ both load groups and do only one permutation. */
+/* { dg-final { scan-tree-dump-times "transform load" 2 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "loopdone" } } */
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-60.c b/gcc/testsuite/gcc.dg/vect/bb-slp-60.c
new file mode 100644
index 0000000..52643bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-60.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+enum { a = 1, b };
+float *c, *e;
+float d, h;
+int f, g;
+void i()
+{
+ float j = h;
+ for (; g;)
+ for (; f; f++)
+ {
+ c[a] = j * d;
+ c[b] = h * d;
+ j = 0;
+ h = e[2];
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-61.c b/gcc/testsuite/gcc.dg/vect/bb-slp-61.c
new file mode 100644
index 0000000..3323a2b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-61.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+
+struct a {
+ enum { b, c } d;
+ unsigned e;
+ unsigned f;
+};
+void j(struct a *a, int i, int h)
+{
+ unsigned f = a->f;
+ switch (a->d)
+ while (1)
+ {
+ if (i)
+ {
+ case b:
+ if (h)
+ goto k;
+ }
+ else
+ f = 0;
+ case c:;
+ }
+k:
+ a->e = a->f = f;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-62.c b/gcc/testsuite/gcc.dg/vect/bb-slp-62.c
new file mode 100644
index 0000000..84ee04c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-62.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+typedef struct {
+ char a;
+ int b[];
+} c;
+int d, f;
+c e;
+void g() {
+ int h, i, j;
+ for (; i;)
+ switch (i)
+ case 4: {
+ h = (__UINTPTR_TYPE__)g >= 3;
+ for (; h; h -= 1)
+ if (d)
+ j = f;
+ }
+ for (; i < 3; i++)
+ e.b[i] = j;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-63.c b/gcc/testsuite/gcc.dg/vect/bb-slp-63.c
new file mode 100644
index 0000000..6519c97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-63.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+struct {
+ unsigned a;
+ unsigned c;
+} d;
+int e, g;
+void h(unsigned b) {
+ unsigned a, c;
+ while (e) {
+ if (b) {
+ ++e;
+ continue;
+ }
+ c = g;
+ if (g)
+ a |= 10;
+ }
+ d.a = a;
+ d.c = c;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-64.c b/gcc/testsuite/gcc.dg/vect/bb-slp-64.c
new file mode 100644
index 0000000..dcb6a14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-64.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+enum { a, b };
+double *c, *e;
+int d, f;
+void g() {
+ for (;;) {
+ c[a] = c[b] = d * e[b];
+ f = d -= f;
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-65.c b/gcc/testsuite/gcc.dg/vect/bb-slp-65.c
new file mode 100644
index 0000000..ec1707b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-65.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */
+
+int *a;
+int b, c, d, e;
+void f() {
+ int g;
+ for (;;)
+ for (; b;)
+ if (d)
+ for (; c;)
+ if (g)
+ e += a[1] = a[2] = e;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-66.c b/gcc/testsuite/gcc.dg/vect/bb-slp-66.c
new file mode 100644
index 0000000..b59a2cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-66.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+typedef struct {
+ double a, b;
+} c;
+typedef struct {
+ c d;
+ long coordinates;
+} e;
+int f;
+c g;
+e h;
+void k(int);
+int n();
+void j() { int i; k(i); }
+void k(int l) {
+ double a;
+ int b;
+ c m[4];
+ long i;
+ for (; l;)
+ do {
+ g.a = b ?: a;
+ m[3] = g;
+ if (f)
+ m[0] = m[1] = m[3];
+ i = 0;
+ for (; i < 4; i++)
+ (&h + i)->d = m[i];
+ } while (n());
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c
new file mode 100644
index 0000000..62b18bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[1024];
+
+void foo (void)
+{
+ for (int i = 0; i < 1020; i += 4)
+ {
+ int suma = a[i];
+ int sumb = a[i+1];
+ int sumc = a[i+2];
+ int sumd = a[i+3];
+ for (unsigned j = 0; j < 77; ++j)
+ {
+ suma = (suma ^ i) + 1;
+ sumb = (sumb ^ i) + 2;
+ sumc = (sumc ^ i) + 3;
+ sumd = (sumd ^ i) + 4;
+ }
+ a[i] = suma;
+ a[i+1] = sumb;
+ a[i+2] = sumc;
+ a[i+3] = sumd;
+ }
+}
+
+/* We should vectorize this outer loop with SLP. */
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "vect" } } */
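
The four accumulators form a nested cycle of the inner loop; with the vect_transform_cycle_phi change in tree-vect-loop.c below their initial vector defs come straight from the SLP child at the preheader argument index, and since the four lanes are adjacent in a[] no permutation is required, which the scan-tree-dump-not verifies. A hand-written sketch of the vectorized inner loop (V4SI assumed, names made up):

    /* # vect_sum_1 = PHI <vect_a_2(outer preheader), vect_sum_3(latch)>
       vect_sum_3 = (vect_sum_1 ^ {i, i, i, i}) + {1, 2, 3, 4};  */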
diff --git a/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-1.f b/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-1.f
new file mode 100644
index 0000000..74b3b17
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-1.f
@@ -0,0 +1,28 @@
+! { dg-do compile }
+ subroutine tranx3 (jbeg,jend,kbeg,kend,dlo,den,mflx,zro)
+ parameter(in = 128+5
+ & , jn = 128+5
+ & , kn = 128+5)
+ parameter(ijkn = 128+5)
+ real*8 zro, dqm, dqp, dx3bi (kn)
+ real*8 mflux (ijkn,4), dtwid (ijkn,4), dd (ijkn,4)
+ real*8 mflx (in,jn,kn)
+ real*8 dlo (in,jn,kn), den (in,jn,kn)
+ do 2100 j=jbeg-1,jend
+ dtwid (k,1) = ( 0.5 + q1 ) * ( dlo(i ,j,k-1)
+ 3 - ( dx3a(k ) + xi ) * dd (k ,1) )
+ mflux (k,1) = dtwid (k,1) * ( v3(i ,j,k) - vg3(k) ) * dt
+ if (j.ge.jbeg) then
+ den(i ,j,k) = ( dlo(i ,j,k) * dvl3a(k)
+ 1 - etwid (k+1,1) + etwid (k,1) ) * dvl3a i(k)
+ if (kend .eq. ke) mflx(i ,j,ke+1) = mflux (ke+1,1)
+ endif
+ do 2030 k=max(kbeg-2,ks-1),kend+1
+ dqm = (dlo(i ,j,k ) - dlo(i ,j,k-1)) * dx3bi(k )
+ dqp = (dlo(i ,j,k+1) - dlo(i ,j,k )) * dx3bi(k+1)
+ dd(k,1) = max ( dqm * dqp, zro )
+2030 continue
+ dtwid (k,3) = ( 0.5 + q1 ) * ( dlo(i+2,j,k-1)
+ 3 - ( dx3a(k ) + xi ) * deod (k ,3) )
+2100 continue
+ end
diff --git a/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-2.f b/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-2.f
new file mode 100644
index 0000000..34c44de
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/O3-bb-slp-2.f
@@ -0,0 +1,40 @@
+! { dg-do compile }
+! { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } }
+ subroutine tranx3 (ibeg,jbeg,jend,kbeg,kend
+ & ,dlo,den
+ & ,edn)
+ parameter(in = 128+5
+ & , jn = 128+5
+ & , kn = 128+5)
+ parameter(ijkn = 128+5)
+ real*8 e (in,jn,kn), dqm, dvl3a (kn), dvl3ai (kn)
+ & , dtwid (ijkn,4), dd (ijkn,4)
+ & , etwid (ijkn,4), deod (ijkn,4)
+ real*8 dlo (in,jn,kn), den (in,jn,kn)
+ & , edn (in,jn,kn)
+ do 2100 j=jbeg-1,jend
+ i = ibeg - 1
+ do 1080 k=kbeg,kend
+ den(i ,j,k) = ( dlo(i ,j,k) * dvl3a(k)
+ 1 - etwid (k+1,1) + etwid (k,1) ) * dvl3a i(k)
+1080 continue
+ do 2030 k=max(kbeg-2,ks-1),kend+1
+ dqm = (dlo(i+2,j,k ) - dlo(i+2,j,k-1)) * dx3bi(k )
+ dd(k,4) = max ( dqm * dqp, zro )
+2030 continue
+ dtwid (k,3) = ( 0.5 + q1 ) * ( dlo(i+2,j,k-1)
+ 1 + ( dx3a(k-1) - xi ) * dd (k-1,3) )
+ 2 + ( 0.5 - q1 ) * ( dlo(i+2,j,k )
+ 3 - ( dx3a(k ) + xi ) * deod (k ,3) )
+ do 2080 k=kbeg,kend
+ den(i ,j,k) = ( dlo(i ,j,k) * dvl3a(k)
+ 1 - dtwid (k+1,3) + dtwid (k,3) ) * dvl3a i(k)
+ e (i+2,j,k) = ( e (i+2,j,k) * dvl3a(k)
+ 1 - etwid (k+1,3) + etwid (k,3) ) * dvl3a i(k)
+ edn(i+2,j,k) = e(i+2,j,k) / den(i+2,j,k)
+ e (i+3,j,k) = ( e (i+3,j,k) * dvl3a(k)
+ 1 - etwid (k+1,4) + etwid (k,4) ) * dvl3a i(k)
+ edn(i+3,j,k) = e(i+3,j,k) / den(i+3,j,k)
+2080 continue
+2100 continue
+ end
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 5d00b6f..3617918 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1084,6 +1084,33 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
exit = single_exit (loop);
basic_block new_preheader = new_bbs[0];
+ /* Before installing PHI arguments make sure that the edges
+ into them match those of the scalar loop we analyzed. This
+ makes sure the SLP tree matches up between the main vectorized
+ loop and the epilogue vectorized copies. */
+ if (single_succ_edge (preheader)->dest_idx
+ != single_succ_edge (new_bbs[0])->dest_idx)
+ {
+ basic_block swap_bb = new_bbs[1];
+ gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
+ std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
+ EDGE_PRED (swap_bb, 0)->dest_idx = 0;
+ EDGE_PRED (swap_bb, 1)->dest_idx = 1;
+ }
+ if (duplicate_outer_loop)
+ {
+ class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
+ if (loop_preheader_edge (scalar_loop)->dest_idx
+ != loop_preheader_edge (new_inner_loop)->dest_idx)
+ {
+ basic_block swap_bb = new_inner_loop->header;
+ gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
+ std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
+ EDGE_PRED (swap_bb, 0)->dest_idx = 0;
+ EDGE_PRED (swap_bb, 1)->dest_idx = 1;
+ }
+ }
+
add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
/* Skip new preheader since it's deleted if copy loop is added at entry. */
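
The predecessor-edge swap matters because an SLP node for a PHI now stores its children positionally, indexed by PHI argument slot (the edge dest_idx): if loop copying materializes the preheader and latch edges of the copy in the opposite order, the SLP tree built against the scalar loop would pair each child with the wrong argument. A hand-written illustration:

    /* analyzed loop:  # s_1 = PHI <init_2(preheader: slot 0), next_3(latch: slot 1)>
       copied loop:    # s_4 = PHI <next_5(latch: slot 0), init_6(preheader: slot 1)>
       The SLP child at slot 0 describes the preheader value, so the copy's
       predecessor edges are swapped until slot 0 is the preheader again.  */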
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index e42f327..75b7314 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7377,15 +7377,24 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
if (slp_node)
{
vec_initial_defs.reserve (vec_num);
- gcc_assert (slp_node == slp_node_instance->reduc_phis);
- stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
- tree neutral_op
- = neutral_op_for_slp_reduction (slp_node, vectype_out,
- STMT_VINFO_REDUC_CODE (reduc_info),
- first != NULL);
- get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis,
- &vec_initial_defs, vec_num,
- first != NULL, neutral_op);
+ if (nested_cycle)
+ {
+ unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
+ &vec_initial_defs);
+ }
+ else
+ {
+ gcc_assert (slp_node == slp_node_instance->reduc_phis);
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
+ tree neutral_op
+ = neutral_op_for_slp_reduction (slp_node, vectype_out,
+ STMT_VINFO_REDUC_CODE (reduc_info),
+ first != NULL);
+ get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis,
+ &vec_initial_defs, vec_num,
+ first != NULL, neutral_op);
+ }
}
else
{
@@ -7520,6 +7529,79 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo,
return true;
}
+/* Vectorizes PHIs. */
+
+bool
+vectorizable_phi (vec_info *,
+ stmt_vec_info stmt_info, gimple **vec_stmt,
+ slp_tree slp_node)
+{
+ if (!is_a <gphi *> (stmt_info->stmt) || !slp_node)
+ return false;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
+ return false;
+
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ slp_tree child;
+ unsigned i;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child)
+ if (!child)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "PHI node with unvectorized backedge def\n");
+ return false;
+ }
+ else if (!vect_maybe_update_slp_op_vectype (child, vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
+ }
+ STMT_VINFO_TYPE (stmt_info) = phi_info_type;
+ return true;
+ }
+
+ tree scalar_dest = gimple_phi_result (stmt_info->stmt);
+ basic_block bb = gimple_bb (stmt_info->stmt);
+ tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ auto_vec<tree> vec_oprnds;
+ auto_vec<gphi *> new_phis;
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt_info->stmt); ++i)
+ {
+ slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
+
+ /* Skip not yet vectorized defs. */
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
+ && SLP_TREE_VEC_STMTS (child).is_empty ())
+ continue;
+
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds);
+ if (!new_phis.exists ())
+ {
+ new_phis.create (vec_oprnds.length ());
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ {
+ /* Create the vectorized LC PHI node. */
+ new_phis.quick_push (create_phi_node (vec_dest, bb));
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phis[j]);
+ }
+ }
+ edge e = gimple_phi_arg_edge (as_a <gphi *> (stmt_info->stmt), i);
+ for (unsigned j = 0; j < vec_oprnds.length (); j++)
+ add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION);
+ }
+ /* We should have at least one already vectorized child. */
+ gcc_assert (new_phis.exists ());
+
+ return true;
+}
+
/* Function vect_min_worthwhile_factor.
@@ -8376,8 +8458,16 @@ vectorizable_live_operation (vec_info *vinfo,
gimple_seq stmts = NULL;
new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
&stmts, true, NULL_TREE);
-
- gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ if (is_a <gphi *> (vec_stmt))
+ {
+ gimple_stmt_iterator si = gsi_after_labels (gimple_bb (vec_stmt));
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ }
+ else
+ {
+ gimple_stmt_iterator si = gsi_for_stmt (vec_stmt);
+ gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT);
+ }
/* Replace use of lhs with newly computed result. If the use stmt is a
single arg PHI, just replace all uses of PHI result. It's necessary
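
Note the two-phase scheme vectorizable_phi uses: the vector PHIs are created as soon as the first vectorized child is available, with only the already-vectorized arguments attached; arguments whose defs sit on a backedge do not exist yet and are patched in later by vect_schedule_scc in tree-vect-slp.c. In miniature (hand-written pseudo-GIMPLE, names made up):

    /* phase 1, vectorizable_phi:   # vphi_1 = PHI <vinit_2(preheader)>
       ... the rest of the cycle is code-generated, producing vnext_3 ...
       phase 2, vect_schedule_scc:  add_phi_arg (vphi_1, vnext_3, latch)
       giving                       # vphi_1 = PHI <vinit_2(preheader), vnext_3(latch)>  */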
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 85865da..f544b55 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -115,7 +115,8 @@ vect_free_slp_tree (slp_tree node)
return;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_free_slp_tree (child);
+ if (child)
+ vect_free_slp_tree (child);
delete node;
}
@@ -148,9 +149,9 @@ vect_free_slp_instance (slp_instance instance)
/* Create an SLP node for SCALAR_STMTS. */
static slp_tree
-vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
+vect_create_new_slp_node (slp_tree node,
+ vec<stmt_vec_info> scalar_stmts, unsigned nops)
{
- slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
SLP_TREE_CHILDREN (node).create (nops);
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
@@ -159,18 +160,33 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
return node;
}
+/* Create an SLP node for SCALAR_STMTS. */
+
+static slp_tree
+vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
+{
+ return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops);
+}
+
/* Create an SLP node for OPS. */
static slp_tree
-vect_create_new_slp_node (vec<tree> ops)
+vect_create_new_slp_node (slp_tree node, vec<tree> ops)
{
- slp_tree node = new _slp_tree;
SLP_TREE_SCALAR_OPS (node) = ops;
SLP_TREE_DEF_TYPE (node) = vect_external_def;
SLP_TREE_LANES (node) = ops.length ();
return node;
}
+/* Create an SLP node for OPS. */
+
+static slp_tree
+vect_create_new_slp_node (vec<tree> ops)
+{
+ return vect_create_new_slp_node (new _slp_tree, ops);
+}
+
/* This structure is used in creation of an SLP tree. Each instance
corresponds to the same operand in a group of scalar stmts in an SLP
@@ -443,10 +459,13 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
else
commutative_op = commutative_tree_code (code) ? 0U : -1U;
}
+ else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+ number_of_oprnds = gimple_phi_num_args (stmt);
else
return -1;
bool swapped = (swap != 0);
+ bool backedge = false;
gcc_assert (!swapped || first_op_cond);
enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
for (i = 0; i < number_of_oprnds; i++)
@@ -463,6 +482,13 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
else
oprnd = gimple_op (stmt_info->stmt, map[i]);
}
+ else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+ {
+ oprnd = gimple_phi_arg_def (stmt, i);
+ backedge = dominated_by_p (CDI_DOMINATORS,
+ gimple_phi_arg_edge (stmt, i)->src,
+ gimple_bb (stmt_info->stmt));
+ }
else
oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
@@ -487,6 +513,26 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
oprnd_info->def_stmts.quick_push (def_stmt_info);
oprnd_info->ops.quick_push (oprnd);
+ /* If there's an extern def on a backedge make sure we can
+ code-generate at the region start.
+ ??? This is another case that could be fixed by adjusting
+ how we split the function but at the moment we'd have conflicting
+ goals there. */
+ if (backedge
+ && dts[i] == vect_external_def
+ && is_a <bb_vec_info> (vinfo)
+ && !SSA_NAME_IS_DEFAULT_DEF (oprnd)
+ && !dominated_by_p (CDI_DOMINATORS,
+ as_a <bb_vec_info> (vinfo)->bbs[0],
+ gimple_bb (SSA_NAME_DEF_STMT (oprnd))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: extern def %T only defined "
+ "on backedge\n", oprnd);
+ return -1;
+ }
+
if (first)
{
tree type = TREE_TYPE (oprnd);
@@ -521,6 +567,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
case vect_internal_def:
case vect_reduction_def:
case vect_induction_def:
+ case vect_nested_cycle:
break;
default:
@@ -815,6 +862,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
machine_mode vec_mode;
stmt_vec_info first_load = NULL, prev_first_load = NULL;
bool first_stmt_load_p = false, load_p = false;
+ bool first_stmt_phi_p = false, phi_p = false;
/* For every stmt in NODE find its def stmt/s. */
stmt_vec_info stmt_info;
@@ -904,6 +952,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
return false;
}
}
+ else if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ rhs_code = ERROR_MARK;
+ phi_p = true;
+ }
else
{
rhs_code = gimple_assign_rhs_code (stmt);
@@ -916,6 +969,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
*node_vectype = vectype;
first_stmt_code = rhs_code;
first_stmt_load_p = load_p;
+ first_stmt_phi_p = phi_p;
/* Shift arguments should be equal in all the packed stmts for a
vector shift with scalar shift operand. */
@@ -1021,7 +1075,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|| first_stmt_code == INDIRECT_REF
|| first_stmt_code == COMPONENT_REF
|| first_stmt_code == MEM_REF)))
- || first_stmt_load_p != load_p)
+ || first_stmt_load_p != load_p
+ || first_stmt_phi_p != phi_p)
{
if (dump_enabled_p ())
{
@@ -1077,6 +1132,18 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
}
}
+ if (phi_p
+ && (gimple_bb (first_stmt_info->stmt)
+ != gimple_bb (stmt_info->stmt)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: different BB for PHI "
+ "in %G", stmt);
+ /* Mismatch. */
+ continue;
+ }
+
if (!types_compatible_p (vectype, *node_vectype))
{
if (dump_enabled_p ())
@@ -1138,7 +1205,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
}
/* Not memory operation. */
- if (TREE_CODE_CLASS (rhs_code) != tcc_binary
+ if (!phi_p
+ && TREE_CODE_CLASS (rhs_code) != tcc_binary
&& TREE_CODE_CLASS (rhs_code) != tcc_unary
&& TREE_CODE_CLASS (rhs_code) != tcc_expression
&& TREE_CODE_CLASS (rhs_code) != tcc_comparison
@@ -1251,7 +1319,7 @@ typedef hash_map <vec <gimple *>, slp_tree,
scalar_stmts_to_slp_tree_map_t;
static slp_tree
-vect_build_slp_tree_2 (vec_info *vinfo,
+vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
poly_uint64 *max_nunits,
bool *matches, unsigned *npermutes, unsigned *tree_size,
@@ -1276,19 +1344,37 @@ vect_build_slp_tree (vec_info *vinfo,
}
return *leader;
}
+
+ /* Seed the bst_map with a stub node to be filled by vect_build_slp_tree_2
+ so we can pick up backedge destinations during discovery. */
+ slp_tree res = new _slp_tree;
+ SLP_TREE_DEF_TYPE (res) = vect_internal_def;
+ SLP_TREE_SCALAR_STMTS (res) = stmts;
+ bst_map->put (stmts.copy (), res);
+
poly_uint64 this_max_nunits = 1;
- slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
+ slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size,
&this_max_nunits,
matches, npermutes, tree_size, bst_map);
- if (res)
+ if (!res_)
+ {
+ bool existed_p = bst_map->put (stmts, NULL);
+ gcc_assert (existed_p);
+ /* Mark the node invalid so we can detect those when still in use
+ as backedge destinations. */
+ SLP_TREE_SCALAR_STMTS (res) = vNULL;
+ SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def;
+ vect_free_slp_tree (res);
+ }
+ else
{
+ gcc_assert (res_ == res);
res->max_nunits = this_max_nunits;
vect_update_max_nunits (max_nunits, this_max_nunits);
/* Keep a reference for the bst_map use. */
SLP_TREE_REF_COUNT (res)++;
}
- bst_map->put (stmts.copy (), res);
- return res;
+ return res_;
}
/* Recursively build an SLP tree starting from NODE.
@@ -1299,7 +1385,7 @@ vect_build_slp_tree (vec_info *vinfo,
was found. */
static slp_tree
-vect_build_slp_tree_2 (vec_info *vinfo,
+vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
vec<stmt_vec_info> stmts, unsigned int group_size,
poly_uint64 *max_nunits,
bool *matches, unsigned *npermutes, unsigned *tree_size,
@@ -1307,7 +1393,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
{
unsigned nops, i, this_tree_size = 0;
poly_uint64 this_max_nunits = *max_nunits;
- slp_tree node;
matches[0] = false;
@@ -1327,41 +1412,60 @@ vect_build_slp_tree_2 (vec_info *vinfo,
/* If the SLP node is a PHI (induction or reduction), terminate
the recursion. */
- if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
- {
- tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
- group_size);
- if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
- max_nunits))
- return NULL;
+ bool skip_args[2] = { false, false };
+ if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
+ {
+ tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+ group_size);
+ if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
+ max_nunits))
+ return NULL;
- vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
- /* Induction from different IVs is not supported. */
- if (def_type == vect_induction_def)
- {
- stmt_vec_info other_info;
- FOR_EACH_VEC_ELT (stmts, i, other_info)
- if (stmt_info != other_info)
- return NULL;
- }
- else if (def_type == vect_reduction_def
- || def_type == vect_double_reduction_def
- || def_type == vect_nested_cycle)
- {
- /* Else def types have to match. */
- stmt_vec_info other_info;
- FOR_EACH_VEC_ELT (stmts, i, other_info)
- if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
- return NULL;
- }
- else
- return NULL;
- (*tree_size)++;
- node = vect_create_new_slp_node (stmts, nops);
- SLP_TREE_VECTYPE (node) = vectype;
- return node;
- }
+ vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
+ /* Induction from different IVs is not supported. */
+ if (def_type == vect_induction_def)
+ {
+ stmt_vec_info other_info;
+ FOR_EACH_VEC_ELT (stmts, i, other_info)
+ if (stmt_info != other_info)
+ return NULL;
+
+ /* Induction PHIs are leafs. */
+ (*tree_size)++;
+ node = vect_create_new_slp_node (node, stmts, nops);
+ SLP_TREE_VECTYPE (node) = vectype;
+ SLP_TREE_CHILDREN (node).quick_grow_cleared (nops);
+ return node;
+ }
+ else if (def_type == vect_reduction_def
+ || def_type == vect_double_reduction_def
+ || def_type == vect_nested_cycle)
+ {
+ /* Else def types have to match. */
+ stmt_vec_info other_info;
+ bool all_same = true;
+ FOR_EACH_VEC_ELT (stmts, i, other_info)
+ {
+ if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
+ return NULL;
+ if (other_info != stmt_info)
+ all_same = false;
+ }
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ /* Reduction initial values are not explicitly represented. */
+ if (!nested_in_vect_loop_p (loop, stmt_info))
+ skip_args[loop_preheader_edge (loop)->dest_idx] = true;
+ /* Reduction chain backedge defs are filled manually.
+ ??? Need a better way to identify a SLP reduction chain PHI.
+ Or a better overall way to SLP match those. */
+ if (all_same && def_type == vect_reduction_def)
+ skip_args[loop_latch_edge (loop)->dest_idx] = true;
+ }
+ else if (def_type != vect_internal_def)
+ return NULL;
+ }
bool two_operators = false;
@@ -1386,7 +1490,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
{
*max_nunits = this_max_nunits;
(*tree_size)++;
- node = vect_create_new_slp_node (stmts, 0);
+ node = vect_create_new_slp_node (node, stmts, 0);
SLP_TREE_VECTYPE (node) = vectype;
/* And compute the load permutation. Whether it is actually
a permutation depends on the unrolling factor which is
@@ -1440,7 +1544,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
representing an actual vector without any scalar ops.
??? We could hide it completely with making the permute node
external? */
- node = vect_create_new_slp_node (stmts, 1);
+ node = vect_create_new_slp_node (node, stmts, 1);
SLP_TREE_CODE (node) = VEC_PERM_EXPR;
SLP_TREE_LANE_PERMUTATION (node) = lperm;
SLP_TREE_VECTYPE (node) = vectype;
@@ -1486,6 +1590,14 @@ vect_build_slp_tree_2 (vec_info *vinfo,
continue;
}
+ /* We skip certain operands during processing, for example
+ outer loop reduction initial defs. */
+ if (i <= 1 && skip_args[i])
+ {
+ children.safe_push (NULL);
+ continue;
+ }
+
if (is_a <bb_vec_info> (vinfo)
&& oprnd_info->first_dt == vect_internal_def)
{
@@ -1508,9 +1620,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
}
}
- if (oprnd_info->first_dt != vect_internal_def
- && oprnd_info->first_dt != vect_reduction_def
- && oprnd_info->first_dt != vect_induction_def)
+ if (oprnd_info->first_dt == vect_external_def
+ || oprnd_info->first_dt == vect_constant_def)
{
slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
@@ -1639,14 +1750,14 @@ fail:
child = vect_create_new_slp_node (oprnd_info->ops);
children.safe_push (child);
oprnd_info->ops = vNULL;
- oprnd_info->def_stmts = vNULL;
continue;
}
}
gcc_assert (child == NULL);
FOR_EACH_VEC_ELT (children, j, child)
- vect_free_slp_tree (child);
+ if (child)
+ vect_free_slp_tree (child);
vect_free_oprnd_info (oprnds_info);
return NULL;
}
@@ -1670,7 +1781,9 @@ fail:
unsigned n_vector_builds = 0;
FOR_EACH_VEC_ELT (children, j, child)
{
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ if (!child)
+ ;
+ else if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
all_uniform_p = false;
else if (!vect_slp_tree_uniform_p (child))
{
@@ -1684,7 +1797,8 @@ fail:
/* Roll back. */
matches[0] = false;
FOR_EACH_VEC_ELT (children, j, child)
- vect_free_slp_tree (child);
+ if (child)
+ vect_free_slp_tree (child);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1718,7 +1832,7 @@ fail:
/* Here we record the original defs since this
node represents the final lane configuration. */
- node = vect_create_new_slp_node (stmts, 2);
+ node = vect_create_new_slp_node (node, stmts, 2);
SLP_TREE_VECTYPE (node) = vectype;
SLP_TREE_CODE (node) = VEC_PERM_EXPR;
SLP_TREE_CHILDREN (node).quick_push (one);
@@ -1749,7 +1863,7 @@ fail:
return node;
}
- node = vect_create_new_slp_node (stmts, nops);
+ node = vect_create_new_slp_node (node, stmts, nops);
SLP_TREE_VECTYPE (node) = vectype;
SLP_TREE_CHILDREN (node).splice (children);
return node;
@@ -1835,7 +1949,8 @@ vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
vect_print_slp_tree (dump_kind, loc, node);
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_print_slp_graph (dump_kind, loc, child, visited);
+ if (child)
+ vect_print_slp_graph (dump_kind, loc, child, visited);
}
static void
@@ -1865,7 +1980,8 @@ vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
STMT_SLP_TYPE (stmt_info) = pure_slp;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_mark_slp_stmts (child, visited);
+ if (child)
+ vect_mark_slp_stmts (child, visited);
}
static void
@@ -1898,7 +2014,8 @@ vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_mark_slp_stmts_relevant (child, visited);
+ if (child)
+ vect_mark_slp_stmts_relevant (child, visited);
}
static void
@@ -1915,7 +2032,7 @@ static void
vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
hash_set<slp_tree> &visited)
{
- if (visited.add (node))
+ if (!node || visited.add (node))
return;
if (SLP_TREE_CHILDREN (node).length () == 0)
@@ -2176,34 +2293,62 @@ vect_build_slp_instance (vec_info *vinfo,
}
}
- /* If this is a reduction chain with a conversion in front
- amend the SLP tree with a node for that. */
- if (kind == slp_inst_kind_reduc_chain
- && STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
+ /* Fixup SLP reduction chains. */
+ if (kind == slp_inst_kind_reduc_chain)
{
- /* Get at the conversion stmt - we know it's the single use
- of the last stmt of the reduction chain. */
- gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
+ /* If this is a reduction chain with a conversion in front
+ amend the SLP tree with a node for that. */
+ gimple *scalar_def
+ = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
+ if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
+ {
+ /* Get at the conversion stmt - we know it's the single use
+ of the last stmt of the reduction chain. */
+ use_operand_p use_p;
+ bool r = single_imm_use (gimple_assign_lhs (scalar_def),
+ &use_p, &scalar_def);
+ gcc_assert (r);
+ stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
+ next_info = vect_stmt_to_vectorize (next_info);
+ scalar_stmts = vNULL;
+ scalar_stmts.create (group_size);
+ for (unsigned i = 0; i < group_size; ++i)
+ scalar_stmts.quick_push (next_info);
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
+ SLP_TREE_VECTYPE (conv) = STMT_VINFO_VECTYPE (next_info);
+ SLP_TREE_CHILDREN (conv).quick_push (node);
+ SLP_INSTANCE_TREE (new_instance) = conv;
+ /* We also have to fake this conversion stmt as SLP reduction
+ group so we don't have to mess with too much code
+ elsewhere. */
+ REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
+ REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
+ }
+ /* Fill the backedge child of the PHI SLP node. The
+ general matching code cannot find it because the
+ scalar code does not reflect how we vectorize the
+ reduction. */
use_operand_p use_p;
- gimple *use_stmt;
- bool r = single_imm_use (gimple_assign_lhs (tem),
- &use_p, &use_stmt);
- gcc_assert (r);
- stmt_vec_info next_info = vinfo->lookup_stmt (use_stmt);
- next_info = vect_stmt_to_vectorize (next_info);
- scalar_stmts = vNULL;
- scalar_stmts.create (group_size);
- for (unsigned i = 0; i < group_size; ++i)
- scalar_stmts.quick_push (next_info);
- slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
- SLP_TREE_VECTYPE (conv) = STMT_VINFO_VECTYPE (next_info);
- SLP_TREE_CHILDREN (conv).quick_push (node);
- SLP_INSTANCE_TREE (new_instance) = conv;
- /* We also have to fake this conversion stmt as SLP reduction
- group so we don't have to mess with too much code
- elsewhere. */
- REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
- REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
+ imm_use_iterator imm_iter;
+ class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
+ gimple_get_lhs (scalar_def))
+ /* There are exactly two non-debug uses, the reduction
+ PHI and the loop-closed PHI node. */
+ if (!is_gimple_debug (USE_STMT (use_p))
+ && gimple_bb (USE_STMT (use_p)) == loop->header)
+ {
+ auto_vec<stmt_vec_info, 64> phis (group_size);
+ stmt_vec_info phi_info
+ = vinfo->lookup_stmt (USE_STMT (use_p));
+ for (unsigned i = 0; i < group_size; ++i)
+ phis.quick_push (phi_info);
+ slp_tree *phi_node = bst_map->get (phis);
+ unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
+ SLP_TREE_CHILDREN (*phi_node)[dest_idx]
+ = SLP_INSTANCE_TREE (new_instance);
+ SLP_INSTANCE_TREE (new_instance)->refcnt++;
+ }
}
vinfo->slp_instances.safe_push (new_instance);
@@ -2437,60 +2582,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
return res;
}
-/* Fill in backedge SLP children in the SLP graph. */
-
-static void
-vect_analyze_slp_backedges (vec_info *vinfo, slp_tree node,
- scalar_stmts_to_slp_tree_map_t *bst_map,
- hash_set<slp_tree> &visited)
-{
- if (SLP_TREE_DEF_TYPE (node) != vect_internal_def
- || visited.add (node))
- return;
-
- slp_tree child;
- unsigned i;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (child)
- vect_analyze_slp_backedges (vinfo, child, bst_map, visited);
-
- /* Inductions are not vectorized by vectorizing their defining cycle
- but by materializing the values from SCEV data. */
- if (STMT_VINFO_DEF_TYPE (SLP_TREE_REPRESENTATIVE (node))
- == vect_induction_def)
- return;
-
- if (gphi *phi = dyn_cast <gphi *> (SLP_TREE_REPRESENTATIVE (node)->stmt))
- for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
- {
- auto_vec<stmt_vec_info, 64> stmts;
- unsigned j;
- stmt_vec_info phi_info;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, phi_info)
- {
- tree def = gimple_phi_arg_def (as_a <gphi *>(phi_info->stmt), i);
- stmt_vec_info def_info = vinfo->lookup_def (def);
- if (!def_info)
- break;
- stmts.safe_push (vect_stmt_to_vectorize (def_info));
- }
- if (j != SLP_TREE_LANES (node))
- continue;
- slp_tree *edge_def = bst_map->get (stmts);
- if (edge_def)
- {
- /* ??? We're currently not recording non-backedge children
- of PHIs like external reduction initial values. Avoid
- NULL entries in SLP_TREE_CHILDREN for those and thus
- for now simply only record backedge defs at a
- SLP_TREE_CHILDREN index possibly not matching that of
- the corresponding PHI argument index. */
- SLP_TREE_CHILDREN (node).quick_push (*edge_def);
- (*edge_def)->refcnt++;
- }
- }
-}
-
/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
trees of packed scalar stmts if SLP is possible. */
@@ -2541,13 +2632,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
max_tree_size);
}
- /* Fill in backedges. */
- slp_instance instance;
- hash_set<slp_tree> visited;
- FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
- vect_analyze_slp_backedges (vinfo, SLP_INSTANCE_TREE (instance),
- bst_map, visited);
-
/* The map keeps a reference on SLP nodes built, release that. */
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
it != bst_map->end (); ++it)
@@ -2572,11 +2656,16 @@ vect_slp_build_vertices (hash_set<slp_tree> &visited, slp_tree node,
node->vertex = vertices.length ();
vertices.safe_push (node);
- if (SLP_TREE_CHILDREN (node).is_empty ())
+
+ bool leaf = true;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (child)
+ {
+ leaf = false;
+ vect_slp_build_vertices (visited, child, vertices, leafs);
+ }
+ if (leaf)
leafs.safe_push (node->vertex);
- else
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_slp_build_vertices (visited, child, vertices, leafs);
}
/* Fill the vertices and leafs vector with all nodes in the SLP graph. */
@@ -2654,7 +2743,8 @@ vect_optimize_slp (vec_info *vinfo)
unsigned j;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
- add_edge (slpg, i, child->vertex);
+ if (child)
+ add_edge (slpg, i, child->vertex);
}
/* Compute (reverse) postorder on the inverted graph. */
@@ -2853,7 +2943,7 @@ vect_optimize_slp (vec_info *vinfo)
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
{
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ if (!child || SLP_TREE_DEF_TYPE (child) == vect_internal_def)
continue;
/* If the vector is uniform there's nothing to do. */
@@ -3291,6 +3381,7 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
unsigned int group_size = SLP_TREE_LANES (node);
SLP_TREE_DEF_TYPE (node) = vect_external_def;
SLP_TREE_SCALAR_OPS (node).safe_grow (group_size, true);
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
{
tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
@@ -3373,9 +3464,20 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
slp_tree child;
/* Assume we can code-generate all invariants. */
- if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+ if (!node
+ || SLP_TREE_DEF_TYPE (node) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (node) == vect_external_def)
return true;
+ if (SLP_TREE_DEF_TYPE (node) == vect_uninitialized_def)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Failed cyclic SLP reference in %p", node);
+ return false;
+ }
+ gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_internal_def);
+
/* If we already analyzed the exact same set of scalar stmts we're done.
We share the generated vector stmts for those. */
if (visited.contains (node)
@@ -3404,8 +3506,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
other referrers. */
if (res)
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
- if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
- || SLP_TREE_DEF_TYPE (child) == vect_external_def)
+ if (child
+ && (SLP_TREE_DEF_TYPE (child) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (child) == vect_external_def)
/* Perform usual caching, note code-generation still
code-gens these nodes multiple times but we expect
to CSE them later. */
@@ -3459,8 +3562,12 @@ static void
vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_instance instance,
stmt_vector_for_cost *cost_vec,
- hash_set<stmt_vec_info> &svisited)
+ hash_set<stmt_vec_info> &svisited,
+ hash_set<slp_tree> &visited)
{
+ if (visited.add (node))
+ return;
+
unsigned i;
stmt_vec_info stmt_info;
stmt_vec_info last_stmt = vect_find_last_scalar_stmt_in_slp (node);
@@ -3479,7 +3586,7 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
gimple *orig_stmt = orig_stmt_info->stmt;
ssa_op_iter op_iter;
def_operand_p def_p;
- FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
imm_use_iterator use_iter;
gimple *use_stmt;
@@ -3548,9 +3655,9 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
- cost_vec, svisited);
+ cost_vec, svisited, visited);
}
/* Analyze statements in SLP instances of VINFO. Return true if the
@@ -3615,11 +3722,13 @@ vect_slp_analyze_operations (vec_info *vinfo)
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
{
hash_set<stmt_vec_info> svisited;
+ hash_set<slp_tree> visited;
for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
{
vect_location = instance->location ();
vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
- instance, &instance->cost_vec, svisited);
+ instance, &instance->cost_vec, svisited,
+ visited);
}
}
@@ -3683,7 +3792,7 @@ vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
instance_leader, visited);
}
@@ -3759,7 +3868,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
the scalar cost. */
if (!STMT_VINFO_LIVE_P (stmt_info))
{
- FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
imm_use_iterator use_iter;
gimple *use_stmt;
@@ -3803,7 +3912,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
auto_vec<bool, 20> subtree_life;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
{
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
{
/* Do not directly pass LIFE to the recursive call, copy it to
confine changes in the callee to the current child/subtree. */
@@ -4272,19 +4381,18 @@ vect_slp_function (function *fun)
for (unsigned i = 0; i < n; i++)
{
basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]);
-
- /* Split when a basic block has multiple predecessors or when the
- edge into it exits a loop (because of implementation issues with
- respect to placement of CTORs for externals). */
bool split = false;
- edge e;
- if (!single_pred_p (bb)
- || ((e = single_pred_edge (bb)),
- loop_exit_edge_p (e->src->loop_father, e)))
- split = true;
+
/* Split when a BB is not dominated by the first block. */
+ if (!bbs.is_empty ()
+ && !dominated_by_p (CDI_DOMINATORS, bb, bbs[0]))
+ split = true;
+ /* Split when the loop determined by the first block
+ is exited. This is because we eventually insert
+ invariants at the start of the region. */
else if (!bbs.is_empty ()
- && !dominated_by_p (CDI_DOMINATORS, bb, bbs[0]))
+ && bbs[0]->loop_father != bb->loop_father
+ && !flow_loop_nested_p (bbs[0]->loop_father, bb->loop_father))
split = true;
if (split && !bbs.is_empty ())
@@ -5087,24 +5195,22 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
return true;
}
-/* Vectorize SLP instance tree in postorder. */
+/* Vectorize SLP NODE. */
static void
-vect_schedule_slp_instance (vec_info *vinfo,
- slp_tree node, slp_instance instance,
- hash_set<slp_tree> &visited)
+vect_schedule_slp_node (vec_info *vinfo,
+ slp_tree node, slp_instance instance)
{
gimple_stmt_iterator si;
int i;
slp_tree child;
- /* See if we have already vectorized the node in the graph of the
- SLP instance. */
- if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
- && SLP_TREE_VEC_STMTS (node).exists ())
- || SLP_TREE_VEC_DEFS (node).exists ())
+ /* For existing vectors there's nothing to do. */
+ if (SLP_TREE_VEC_DEFS (node).exists ())
return;
+ gcc_assert (SLP_TREE_VEC_STMTS (node).is_empty ());
+
/* Vectorize externals and constants. */
if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
|| SLP_TREE_DEF_TYPE (node) == vect_external_def)
@@ -5119,17 +5225,11 @@ vect_schedule_slp_instance (vec_info *vinfo,
return;
}
- /* ??? If we'd have a way to mark backedges that would be cheaper. */
- if (visited.add (node))
- return;
-
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_schedule_slp_instance (vinfo, child, instance, visited);
+ stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
- stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"------>vectorizing SLP node starting from: %G",
@@ -5148,13 +5248,12 @@ vect_schedule_slp_instance (vec_info *vinfo,
last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
si = gsi_for_stmt (last_stmt_info->stmt);
}
- else if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
- == cycle_phi_info_type)
- || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
- == induc_vec_info_type))
+ else if ((STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type
+ || STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type
+ || STMT_VINFO_TYPE (stmt_info) == phi_info_type)
+ && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
{
- /* For reduction and induction PHIs we do not use the
- insertion iterator. */
+ /* For PHI node vectorization we do not use the insertion iterator. */
si = gsi_none ();
}
else
@@ -5277,7 +5376,7 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
tree lhs;
stmt_vec_info stmt_info;
- if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+ if (!node || SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
if (visited.add (node))
@@ -5359,6 +5458,142 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
gsi_replace (&rgsi, rstmt, true);
}
+struct slp_scc_info
+{
+ bool on_stack;
+ int dfs;
+ int lowlink;
+};
+
+/* Schedule the SLP INSTANCE doing a DFS walk and collecting SCCs. */
+
+static void
+vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
+ hash_map<slp_tree, slp_scc_info> &scc_info,
+ int &maxdfs, vec<slp_tree> &stack)
+{
+ bool existed_p;
+ slp_scc_info *info = &scc_info.get_or_insert (node, &existed_p);
+ gcc_assert (!existed_p);
+ info->dfs = maxdfs;
+ info->lowlink = maxdfs;
+ info->on_stack = true;
+ maxdfs++;
+ stack.safe_push (node);
+ unsigned i;
+ slp_tree child;
+
+ /* ??? We're keeping SLP_TREE_CHILDREN of externalized nodes. */
+ if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ {
+ if (!child)
+ continue;
+ slp_scc_info *child_info = scc_info.get (child);
+ if (!child_info)
+ {
+ vect_schedule_scc (vinfo, child, instance, scc_info, maxdfs, stack);
+ /* Recursion might have re-allocated the node. */
+ info = scc_info.get (node);
+ child_info = scc_info.get (child);
+ info->lowlink = MIN (info->lowlink, child_info->lowlink);
+ }
+ else if (child_info->on_stack)
+ info->lowlink = MIN (info->lowlink, child_info->dfs);
+ }
+ if (info->lowlink != info->dfs)
+ return;
+
+ /* Singleton. */
+ if (stack.last () == node)
+ {
+ stack.pop ();
+ info->on_stack = false;
+ vect_schedule_slp_node (vinfo, node, instance);
+ return;
+ }
+ /* SCC. */
+ int last_idx = stack.length () - 1;
+ while (stack[last_idx] != node)
+ last_idx--;
+ /* We can break the cycle at PHIs that have at least one child
+ code generated. Then we could re-start the DFS walk until
+ all nodes in the SCC are covered (we might have new entries
+ for only back-reachable nodes). But it's simpler to just
+ iterate and schedule those that are ready. */
+ auto_vec<slp_tree, 4> phis_to_fixup;
+ unsigned todo = stack.length () - last_idx;
+ do
+ {
+ for (int idx = stack.length () - 1; idx >= last_idx; --idx)
+ {
+ slp_tree entry = stack[idx];
+ if (!entry)
+ continue;
+ bool phi = (SLP_TREE_CODE (entry) != VEC_PERM_EXPR
+ && is_a <gphi *> (SLP_TREE_REPRESENTATIVE (entry)->stmt));
+ bool ready = !phi;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (entry), i, child)
+ if (!child)
+ {
+ gcc_assert (phi);
+ ready = true;
+ break;
+ }
+ else if (scc_info.get (child)->on_stack)
+ {
+ if (!phi)
+ {
+ ready = false;
+ break;
+ }
+ }
+ else
+ {
+ if (phi)
+ {
+ ready = true;
+ break;
+ }
+ }
+ if (ready)
+ {
+ vect_schedule_slp_node (vinfo, entry, instance);
+ scc_info.get (entry)->on_stack = false;
+ stack[idx] = NULL;
+ todo--;
+ if (phi)
+ phis_to_fixup.safe_push (entry);
+ }
+ }
+ }
+ while (todo != 0);
+
+ /* Now fixup the backedge def of the vectorized PHIs in this SCC. */
+ slp_tree phi_node;
+ FOR_EACH_VEC_ELT (phis_to_fixup, i, phi_node)
+ {
+ gphi *phi = as_a <gphi *> (SLP_TREE_REPRESENTATIVE (phi_node)->stmt);
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, gimple_bb (phi)->preds)
+ {
+ unsigned dest_idx = e->dest_idx;
+ child = SLP_TREE_CHILDREN (phi_node)[dest_idx];
+ if (!child || SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ continue;
+ /* Simply fill all args. */
+ for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
+ vect_get_slp_vect_def (child, i),
+ e, gimple_phi_arg_location (phi, dest_idx));
+ }
+ }
+
+ /* Pop the SCC. */
+ stack.truncate (last_idx);
+}
+
/* Generate vector code for SLP_INSTANCES in the loop/basic block. */
void
@@ -5367,7 +5602,8 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> slp_instances)
slp_instance instance;
unsigned int i;
- hash_set<slp_tree> visited;
+ hash_map<slp_tree, slp_scc_info> scc_info;
+ int maxdfs = 0;
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
slp_tree node = SLP_INSTANCE_TREE (instance);
@@ -5381,8 +5617,11 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> slp_instances)
vect_print_slp_graph (MSG_NOTE, vect_location,
SLP_INSTANCE_TREE (instance));
}
- /* Schedule the tree of INSTANCE. */
- vect_schedule_slp_instance (vinfo, node, instance, visited);
+ /* Schedule the tree of INSTANCE, scheduling SCCs in a way to
+ have a PHI be the node breaking the cycle. */
+ auto_vec<slp_tree> stack;
+ if (!scc_info.get (node))
+ vect_schedule_scc (vinfo, node, instance, scc_info, maxdfs, stack);
if (SLP_INSTANCE_ROOT_STMT (instance))
vectorize_slp_instance_root_stmt (node, instance);
@@ -5398,25 +5637,6 @@ vect_schedule_slp (vec_info *vinfo, vec<slp_instance> slp_instances)
stmt_vec_info store_info;
unsigned int j;
- /* For reductions set the latch values of the vectorized PHIs. */
- if (instance->reduc_phis
- && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
- (instance->reduc_phis)) != FOLD_LEFT_REDUCTION
- && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
- (instance->reduc_phis)) != EXTRACT_LAST_REDUCTION)
- {
- slp_tree slp_node = root;
- slp_tree phi_node = instance->reduc_phis;
- gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
- edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
- gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
- == SLP_TREE_VEC_STMTS (slp_node).length ());
- for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
- add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
- vect_get_slp_vect_def (slp_node, j),
- e, gimple_phi_arg_location (phi, e->dest_idx));
- }
-
/* Remove scalar call stmts. Do not do this for basic-block
vectorization as not all uses may be vectorized.
??? Why should this be necessary? DCE should be able to
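
vect_schedule_scc above is Tarjan's SCC algorithm (DFS numbers, lowlink, an explicit stack with an on_stack flag), specialized to code-generate each component as it is popped and to break cycles at PHIs whose backedge inputs are filled in afterwards. For readers unfamiliar with the bookkeeping, here is a minimal self-contained sketch of the same walk over a plain integer graph (a generic illustration, not GCC code):

    #include <vector>
    #include <algorithm>
    #include <cstdio>

    struct scc_info { bool on_stack = false; int dfs = -1, lowlink = -1; };

    static void
    walk (int node, const std::vector<std::vector<int> > &succs,
          std::vector<scc_info> &info, int &maxdfs, std::vector<int> &stack)
    {
      info[node].dfs = info[node].lowlink = maxdfs++;
      info[node].on_stack = true;
      stack.push_back (node);
      for (int child : succs[node])
        if (info[child].dfs == -1)
          {
            /* Unvisited child: recurse, then inherit its lowlink.  */
            walk (child, succs, info, maxdfs, stack);
            info[node].lowlink = std::min (info[node].lowlink,
                                           info[child].lowlink);
          }
        else if (info[child].on_stack)
          /* Edge back into the SCC candidate on the stack.  */
          info[node].lowlink = std::min (info[node].lowlink, info[child].dfs);
      if (info[node].lowlink != info[node].dfs)
        return;
      /* NODE is an SCC root: pop the whole component.  This is the point
         where vect_schedule_scc code-generates, iterating within the SCC
         until every member is ready.  */
      printf ("SCC:");
      int n;
      do
        {
          n = stack.back ();
          stack.pop_back ();
          info[n].on_stack = false;
          printf (" %d", n);
        }
      while (n != node);
      printf ("\n");
    }

    int
    main ()
    {
      /* 1 -> 2 -> 1 is a cycle, like a PHI and its backedge def.  */
      std::vector<std::vector<int> > succs = { {1}, {2}, {1, 3}, {} };
      std::vector<scc_info> info (succs.size ());
      std::vector<int> stack;
      int maxdfs = 0;
      for (unsigned i = 0; i < succs.size (); ++i)
        if (info[i].dfs == -1)
          walk (i, succs, info, maxdfs, stack);
      /* Prints "SCC: 3", "SCC: 2 1", "SCC: 0" -- reverse topological
         order, so defs are emitted before their out-of-SCC uses.  */
    }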
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3575f25..7f0763f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10745,7 +10745,8 @@ vect_analyze_stmt (vec_info *vinfo,
|| vectorizable_condition (vinfo, stmt_info,
NULL, NULL, node, cost_vec)
|| vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
- cost_vec));
+ cost_vec)
+ || vectorizable_phi (vinfo, stmt_info, NULL, node));
}
if (!ok)
@@ -10885,6 +10886,11 @@ vect_transform_stmt (vec_info *vinfo,
gcc_assert (done);
break;
+ case phi_info_type:
+ done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node);
+ gcc_assert (done);
+ break;
+
default:
if (!STMT_VINFO_LIVE_P (stmt_info))
{
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 0e08652..b63dda3 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -684,7 +684,8 @@ vec_info::new_stmt_vec_info (gimple *stmt)
STMT_VINFO_SLP_VECT_ONLY (res) = false;
STMT_VINFO_VEC_STMTS (res) = vNULL;
- if (gimple_code (stmt) == GIMPLE_PHI
+ if (is_a <loop_vec_info> (this)
+ && gimple_code (stmt) == GIMPLE_PHI
&& is_loop_header_bb_p (gimple_bb (stmt)))
STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
else
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9c55383..13a02cd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -881,6 +881,7 @@ enum stmt_vec_info_type {
type_conversion_vec_info_type,
cycle_phi_info_type,
lc_phi_info_type,
+ phi_info_type,
loop_exit_ctrl_vec_info_type
};
@@ -1939,6 +1940,7 @@ extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info,
slp_tree, slp_instance);
extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
gimple **, slp_tree);
+extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree);
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *,