diff options
author | Richard Biener <rguenther@suse.de> | 2023-09-29 12:54:17 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2024-09-06 11:19:48 +0200 |
commit | d34cda720988674bcf8a24267c9e1ec61335d6de (patch) | |
tree | edafe2b69dd5ab7cdc3bb3c344812bd38808cc3f | |
parent | f9c5c12d24cc3a9da5c1d38e69a8aa5f58224c4a (diff) | |
download | gcc-d34cda720988674bcf8a24267c9e1ec61335d6de.zip gcc-d34cda720988674bcf8a24267c9e1ec61335d6de.tar.gz gcc-d34cda720988674bcf8a24267c9e1ec61335d6de.tar.bz2 |
Handle non-grouped stores as single-lane SLP
The following enables single-lane loop SLP discovery for non-grouped stores
and adjusts vectorizable_store to properly handle those.
For gfortran.dg/vect/vect-8.f90 we vectorize one additional loop,
not running into the "not falling back to strided accesses" bail-out.
I have not investigated in detail.
There is a set of i386 target assembler test FAILs,
gcc.target/i386/pr88531-2[bc].c in particular fail because the
target cannot identify SLP emulated gathers, see another mail from me.
Others need adjustment; I've adjusted only one of them with this patch.
In particular there are gcc.target/i386/cond_op_fma_*-1.c FAILs
that occur because we no longer fold a VEC_COND_EXPR during the
region value-numbering we do after vectorization since we
code-generate a { 0.0, ... } constant in the VEC_COND_EXPR now
instead of having a separate statement which gets forwarded
and then triggers folding. This leads to slightly different
code generation. The solution is probably to use gimple_build
when building stmts or, in this case, directly emit .COND_FMA
instead of .FMA and a VEC_COND_EXPR.
gcc.dg/vect/slp-19a.c mixes contiguous 8-lane SLP with a single
lane contiguous store from one lane of the 8-lane load and we
expect to use load-lanes for this reason but the heuristic for
forcing single-lane rediscovery as implemented doesn't trigger
here as it treats both SLP instances separately. This test FAILs on RISC-V.
gcc.dg/vect/slp-19c.c shows we fail to implement an interleaving
scheme for group_size 12 (by extension using the group_size 3
scheme to reduce to 4 lanes and then continue with a pow2 scheme
would work); we are also not considering load-lanes because of
the above reason, but aarch64 cannot do ld12. This test FAILs on AArch64
(the load requires three vectors) and on x86_64.
gcc.dg/vect/slp-19c.c FAILs with variable-length vectors because
of "SLP induction not supported for variable-length vectors".
gcc.target/aarch64/pr110449.c will FAIL because the (contested)
optimization in r14-2367-g224fd59b2dc8a5 was only applied to
loop-vect but not SLP vect. I'll leave it to target maintainers
to either XFAIL (the optimization is bad) or remove the test.
* tree-vect-slp.cc (vect_analyze_slp): Perform single-lane
loop SLP discovery for non-grouped stores. Move check on the root
for re-doing SLP analysis with a single lane for load/store-lanes
earlier and make sure we are dealing with a grouped access.
* tree-vect-stmts.cc (vectorizable_store): Always set
vec_num for SLP.
* gcc.dg/vect/O3-pr39675-2.c: Adjust expected number of SLP.
* gcc.dg/vect/fast-math-vect-call-1.c: Likewise.
* gcc.dg/vect/no-scevccp-slp-31.c: Likewise.
* gcc.dg/vect/slp-12b.c: Likewise.
* gcc.dg/vect/slp-12c.c: Likewise.
* gcc.dg/vect/slp-19a.c: Likewise.
* gcc.dg/vect/slp-19b.c: Likewise.
* gcc.dg/vect/slp-4-big-array.c: Likewise.
* gcc.dg/vect/slp-4.c: Likewise.
* gcc.dg/vect/slp-5.c: Likewise.
* gcc.dg/vect/slp-7.c: Likewise.
* gcc.dg/vect/slp-perm-7.c: Likewise.
* gcc.dg/vect/slp-37.c: Likewise.
* gcc.dg/vect/fast-math-vect-call-2.c: Likewise.
* gcc.dg/vect/slp-26.c: RISC-V can now SLP two instances.
* gcc.dg/vect/vect-outer-slp-3.c: Disable vectorization of
initialization loop.
* gcc.dg/vect/slp-reduc-5.c: Likewise.
* gcc.dg/vect/no-scevccp-outer-12.c: Un-XFAIL. SLP can handle
inner loop inductions with multiple vector stmt copies.
* gfortran.dg/vect/vect-8.f90: Adjust expected number of
vectorized loops.
* gcc.target/i386/vectorize1.c: Adjust what we scan for.
22 files changed, 69 insertions, 36 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c b/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c index c3f0f6d..ddaac56 100644 --- a/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c +++ b/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c @@ -27,5 +27,5 @@ foo () } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c index ad22f6e..6c9b7c3 100644 --- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c +++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c @@ -101,4 +101,4 @@ main () } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { { vect_call_copysignf && vect_call_sqrtf } && vect_perm3_int } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { { vect_call_copysignf && vect_call_sqrtf } && vect_perm3_int } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c index d51e17f..ed42a21 100644 --- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c +++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-2.c @@ -132,4 +132,4 @@ main () } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" { target vect_call_lrint } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_call_lrint } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { target vect_call_lrint } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c 
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c index c2d3031..6ace6ad 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c @@ -46,5 +46,4 @@ int main (void) return 0; } -/* Until we support multiple types in the inner loop */ -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! { aarch64*-*-* riscv*-*-* } } } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c index 22817a5..f6ac5f6 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c @@ -53,6 +53,7 @@ int main (void) return 0; } +/* We cannot handle grouped accesses in outer loops. */ +/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-12b.c b/gcc/testsuite/gcc.dg/vect/slp-12b.c index e2ea24d..8e06e3b 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-12b.c +++ b/gcc/testsuite/gcc.dg/vect/slp-12b.c @@ -47,6 +47,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided2 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
{ vect_strided2 && vect_int_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-12c.c b/gcc/testsuite/gcc.dg/vect/slp-12c.c index 9c48dff..a3536e3 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-12c.c +++ b/gcc/testsuite/gcc.dg/vect/slp-12c.c @@ -49,5 +49,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-19a.c b/gcc/testsuite/gcc.dg/vect/slp-19a.c index ca7a0a8..6c21416 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-19a.c +++ b/gcc/testsuite/gcc.dg/vect/slp-19a.c @@ -57,5 +57,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided8 } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
vect_strided8} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-19b.c b/gcc/testsuite/gcc.dg/vect/slp-19b.c index 4d53ac6..10b84aa 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-19b.c +++ b/gcc/testsuite/gcc.dg/vect/slp-19b.c @@ -54,5 +54,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-26.c b/gcc/testsuite/gcc.dg/vect/slp-26.c index cfb763b..cdb5d9c6 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-26.c +++ b/gcc/testsuite/gcc.dg/vect/slp-26.c @@ -50,4 +50,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { mips_msa || { amdgcn-*-* || { riscv_v || loongarch_sx } } } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { mips_msa || { amdgcn-*-* || { riscv_v || loongarch_sx } } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
{ mips_msa || { amdgcn-*-* || { riscv_v || loongarch_sx } } } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { mips_msa || { amdgcn-*-* || { riscv_v || loongarch_sx } } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { mips_msa || { amdgcn-*-* || loongarch_sx } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target riscv_v } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-37.c b/gcc/testsuite/gcc.dg/vect/slp-37.c index caee2bb..8a430e6 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-37.c +++ b/gcc/testsuite/gcc.dg/vect/slp-37.c @@ -60,4 +60,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_hw_misalign } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_hw_misalign } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c index fcda45f..f738a61 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c @@ -131,5 +131,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-4.c b/gcc/testsuite/gcc.dg/vect/slp-4.c index 29e741d..1ecad74 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-4.c @@ -125,5 +125,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using 
SLP" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-5.c b/gcc/testsuite/gcc.dg/vect/slp-5.c index 6d51f6a..484898c 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-5.c @@ -124,5 +124,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 5 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-7.c b/gcc/testsuite/gcc.dg/vect/slp-7.c index 2845a99..f83fdc9 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-7.c @@ -125,6 +125,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_short_mult } } }*/ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_short_mult } } } } }*/ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! { vect_short_mult } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 5 "vect" { target vect_short_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { ! 
{ vect_short_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c index f15736e..9c522ba 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c @@ -97,6 +97,6 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int || vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int || vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c index 11f5a741..0cde79d 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c @@ -36,6 +36,7 @@ int main (void) check_vect (); +#pragma GCC novector for (i = 0; i < N; i++) c[i] = (i+3) * -1; @@ -44,6 +45,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_int_min_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_min_max } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_min_max } } } */ /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c index 3dce514..d315db5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c @@ -30,6 +30,7 @@ int main () { check_vect (); +#pragma GCC novector for (int i = 0; i < 40; ++i) image[i] = 1.; diff --git 
a/gcc/testsuite/gcc.target/i386/vectorize1.c b/gcc/testsuite/gcc.target/i386/vectorize1.c index f3b9bfb..14a8c5f2 100644 --- a/gcc/testsuite/gcc.target/i386/vectorize1.c +++ b/gcc/testsuite/gcc.target/i386/vectorize1.c @@ -1,6 +1,6 @@ /* PR middle-end/28915 */ /* { dg-do compile } */ -/* { dg-options "-msse -O2 -ftree-vectorize -fdump-tree-vect" } */ +/* { dg-options "-msse -O2 -ftree-vectorize -fdump-tree-vect-optimized" } */ extern char lanip[3][40]; typedef struct @@ -17,4 +17,4 @@ int set_names (void) tt1.t[ln] = lanip[1]; } -/* { dg-final { scan-tree-dump "vect_cst" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 index 557a523..2a3fa90 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 @@ -708,5 +708,5 @@ END SUBROUTINE kernel ! { dg-final { scan-tree-dump-times "vectorized 2\[56\] loops" 1 "vect" { target aarch64_sve } } } ! { dg-final { scan-tree-dump-times "vectorized 2\[45\] loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } } -! { dg-final { scan-tree-dump-times "vectorized 2\[234\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } +! { dg-final { scan-tree-dump-times "vectorized 2\[345\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! 
aarch64*-*-* } } } } } diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index b6839c7..3d29736 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -4548,6 +4548,7 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo, opt_result vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) { + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); unsigned int i; stmt_vec_info first_element; slp_instance instance; @@ -4564,6 +4565,28 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) vect_analyze_slp_instance (vinfo, bst_map, first_element, slp_inst_kind_store, max_tree_size, &limit); + /* For loops also start SLP discovery from non-grouped stores. */ + if (loop_vinfo) + { + data_reference_p dr; + FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr) + if (DR_IS_WRITE (dr)) + { + stmt_vec_info stmt_info = vinfo->lookup_dr (dr)->stmt; + /* Grouped stores are already handled above. */ + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + continue; + vec<stmt_vec_info> stmts; + vec<stmt_vec_info> roots = vNULL; + vec<tree> remain = vNULL; + stmts.create (1); + stmts.quick_push (stmt_info); + vect_build_slp_instance (vinfo, slp_inst_kind_store, + stmts, roots, remain, max_tree_size, + &limit, bst_map, NULL); + } + } + if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo)) { for (unsigned i = 0; i < bb_vinfo->roots.length (); ++i) @@ -4750,6 +4773,18 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) int group_size = SLP_TREE_LANES (slp_root); tree vectype = SLP_TREE_VECTYPE (slp_root); + stmt_vec_info rep_info = SLP_TREE_REPRESENTATIVE (slp_root); + gimple *rep = STMT_VINFO_STMT (rep_info); + bool masked = (is_gimple_call (rep) + && gimple_call_internal_p (rep) + && internal_fn_mask_index + (gimple_call_internal_fn (rep)) != -1); + if (!STMT_VINFO_GROUPED_ACCESS (rep_info) + || slp_root->ldst_lanes + || (vect_store_lanes_supported (vectype, group_size, masked) + == IFN_LAST)) + continue; + auto_vec<slp_tree> loads; 
hash_set<slp_tree> visited; vect_gather_slp_loads (loads, slp_root, visited); @@ -4773,17 +4808,9 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) } } - gimple *rep = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (slp_root)); - bool masked = (is_gimple_call (rep) - && gimple_call_internal_p (rep) - && internal_fn_mask_index - (gimple_call_internal_fn (rep)) != -1); /* If the loads and stores can use load/store-lanes force re-discovery with single lanes. */ - if (loads_permuted - && !slp_root->ldst_lanes - && vect_store_lanes_supported (vectype, group_size, masked) - != IFN_LAST) + if (loads_permuted) { bool can_use_lanes = true; FOR_EACH_VEC_ELT (loads, j, load_node) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 25b120c..f6c5b7a 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8355,10 +8355,12 @@ vectorizable_store (vec_info *vinfo, return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies); } - if (grouped_store) + if (grouped_store || slp) { /* FORNOW */ - gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); + gcc_assert (!grouped_store + || !loop + || !nested_in_vect_loop_p (loop, stmt_info)); if (slp) { @@ -8367,8 +8369,9 @@ vectorizable_store (vec_info *vinfo, group. */ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; - gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info) - == first_stmt_info); + gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info) + || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) + == first_stmt_info)); first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); op = vect_get_store_rhs (first_stmt_info); } |