aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-13 17:58:14 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-13 17:58:14 +0000
commit018b2744fc7a4fe6fea1a078eae69c5465585668 (patch)
tree33e1fa7adf115a1367ec68d2c663a6bb8aa9b72f /gcc
parent3ea518f6f63e66e48f2d41cfa41e1efae653a484 (diff)
downloadgcc-018b2744fc7a4fe6fea1a078eae69c5465585668.zip
gcc-018b2744fc7a4fe6fea1a078eae69c5465585668.tar.gz
gcc-018b2744fc7a4fe6fea1a078eae69c5465585668.tar.bz2
Handle more SLP constant and extern definitions for variable VF
This patch adds support for vectorising SLP definitions that are constant or external (i.e. from outside the loop) when the vectorisation factor isn't known at compile time. It can only handle cases where the number of SLP statements is a power of 2. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * tree-vect-slp.c: Include gimple-fold.h and internal-fn.h. (can_duplicate_and_interleave_p): New function. (vect_get_and_check_slp_defs): Take the vector of statements rather than just the current one. Remove excess parentheses. Restrict rejection of vect_constant_def and vect_external_def for variable-length vectors to boolean types, or types for which can_duplicate_and_interleave_p is false. (vect_build_slp_tree_2): Update call to vect_get_and_check_slp_defs. (duplicate_and_interleave): New function. (vect_get_constant_vectors): Use gimple_build_vector for constant-length vectors and suitable variable-length constant vectors. Use duplicate_and_interleave for other variable-length vectors. Don't defer the update when inserting new statements. gcc/testsuite/ * gcc.dg/vect/no-scevccp-slp-30.c: Don't XFAIL for vect_variable_length && vect_load_lanes. * gcc.dg/vect/slp-1.c: Likewise. * gcc.dg/vect/slp-10.c: Likewise. * gcc.dg/vect/slp-12b.c: Likewise. * gcc.dg/vect/slp-12c.c: Likewise. * gcc.dg/vect/slp-17.c: Likewise. * gcc.dg/vect/slp-19b.c: Likewise. * gcc.dg/vect/slp-20.c: Likewise. * gcc.dg/vect/slp-21.c: Likewise. * gcc.dg/vect/slp-22.c: Likewise. * gcc.dg/vect/slp-23.c: Likewise. * gcc.dg/vect/slp-24-big-array.c: Likewise. * gcc.dg/vect/slp-24.c: Likewise. * gcc.dg/vect/slp-28.c: Likewise. * gcc.dg/vect/slp-39.c: Likewise. * gcc.dg/vect/slp-6.c: Likewise. * gcc.dg/vect/slp-7.c: Likewise. * gcc.dg/vect/slp-cond-1.c: Likewise. * gcc.dg/vect/slp-cond-2-big-array.c: Likewise. * gcc.dg/vect/slp-cond-2.c: Likewise. * gcc.dg/vect/slp-multitypes-1.c: Likewise. 
* gcc.dg/vect/slp-multitypes-8.c: Likewise. * gcc.dg/vect/slp-multitypes-9.c: Likewise. * gcc.dg/vect/slp-multitypes-10.c: Likewise. * gcc.dg/vect/slp-multitypes-12.c: Likewise. * gcc.dg/vect/slp-perm-6.c: Likewise. * gcc.dg/vect/slp-widen-mult-half.c: Likewise. * gcc.dg/vect/vect-live-slp-1.c: Likewise. * gcc.dg/vect/vect-live-slp-2.c: Likewise. * gcc.dg/vect/pr33953.c: Don't XFAIL for vect_variable_length. * gcc.dg/vect/slp-12a.c: Likewise. * gcc.dg/vect/slp-14.c: Likewise. * gcc.dg/vect/slp-15.c: Likewise. * gcc.dg/vect/slp-multitypes-2.c: Likewise. * gcc.dg/vect/slp-multitypes-4.c: Likewise. * gcc.dg/vect/slp-multitypes-5.c: Likewise. * gcc.target/aarch64/sve/slp_1.c: New test. * gcc.target/aarch64/sve/slp_1_run.c: Likewise. * gcc.target/aarch64/sve/slp_2.c: Likewise. * gcc.target/aarch64/sve/slp_2_run.c: Likewise. * gcc.target/aarch64/sve/slp_3.c: Likewise. * gcc.target/aarch64/sve/slp_3_run.c: Likewise. * gcc.target/aarch64/sve/slp_4.c: Likewise. * gcc.target/aarch64/sve/slp_4_run.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256622
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog18
-rw-r--r--gcc/testsuite/ChangeLog50
-rw-r--r--gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr33953.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-10.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-12a.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-12b.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-12c.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-14.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-15.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-17.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-19b.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-20.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-21.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-22.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-23.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-24-big-array.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-24.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-28.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-39.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-6.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-7.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-cond-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-cond-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-6.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_1.c40
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c30
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_2.c37
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c30
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_3.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c30
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_4.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c30
-rw-r--r--gcc/tree-vect-slp.c264
47 files changed, 635 insertions, 76 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 242ecb5..8bb12d3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,24 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * tree-vect-slp.c: Include gimple-fold.h and internal-fn.h.
+ (can_duplicate_and_interleave_p): New function.
+ (vect_get_and_check_slp_defs): Take the vector of statements
+ rather than just the current one. Remove excess parentheses.
+ Restrict rejection of vect_constant_def and vect_external_def
+ for variable-length vectors to boolean types, or types for which
+ can_duplicate_and_interleave_p is false.
+ (vect_build_slp_tree_2): Update call to vect_get_and_check_slp_defs.
+ (duplicate_and_interleave): New function.
+ (vect_get_constant_vectors): Use gimple_build_vector for
+ constant-length vectors and suitable variable-length constant
+ vectors. Use duplicate_and_interleave for other variable-length
+ vectors. Don't defer the update when inserting new statements.
+
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Make sure
min_profitable_iters doesn't go negative.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4b1b974..3f6b5d7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -2,6 +2,56 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * gcc.dg/vect/no-scevccp-slp-30.c: Don't XFAIL for vect_variable_length
+ && vect_load_lanes.
+ * gcc.dg/vect/slp-1.c: Likewise.
+ * gcc.dg/vect/slp-10.c: Likewise.
+ * gcc.dg/vect/slp-12b.c: Likewise.
+ * gcc.dg/vect/slp-12c.c: Likewise.
+ * gcc.dg/vect/slp-17.c: Likewise.
+ * gcc.dg/vect/slp-19b.c: Likewise.
+ * gcc.dg/vect/slp-20.c: Likewise.
+ * gcc.dg/vect/slp-21.c: Likewise.
+ * gcc.dg/vect/slp-22.c: Likewise.
+ * gcc.dg/vect/slp-23.c: Likewise.
+ * gcc.dg/vect/slp-24-big-array.c: Likewise.
+ * gcc.dg/vect/slp-24.c: Likewise.
+ * gcc.dg/vect/slp-28.c: Likewise.
+ * gcc.dg/vect/slp-39.c: Likewise.
+ * gcc.dg/vect/slp-6.c: Likewise.
+ * gcc.dg/vect/slp-7.c: Likewise.
+ * gcc.dg/vect/slp-cond-1.c: Likewise.
+ * gcc.dg/vect/slp-cond-2-big-array.c: Likewise.
+ * gcc.dg/vect/slp-cond-2.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-1.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-8.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-9.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-10.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-12.c: Likewise.
+ * gcc.dg/vect/slp-perm-6.c: Likewise.
+ * gcc.dg/vect/slp-widen-mult-half.c: Likewise.
+ * gcc.dg/vect/vect-live-slp-1.c: Likewise.
+ * gcc.dg/vect/vect-live-slp-2.c: Likewise.
+ * gcc.dg/vect/pr33953.c: Don't XFAIL for vect_variable_length.
+ * gcc.dg/vect/slp-12a.c: Likewise.
+ * gcc.dg/vect/slp-14.c: Likewise.
+ * gcc.dg/vect/slp-15.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-2.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-4.c: Likewise.
+ * gcc.dg/vect/slp-multitypes-5.c: Likewise.
+ * gcc.target/aarch64/sve/slp_1.c: New test.
+ * gcc.target/aarch64/sve/slp_1_run.c: Likewise.
+ * gcc.target/aarch64/sve/slp_2.c: Likewise.
+ * gcc.target/aarch64/sve/slp_2_run.c: Likewise.
+ * gcc.target/aarch64/sve/slp_3.c: Likewise.
+ * gcc.target/aarch64/sve/slp_3_run.c: Likewise.
+ * gcc.target/aarch64/sve/slp_4.c: Likewise.
+ * gcc.target/aarch64/sve/slp_4_run.c: Likewise.
+
+2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* gcc.dg/vect/vect-ooo-group-1.c: New test.
* gcc.target/aarch64/sve/mask_struct_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_1_run.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c
index 8f85659..fe9e7e7 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr33953.c b/gcc/testsuite/gcc.dg/vect/pr33953.c
index deb6682..4dd54cd 100644
--- a/gcc/testsuite/gcc.dg/vect/pr33953.c
+++ b/gcc/testsuite/gcc.dg/vect/pr33953.c
@@ -29,6 +29,6 @@ void blockmove_NtoN_blend_noremap32 (const UINT32 *srcdata, int srcwidth,
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-1.c b/gcc/testsuite/gcc.dg/vect/slp-1.c
index db06995..26b71d6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-1.c
@@ -118,5 +118,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-10.c b/gcc/testsuite/gcc.dg/vect/slp-10.c
index d5775ef..da44f26 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-10.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-10.c
@@ -107,7 +107,7 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult }} } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12a.c b/gcc/testsuite/gcc.dg/vect/slp-12a.c
index 522ab64..08a8f55 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12a.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12a.c
@@ -75,5 +75,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12b.c b/gcc/testsuite/gcc.dg/vect/slp-12b.c
index d1a28ac..48e7865 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12b.c
@@ -46,6 +46,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12c.c b/gcc/testsuite/gcc.dg/vect/slp-12c.c
index df76032..6650b8b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12c.c
@@ -48,5 +48,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-14.c b/gcc/testsuite/gcc.dg/vect/slp-14.c
index a591604..6af7081 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-14.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-14.c
@@ -111,5 +111,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-15.c b/gcc/testsuite/gcc.dg/vect/slp-15.c
index e09e967..dbced88 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-15.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-15.c
@@ -112,6 +112,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! { vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-17.c b/gcc/testsuite/gcc.dg/vect/slp-17.c
index 7f26884..6fa11e4 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-17.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-17.c
@@ -51,5 +51,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-19b.c b/gcc/testsuite/gcc.dg/vect/slp-19b.c
index e268382..237b36d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-19b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19b.c
@@ -53,5 +53,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-20.c b/gcc/testsuite/gcc.dg/vect/slp-20.c
index fb825ff..dc5eab6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-20.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-20.c
@@ -110,5 +110,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c
index 25c75d6..1f8c82e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-21.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-21.c
@@ -201,6 +201,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-22.c b/gcc/testsuite/gcc.dg/vect/slp-22.c
index b7a2015..e2a0002 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-22.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-22.c
@@ -129,5 +129,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c
index 88708e6..3cda497 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-23.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-23.c
@@ -109,6 +109,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
/* We fail to vectorize the second loop with variable-length SVE but
fall back to 128-bit vectors, which does use SLP. */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } xfail aarch64_sve } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c
index 58cedb4..abd3a87 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c
@@ -91,4 +91,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-24.c b/gcc/testsuite/gcc.dg/vect/slp-24.c
index 8d298cb..a45ce7d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-24.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-24.c
@@ -77,4 +77,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-28.c b/gcc/testsuite/gcc.dg/vect/slp-28.c
index 95db418..7778bad 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-28.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-28.c
@@ -89,5 +89,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-39.c b/gcc/testsuite/gcc.dg/vect/slp-39.c
index 330a626..85d32ea 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-39.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-39.c
@@ -21,4 +21,4 @@ void bar (double w)
}
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-6.c b/gcc/testsuite/gcc.dg/vect/slp-6.c
index 8205d54..ec85eb7 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-6.c
@@ -116,6 +116,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! { vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-7.c b/gcc/testsuite/gcc.dg/vect/slp-7.c
index bd7d44b..e836a1a 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-7.c
@@ -122,6 +122,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_short_mult } } }*/
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_short_mult } } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! { vect_short_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
index fd9165f..482fc08 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
@@ -122,4 +122,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
index d5da5f2..57cc67e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c
@@ -125,4 +125,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
index e206aed..7350695 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-cond-2.c
@@ -125,4 +125,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
index 4316d81..1850f06 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
index 68946c2..62580c0 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
@@ -46,5 +46,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
index d374345..d4c929d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
@@ -62,5 +62,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
index 0eca73a..28a645c 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
@@ -77,5 +77,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
index 2ab689e..faf17d6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
index 1153e7b..fb4f720 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
index 43faec9..d88ebe4 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
@@ -40,5 +40,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
index ad9ffb6..872b20c 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
@@ -40,5 +40,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
index b7d7657..4eb648a 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
@@ -104,7 +104,7 @@ int main (int argc, const char* argv[])
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
index e06267c..f5fb63e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
@@ -46,7 +46,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
index fc6a924..aff37c1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
@@ -68,5 +68,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
index 6c66d29..3568966 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
@@ -62,5 +62,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
new file mode 100644
index 0000000..dffc7b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE) \
+TYPE __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
+{ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ a[i * 2] += b; \
+ a[i * 2 + 1] += c; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* We should use one DUP for each of the 8-, 16- and 32-bit types,
+ although we currently use LD1RW for _Float16. We should use two
+ DUPs for each of the three 64-bit types. */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c
new file mode 100644
index 0000000..0ce056a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_1.c"
+
+#define N (103 * 2)
+
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[2] = { 3, 11 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, b[0], b[1], N / 2); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE orig = i * 2 + i % 5; \
+ TYPE expected = orig + b[i % 2]; \
+ if (a[i] != expected) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
new file mode 100644
index 0000000..0a25887
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE) \
+TYPE __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ a[i * 2] += 10; \
+ a[i * 2 + 1] += 17; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-not {\tzip1\t} } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c
new file mode 100644
index 0000000..bb5ef66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_2.c"
+
+#define N (103 * 2)
+
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[2] = { 10, 17 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, N / 2); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE orig = i * 2 + i % 5; \
+ TYPE expected = orig + b[i % 2]; \
+ if (a[i] != expected) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
new file mode 100644
index 0000000..534ad44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE) \
+TYPE __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ a[i * 4] += 41; \
+ a[i * 4 + 1] += 25; \
+ a[i * 4 + 2] += 31; \
+ a[i * 4 + 3] += 62; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type. */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
+/* 1 for each 16-bit type and 4 for double. */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 } } */
+/* 1 for each 32-bit type. */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */
+/* The 64-bit types need:
+
+ ZIP1 ZIP1 (2 ZIP2s optimized away)
+ ZIP1 ZIP2. */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c
new file mode 100644
index 0000000..0ec1cea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_3.c"
+
+#define N (77 * 4)
+
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[4] = { 41, 25, 31, 62 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, N / 4); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE orig = i * 2 + i % 5; \
+ TYPE expected = orig + b[i % 4]; \
+ if (a[i] != expected) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
new file mode 100644
index 0000000..09f9ded
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE) \
+TYPE __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ a[i * 8] += 99; \
+ a[i * 8 + 1] += 11; \
+ a[i * 8 + 2] += 17; \
+ a[i * 8 + 3] += 80; \
+ a[i * 8 + 4] += 63; \
+ a[i * 8 + 5] += 37; \
+ a[i * 8 + 6] += 24; \
+ a[i * 8 + 7] += 81; \
+ } \
+}
+
+#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
+ T (int64_t) \
+ T (uint64_t) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double. */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 } } */
+/* 1 for each 16-bit type. */
+/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #80\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #63\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */
+/* The 32-bit types need:
+
+ ZIP1 ZIP1 (2 ZIP2s optimized away)
+ ZIP1 ZIP2
+
+ and the 64-bit types need:
+
+ ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away)
+ ZIP1 ZIP2 ZIP1 ZIP2
+ ZIP1 ZIP2 ZIP1 ZIP2. */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 33 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c
new file mode 100644
index 0000000..3ca9dbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4_run.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "slp_4.c"
+
+#define N (59 * 8)
+
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ vec_slp_##TYPE (a, N / 8); \
+ for (unsigned int i = 0; i < N; ++i) \
+ { \
+ TYPE orig = i * 2 + i % 5; \
+ TYPE expected = orig + b[i % 8]; \
+ if (a[i] != expected) \
+ __builtin_abort (); \
+ } \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (HARNESS)
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f52d827..5f6a33a 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -43,6 +43,8 @@ along with GCC; see the file COPYING3. If not see
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
@@ -208,24 +210,87 @@ vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
return -1;
}
+/* Check whether it is possible to load COUNT elements of type ELT_MODE
+ using the method implemented by duplicate_and_interleave. Return true
+ if so, storing the number of intermediate vectors in *NVECTORS_OUT, the
+ type of each such vector in *VECTOR_TYPE_OUT and the low-half and high-half
+ interleave masks in PERMUTES[0] and PERMUTES[1] (each only if nonnull). */
+
+static bool
+can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
+ unsigned int *nvectors_out = NULL,
+ tree *vector_type_out = NULL,
+ tree *permutes = NULL)
+{
+ poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
+ poly_int64 nelts;
+ unsigned int nvectors = 1;
+ for (;;)
+ {
+ scalar_int_mode int_mode;
+ poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
+ if (multiple_p (current_vector_size, elt_bytes, &nelts)
+ && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+ {
+ tree int_type = build_nonstandard_integer_type
+ (GET_MODE_BITSIZE (int_mode), 1);
+ tree vector_type = build_vector_type (int_type, nelts);
+ if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
+ {
+ vec_perm_builder sel1 (nelts, 2, 3);
+ vec_perm_builder sel2 (nelts, 2, 3);
+ poly_int64 half_nelts = exact_div (nelts, 2);
+ for (unsigned int i = 0; i < 3; ++i)
+ {
+ sel1.quick_push (i);
+ sel1.quick_push (i + nelts);
+ sel2.quick_push (half_nelts + i);
+ sel2.quick_push (half_nelts + i + nelts);
+ }
+ vec_perm_indices indices1 (sel1, 2, nelts);
+ vec_perm_indices indices2 (sel2, 2, nelts);
+ if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
+ && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
+ {
+ if (nvectors_out)
+ *nvectors_out = nvectors;
+ if (vector_type_out)
+ *vector_type_out = vector_type;
+ if (permutes)
+ {
+ permutes[0] = vect_gen_perm_mask_checked (vector_type,
+ indices1);
+ permutes[1] = vect_gen_perm_mask_checked (vector_type,
+ indices2);
+ }
+ return true;
+ }
+ }
+ }
+ if (!multiple_p (elt_bytes, 2, &elt_bytes))
+ return false;
+ nvectors *= 2;
+ }
+}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
they are of a valid type and that they match the defs of the first stmt of
the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
- by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
- is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
- and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
- and code of comparison needs to be inverted. If there is any operand swap
- in this function, *SWAP is set to non-zero value.
+ by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
+ indicates swap is required for cond_expr stmts. Specifically, *SWAP
+ is 1 if STMT is cond and operands of comparison need to be swapped;
+ *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
+ If there is any operand swap in this function, *SWAP is set to non-zero
+ value.
If there was a fatal error return -1; if the error could be corrected by
swapping operands of father node of this one, return 1; if everything is
ok return 0. */
-
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
- gimple *stmt, unsigned stmt_num,
+ vec<gimple *> stmts, unsigned stmt_num,
vec<slp_oprnd_info> *oprnds_info)
{
+ gimple *stmt = stmts[stmt_num];
tree oprnd;
unsigned int i, number_of_oprnds;
gimple *def_stmt;
@@ -373,15 +438,15 @@ again:
types for reduction chains: the first stmt must be a
vect_reduction_def (a phi node), and the rest
vect_internal_def. */
- if (((oprnd_info->first_dt != dt
- && !(oprnd_info->first_dt == vect_reduction_def
- && dt == vect_internal_def)
- && !((oprnd_info->first_dt == vect_external_def
- || oprnd_info->first_dt == vect_constant_def)
- && (dt == vect_external_def
- || dt == vect_constant_def)))
- || !types_compatible_p (oprnd_info->first_op_type,
- TREE_TYPE (oprnd))))
+ tree type = TREE_TYPE (oprnd);
+ if ((oprnd_info->first_dt != dt
+ && !(oprnd_info->first_dt == vect_reduction_def
+ && dt == vect_internal_def)
+ && !((oprnd_info->first_dt == vect_external_def
+ || oprnd_info->first_dt == vect_constant_def)
+ && (dt == vect_external_def
+ || dt == vect_constant_def)))
+ || !types_compatible_p (oprnd_info->first_op_type, type))
{
/* Try swapping operands if we got a mismatch. */
if (i == 0
@@ -398,16 +463,12 @@ again:
return 1;
}
- }
-
- /* Check the types of the definitions. */
- switch (dt)
- {
- case vect_constant_def:
- case vect_external_def:
- /* We must already have set a vector size by now. */
- gcc_checking_assert (maybe_ne (current_vector_size, 0U));
- if (!current_vector_size.is_constant ())
+ if ((dt == vect_constant_def
+ || dt == vect_external_def)
+ && !current_vector_size.is_constant ()
+ && (TREE_CODE (type) == BOOLEAN_TYPE
+ || !can_duplicate_and_interleave_p (stmts.length (),
+ TYPE_MODE (type))))
{
if (dump_enabled_p ())
{
@@ -419,6 +480,13 @@ again:
}
return -1;
}
+ }
+
+ /* Check the types of the definitions. */
+ switch (dt)
+ {
+ case vect_constant_def:
+ case vect_external_def:
break;
case vect_reduction_def:
@@ -1119,7 +1187,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (stmts, i, stmt)
{
int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
- stmt, i, &oprnds_info);
+ stmts, i, &oprnds_info);
if (res != 0)
matches[(res == -1) ? 0 : i] = false;
if (!matches[0])
@@ -3219,6 +3287,118 @@ vect_mask_constant_operand_p (gimple *stmt, int opnum)
return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
+/* Build a variable-length vector in which the elements in ELTS are repeated
+ to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
+ RESULTS and add any new instructions to SEQ.
+
+ The approach we use is:
+
+ (1) Find a vector mode VM with integer elements of mode IM.
+
+ (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+ ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
+ from small vectors to IM.
+
+ (3) Duplicate each ELTS'[I] into a vector of mode VM.
+
+ (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
+ correct byte contents.
+
+ (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
+
+ We try to find the largest IM for which this sequence works, in order
+ to cut down on the number of interleaves. */
+
+static void
+duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
+ unsigned int nresults, vec<tree> &results)
+{
+ unsigned int nelts = elts.length ();
+ tree element_type = TREE_TYPE (vector_type);
+
+ /* (1) Find a vector mode VM with integer elements of mode IM. */
+ unsigned int nvectors = 1;
+ tree new_vector_type;
+ tree permutes[2];
+ if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
+ &nvectors, &new_vector_type,
+ permutes))
+ gcc_unreachable ();
+
+ /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
+ unsigned int partial_nelts = nelts / nvectors;
+ tree partial_vector_type = build_vector_type (element_type, partial_nelts);
+
+ tree_vector_builder partial_elts;
+ auto_vec<tree, 32> pieces (nvectors * 2);
+ pieces.quick_grow (nvectors * 2);
+ for (unsigned int i = 0; i < nvectors; ++i)
+ {
+ /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+ ELTS' has mode IM. */
+ partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
+ for (unsigned int j = 0; j < partial_nelts; ++j)
+ partial_elts.quick_push (elts[i * partial_nelts + j]);
+ tree t = gimple_build_vector (seq, &partial_elts);
+ t = gimple_build (seq, VIEW_CONVERT_EXPR,
+ TREE_TYPE (new_vector_type), t);
+
+ /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
+ pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
+ }
+
+ /* (4) Use a tree of VEC_PERM_EXPRs to create the VMs with the
+ correct byte contents.
+
+ We need to repeat the following operation log2(nvectors) times:
+
+ out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
+ out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
+
+ However, if each input repeats every N elements and the VF is
+ a multiple of N * 2, the HI result is the same as the LO. */
+ unsigned int in_start = 0;
+ unsigned int out_start = nvectors;
+ unsigned int hi_start = nvectors / 2;
+ /* A bound on the number of outputs needed to produce NRESULTS results
+ in the final iteration. */
+ unsigned int noutputs_bound = nvectors * nresults;
+ for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
+ {
+ noutputs_bound /= 2;
+ unsigned int limit = MIN (noutputs_bound, nvectors);
+ for (unsigned int i = 0; i < limit; ++i)
+ {
+ if ((i & 1) != 0
+ && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
+ 2 * in_repeat))
+ {
+ pieces[out_start + i] = pieces[out_start + i - 1];
+ continue;
+ }
+
+ tree output = make_ssa_name (new_vector_type);
+ tree input1 = pieces[in_start + (i / 2)];
+ tree input2 = pieces[in_start + (i / 2) + hi_start];
+ gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
+ input1, input2,
+ permutes[i & 1]);
+ gimple_seq_add_stmt (seq, stmt);
+ pieces[out_start + i] = output;
+ }
+ std::swap (in_start, out_start);
+ }
+
+ /* (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type. */
+ results.reserve (nresults);
+ for (unsigned int i = 0; i < nresults; ++i)
+ if (i < nvectors)
+ results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
+ pieces[in_start + i]));
+ else
+ results.quick_push (results[i - nvectors]);
+}
+
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
@@ -3235,7 +3415,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
gimple *stmt = stmts[0];
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
- unsigned nunits;
+ unsigned HOST_WIDE_INT nunits;
tree vec_cst;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
@@ -3249,6 +3429,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
tree neutral_op = NULL;
enum tree_code code = gimple_expr_code (stmt);
gimple_seq ctor_seq = NULL;
+ auto_vec<tree, 16> permute_results;
/* Check if vector type is a boolean vector. */
if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
@@ -3257,8 +3438,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
= build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
else
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
- /* Enforced by vect_get_and_check_slp_defs. */
- nunits = TYPE_VECTOR_SUBPARTS (vector_type).to_constant ();
if (STMT_VINFO_DATA_REF (stmt_vinfo))
{
@@ -3286,6 +3465,11 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
(s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
{s5, s6, s7, s8}. */
+ /* When using duplicate_and_interleave, we just need one element for
+ each scalar statement. */
+ if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
+ nunits = group_size;
+
number_of_copies = nunits * number_of_vectors / group_size;
number_of_places_left_in_vector = nunits;
@@ -3407,16 +3591,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
if (number_of_places_left_in_vector == 0)
{
- if (constant_p)
- vec_cst = elts.build ();
+ if (constant_p
+ ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
+ : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ vec_cst = gimple_build_vector (&ctor_seq, &elts);
else
{
- vec<constructor_elt, va_gc> *v;
- unsigned k;
- vec_alloc (v, nunits);
- for (k = 0; k < nunits; ++k)
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
- vec_cst = build_constructor (vector_type, v);
+ if (vec_oprnds->is_empty ())
+ duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ number_of_vectors,
+ permute_results);
+ vec_cst = permute_results[number_of_vectors - j - 1];
}
tree init;
gimple_stmt_iterator gsi;
@@ -3431,8 +3616,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
if (ctor_seq != NULL)
{
gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
- gsi_insert_seq_before_without_update (&gsi, ctor_seq,
- GSI_SAME_STMT);
+ gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
ctor_seq = NULL;
}
voprnds.quick_push (init);