aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ira Rosen <irar@il.ibm.com> 2008-08-19 08:31:41 +0000
committer Ira Rosen <irar@gcc.gnu.org> 2008-08-19 08:31:41 +0000
commit 5d59337297f9410a407e2e8e89f38ebbc9aa40df (patch)
tree f4481b3ce0ab856f0a57386df3e28b42d737975d
parent 45ea82c11f4e9984a8f902f1e70913d5472911d3 (diff)
downloadgcc-5d59337297f9410a407e2e8e89f38ebbc9aa40df.zip
gcc-5d59337297f9410a407e2e8e89f38ebbc9aa40df.tar.gz
gcc-5d59337297f9410a407e2e8e89f38ebbc9aa40df.tar.bz2
tree-vectorizer.c (supportable_widening_operation): Support multi-step conversion...
* tree-vectorizer.c (supportable_widening_operation): Support multi-step conversion, return the number of steps in such conversion and the required intermediate types. (supportable_narrowing_operation): Likewise. * tree-vectorizer.h (vect_pow2): New function. (supportable_widening_operation): Change argument types. (supportable_narrowing_operation): Likewise. (vectorizable_type_promotion): Add an argument. (vectorizable_type_demotion): Likewise. * tree-vect-analyze.c (vect_analyze_operations): Call vectorizable_type_promotion and vectorizable_type_demotion with additional argument. (vect_get_and_check_slp_defs): Detect patterns. (vect_build_slp_tree): Add an argument, don't fail in case of multiple types. (vect_analyze_slp_instance): Don't fail in case of multiple types. Call vect_build_slp_tree with correct arguments. Calculate unrolling factor according to the smallest type in the loop. (vect_detect_hybrid_slp_stmts): Include statements from patterns. * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Call supportable_widening_operation with correct arguments. * tree-vect-transform.c (vect_get_slp_defs): Allocate output vector operands lists according to the number of vector statements in left or right node, if exists. (vect_gen_widened_results_half): Remove unused argument. (vectorizable_conversion): Call supportable_widening_operation, supportable_narrowing_operation, and vect_gen_widened_results_half with correct arguments. (vectorizable_assignment): Change documentation, support multiple types in SLP. (vectorizable_operation): Likewise. (vect_get_loop_based_defs): New function. (vect_create_vectorized_demotion_stmts): Likewise. (vectorizable_type_demotion): Support loop-aware SLP and general multi-step conversion. Call vect_get_loop_based_defs and vect_create_vectorized_demotion_stmts for transformation. (vect_create_vectorized_promotion_stmts): New function. (vectorizable_type_promotion): Support loop-aware SLP and general multi-step conversion. 
Call vect_create_vectorized_promotion_stmts for transformation. (vectorizable_store): Change documentation, support multiple types in SLP. (vectorizable_load): Likewise. (vect_transform_stmt): Pass SLP_NODE to vectorizable_type_promotion and vectorizable_type_demotion. (vect_schedule_slp_instance): Move here the calculation of number of vectorized statements for each node from... (vect_schedule_slp): ... here. (vect_transform_loop): Call vect_schedule_slp without the last argument. From-SVN: r139225
-rw-r--r-- gcc/ChangeLog 53
-rw-r--r-- gcc/testsuite/ChangeLog 15
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-14.c 8
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-5.c 7
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-9.c 4
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c 52
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c 55
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c 68
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c 83
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c 94
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c 46
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c 46
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c 47
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c 47
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c 44
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c 52
-rw-r--r-- gcc/testsuite/lib/target-supports.exp 23
-rw-r--r-- gcc/tree-vect-analyze.c 125
-rw-r--r-- gcc/tree-vect-patterns.c 5
-rw-r--r-- gcc/tree-vect-transform.c 663
-rw-r--r-- gcc/tree-vectorizer.c 158
-rw-r--r-- gcc/tree-vectorizer.h 23
27 files changed, 1659 insertions, 349 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 472803f..0055802 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,56 @@
+2008-08-19 Ira Rosen <irar@il.ibm.com>
+
+ * tree-vectorizer.c (supportable_widening_operation): Support
+ multi-step conversion, return the number of steps in such conversion
+ and the required intermediate types.
+ (supportable_narrowing_operation): Likewise.
+ * tree-vectorizer.h (vect_pow2): New function.
+ (supportable_widening_operation): Change argument types.
+ (supportable_narrowing_operation): Likewise.
+ (vectorizable_type_promotion): Add an argument.
+ (vectorizable_type_demotion): Likewise.
+ * tree-vect-analyze.c (vect_analyze_operations): Call
+ vectorizable_type_promotion and vectorizable_type_demotion with
+ additional argument.
+ (vect_get_and_check_slp_defs): Detect patterns.
+ (vect_build_slp_tree): Add an argument, don't fail in case of multiple
+ types.
+ (vect_analyze_slp_instance): Don't fail in case of multiple types. Call
+ vect_build_slp_tree with correct arguments. Calculate unrolling factor
+ according to the smallest type in the loop.
+ (vect_detect_hybrid_slp_stmts): Include statements from patterns.
+ * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Call
+ supportable_widening_operation with correct arguments.
+ * tree-vect-transform.c (vect_get_slp_defs): Allocate output vector
+ operands lists according to the number of vector statements in left
+ or right node, if exists.
+ (vect_gen_widened_results_half): Remove unused argument.
+ (vectorizable_conversion): Call supportable_widening_operation,
+ supportable_narrowing_operation, and vect_gen_widened_results_half
+ with correct arguments.
+ (vectorizable_assignment): Change documentation, support multiple
+ types in SLP.
+ (vectorizable_operation): Likewise.
+ (vect_get_loop_based_defs): New function.
+ (vect_create_vectorized_demotion_stmts): Likewise.
+ (vectorizable_type_demotion): Support loop-aware SLP and general
+ multi-step conversion. Call vect_get_loop_based_defs and
+ vect_create_vectorized_demotion_stmts for transformation.
+ (vect_create_vectorized_promotion_stmts): New function.
+ (vectorizable_type_promotion): Support loop-aware SLP and general
+ multi-step conversion. Call vect_create_vectorized_promotion_stmts
+ for transformation.
+ (vectorizable_store): Change documentation, support multiple
+ types in SLP.
+ (vectorizable_load): Likewise.
+ (vect_transform_stmt): Pass SLP_NODE to
+ vectorizable_type_promotion and vectorizable_type_demotion.
+ (vect_schedule_slp_instance): Move here the calculation of number
+ of vectorized statements for each node from...
+ (vect_schedule_slp): ... here.
+ (vect_transform_loop): Call vect_schedule_slp without the last
+ argument.
+
2008-08-19 Dorit Nuzman <dorit@il.ibm.com>
PR bootstrap/37152
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1a25845..ef27c4b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2008-08-19 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/slp-multitypes-1.c: New testcase.
+ * gcc.dg/vect/slp-multitypes-2.c, gcc.dg/vect/slp-multitypes-3.c,
+ gcc.dg/vect/slp-multitypes-4.c, gcc.dg/vect/slp-multitypes-5.c,
+ gcc.dg/vect/slp-multitypes-6.c, gcc.dg/vect/slp-multitypes-7.c,
+ gcc.dg/vect/slp-multitypes-8.c, gcc.dg/vect/slp-multitypes-9.c,
+ gcc.dg/vect/slp-multitypes-10.c, gcc.dg/vect/slp-multitypes-11.c,
+ gcc.dg/vect/slp-multitypes-12.c, gcc.dg/vect/slp-widen-mult-u8.c,
+ gcc.dg/vect/slp-widen-mult-s16.c, gcc.dg/vect/vect-multitypes-16.c,
+ gcc.dg/vect/vect-multitypes-17.c: Likewise.
+ * gcc.dg/vect/slp-9.c: Now vectorizable using SLP.
+ * gcc.dg/vect/slp-14.c, gcc.dg/vect/slp-5.c: Likewise.
+ * lib/target-supports.exp (check_effective_target_vect_long_long): New.
+
2008-08-18 Adam Nemet <anemet@caviumnetworks.com>
* gcc.target/mips/ext-1.c: Add -mgp64 to dg-mips-options.
diff --git a/gcc/testsuite/gcc.dg/vect/slp-14.c b/gcc/testsuite/gcc.dg/vect/slp-14.c
index 62610dc..2051e28 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-14.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-14.c
@@ -15,7 +15,7 @@ main1 (int n)
unsigned short in2[N*16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned short out2[N*16];
- /* Multiple types are not SLPable yet. */
+ /* Multiple types are now SLPable. */
for (i = 0; i < n; i++)
{
a0 = in[i*8] + 5;
@@ -110,9 +110,7 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_strided && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-5.c b/gcc/testsuite/gcc.dg/vect/slp-5.c
index 57e9e5d..033de77 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-5.c
@@ -15,7 +15,7 @@ main1 ()
unsigned short ia[N];
unsigned int ib[N*2];
- /* Not SLPable for now: multiple types with SLP of the smaller type. */
+ /* Multiple types with SLP of the smaller type. */
for (i = 0; i < N; i++)
{
out[i*8] = in[i*8];
@@ -121,8 +121,7 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided_wide } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-9.c b/gcc/testsuite/gcc.dg/vect/slp-9.c
index cfb30bd..461f32e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-9.c
@@ -41,7 +41,7 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_widen_mult_hi_to_si } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } }*/
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
new file mode 100644
index 0000000..a3b93b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned short sout[N*8];
+ unsigned int iout[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ sout[i*4] = 8;
+ sout[i*4 + 1] = 18;
+ sout[i*4 + 2] = 28;
+ sout[i*4 + 3] = 38;
+
+ iout[i*4] = 8;
+ iout[i*4 + 1] = 18;
+ iout[i*4 + 2] = 28;
+ iout[i*4 + 3] = 38;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (sout[i*4] != 8
+ || sout[i*4 + 1] != 18
+ || sout[i*4 + 2] != 28
+ || sout[i*4 + 3] != 38
+ || iout[i*4] != 8
+ || iout[i*4 + 1] != 18
+ || iout[i*4 + 2] != 28
+ || iout[i*4 + 3] != 38)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
new file mode 100644
index 0000000..2827401
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+struct s
+{
+ unsigned char a;
+ unsigned char b;
+};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ struct s out[N*4];
+
+ for (i = 0; i < N*4; i++)
+ {
+ out[i].a = (unsigned char) in[i*2] + 1;
+ out[i].b = (unsigned char) in[i*2 + 1] + 2;
+ }
+
+ /* check results: */
+ for (i = 0; i < N*4; i++)
+ {
+ if (out[i].a != (unsigned char) in[i*2] + 1
+ || out[i].b != (unsigned char) in[i*2 + 1] + 2)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
new file mode 100644
index 0000000..2a04ce0c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 18
+
+struct s
+{
+ int a;
+ int b;
+ int c;
+};
+
+char in[N*3] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ struct s out[N];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i].a = (int) in[i*3] + 1;
+ out[i].b = (int) in[i*3 + 1] + 2;
+ out[i].c = (int) in[i*3 + 2] + 3;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i].a != (int) in[i*3] + 1
+ || out[i].b != (int) in[i*3 + 1] + 2
+ || out[i].c != (int) in[i*3 + 2] + 3)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
new file mode 100644
index 0000000..5d2140f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
@@ -0,0 +1,68 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned short sout[N*8];
+ unsigned int iout[N*8];
+ unsigned char cout[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ sout[i*4] = 8;
+ sout[i*4 + 1] = 18;
+ sout[i*4 + 2] = 28;
+ sout[i*4 + 3] = 38;
+
+ iout[i*4] = 8;
+ iout[i*4 + 1] = 18;
+ iout[i*4 + 2] = 28;
+ iout[i*4 + 3] = 38;
+
+ cout[i*4] = 1;
+ cout[i*4 + 1] = 2;
+ cout[i*4 + 2] = 3;
+ cout[i*4 + 3] = 4;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (sout[i*4] != 8
+ || sout[i*4 + 1] != 18
+ || sout[i*4 + 2] != 28
+ || sout[i*4 + 3] != 38
+ || iout[i*4] != 8
+ || iout[i*4 + 1] != 18
+ || iout[i*4 + 2] != 28
+ || iout[i*4 + 3] != 38
+ || cout[i*4] != 1
+ || cout[i*4 + 1] != 2
+ || cout[i*4 + 2] != 3
+ || cout[i*4 + 3] != 4)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
new file mode 100644
index 0000000..02faec7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
@@ -0,0 +1,83 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__ ((noinline)) int
+main1 (unsigned short a0, unsigned short a1, unsigned short a2,
+ unsigned short a3, unsigned short a4, unsigned short a5,
+ unsigned short a6, unsigned short a7, unsigned short a8,
+ unsigned short a9, unsigned short a10, unsigned short a11,
+ unsigned short a12, unsigned short a13, unsigned short a14,
+ unsigned short a15, unsigned char b0, unsigned char b1)
+{
+ int i;
+ unsigned short out[N*16];
+ unsigned char out2[N*16];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i*16] = a8;
+ out[i*16 + 1] = a7;
+ out[i*16 + 2] = a1;
+ out[i*16 + 3] = a2;
+ out[i*16 + 4] = a8;
+ out[i*16 + 5] = a5;
+ out[i*16 + 6] = a5;
+ out[i*16 + 7] = a4;
+ out[i*16 + 8] = a12;
+ out[i*16 + 9] = a13;
+ out[i*16 + 10] = a14;
+ out[i*16 + 11] = a15;
+ out[i*16 + 12] = a6;
+ out[i*16 + 13] = a9;
+ out[i*16 + 14] = a0;
+ out[i*16 + 15] = a7;
+
+ out2[i*2] = b1;
+ out2[i*2+1] = b0;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*16] != a8
+ || out[i*16 + 1] != a7
+ || out[i*16 + 2] != a1
+ || out[i*16 + 3] != a2
+ || out[i*16 + 4] != a8
+ || out[i*16 + 5] != a5
+ || out[i*16 + 6] != a5
+ || out[i*16 + 7] != a4
+ || out[i*16 + 8] != a12
+ || out[i*16 + 9] != a13
+ || out[i*16 + 10] != a14
+ || out[i*16 + 11] != a15
+ || out[i*16 + 12] != a6
+ || out[i*16 + 13] != a9
+ || out[i*16 + 14] != a0
+ || out[i*16 + 15] != a7
+ || out2[i*2] != b1
+ || out2[i*2 + 1] != b0)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,20,21);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c
new file mode 100644
index 0000000..0764441
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c
@@ -0,0 +1,94 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+unsigned char in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned int out[N*8];
+ unsigned char out2[N*8];
+
+ for (i = 0; i < N/2; i++)
+ {
+ out[i*8] = in[i*8] + 5;
+ out[i*8 + 1] = in[i*8 + 1] + 6;
+ out[i*8 + 2] = in[i*8 + 2] + 7;
+ out[i*8 + 3] = in[i*8 + 3] + 8;
+ out[i*8 + 4] = in[i*8 + 4] + 9;
+ out[i*8 + 5] = in[i*8 + 5] + 10;
+ out[i*8 + 6] = in[i*8 + 6] + 11;
+ out[i*8 + 7] = in[i*8 + 7] + 12;
+
+ out2[i*16] = in2[i*16] + 2;
+ out2[i*16 + 1] = in2[i*16 + 1] + 3;
+ out2[i*16 + 2] = in2[i*16 + 2] + 4;
+ out2[i*16 + 3] = in2[i*16 + 3] + 3;
+ out2[i*16 + 4] = in2[i*16 + 4] + 2;
+ out2[i*16 + 5] = in2[i*16 + 5] + 3;
+ out2[i*16 + 6] = in2[i*16 + 6] + 2;
+ out2[i*16 + 7] = in2[i*16 + 7] + 4;
+ out2[i*16 + 8] = in2[i*16 + 8] + 2;
+ out2[i*16 + 9] = in2[i*16 + 9] + 5;
+ out2[i*16 + 10] = in2[i*16 + 10] + 2;
+ out2[i*16 + 11] = in2[i*16 + 11] + 3;
+ out2[i*16 + 12] = in2[i*16 + 12] + 4;
+ out2[i*16 + 13] = in2[i*16 + 13] + 4;
+ out2[i*16 + 14] = in2[i*16 + 14] + 3;
+ out2[i*16 + 15] = in2[i*16 + 15] + 2;
+
+ }
+
+ /* check results: */
+ for (i = 0; i < N/2; i++)
+ {
+ if (out[i*8] != in[i*8] + 5
+ || out[i*8 + 1] != in[i*8 + 1] + 6
+ || out[i*8 + 2] != in[i*8 + 2] + 7
+ || out[i*8 + 3] != in[i*8 + 3] + 8
+ || out[i*8 + 4] != in[i*8 + 4] + 9
+ || out[i*8 + 5] != in[i*8 + 5] + 10
+ || out[i*8 + 6] != in[i*8 + 6] + 11
+ || out[i*8 + 7] != in[i*8 + 7] + 12
+ || out2[i*16] != in2[i*16] + 2
+ || out2[i*16 + 1] != in2[i*16 + 1] + 3
+ || out2[i*16 + 2] != in2[i*16 + 2] + 4
+ || out2[i*16 + 3] != in2[i*16 + 3] + 3
+ || out2[i*16 + 4] != in2[i*16 + 4] + 2
+ || out2[i*16 + 5] != in2[i*16 + 5] + 3
+ || out2[i*16 + 6] != in2[i*16 + 6] + 2
+ || out2[i*16 + 7] != in2[i*16 + 7] + 4
+ || out2[i*16 + 8] != in2[i*16 + 8] + 2
+ || out2[i*16 + 9] != in2[i*16 + 9] + 5
+ || out2[i*16 + 10] != in2[i*16 + 10] + 2
+ || out2[i*16 + 11] != in2[i*16 + 11] + 3
+ || out2[i*16 + 12] != in2[i*16 + 12] + 4
+ || out2[i*16 + 13] != in2[i*16 + 13] + 4
+ || out2[i*16 + 14] != in2[i*16 + 14] + 3
+ || out2[i*16 + 15] != in2[i*16 + 15] + 2)
+
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
new file mode 100644
index 0000000..770ccfc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ int out[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i*8] = (int) in[i*8] + 1;
+ out[i*8 + 1] = (int) in[i*8 + 1] + 2;
+ out[i*8 + 2] = (int) in[i*8 + 2] + 3;
+ out[i*8 + 3] = (int) in[i*8 + 3] + 4;
+ out[i*8 + 4] = (int) in[i*8 + 4] + 5;
+ out[i*8 + 5] = (int) in[i*8 + 5] + 6;
+ out[i*8 + 6] = (int) in[i*8 + 6] + 7;
+ out[i*8 + 7] = (int) in[i*8 + 7] + 8;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != (int) in[i*8] + 1
+ || out[i*8 + 1] != (int) in[i*8 + 1] + 2
+ || out[i*8 + 2] != (int) in[i*8 + 2] + 3
+ || out[i*8 + 3] != (int) in[i*8 + 3] + 4
+ || out[i*8 + 4] != (int) in[i*8 + 4] + 5
+ || out[i*8 + 5] != (int) in[i*8 + 5] + 6
+ || out[i*8 + 6] != (int) in[i*8 + 6] + 7
+ || out[i*8 + 7] != (int) in[i*8 + 7] + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
new file mode 100644
index 0000000..869d87d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ int out[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i*8] = (short) in[i*8] + 1;
+ out[i*8 + 1] = (short) in[i*8 + 1] + 2;
+ out[i*8 + 2] = (short) in[i*8 + 2] + 3;
+ out[i*8 + 3] = (short) in[i*8 + 3] + 4;
+ out[i*8 + 4] = (short) in[i*8 + 4] + 5;
+ out[i*8 + 5] = (short) in[i*8 + 5] + 6;
+ out[i*8 + 6] = (short) in[i*8 + 6] + 7;
+ out[i*8 + 7] = (short) in[i*8 + 7] + 8;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != (short) in[i*8] + 1
+ || out[i*8 + 1] != (short) in[i*8 + 1] + 2
+ || out[i*8 + 2] != (short) in[i*8 + 2] + 3
+ || out[i*8 + 3] != (short) in[i*8 + 3] + 4
+ || out[i*8 + 4] != (short) in[i*8 + 4] + 5
+ || out[i*8 + 5] != (short) in[i*8 + 5] + 6
+ || out[i*8 + 6] != (short) in[i*8 + 6] + 7
+ || out[i*8 + 7] != (short) in[i*8 + 7] + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c
new file mode 100644
index 0000000..34f4f0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned char out[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i*8] = (unsigned char) in[i*8] + 1;
+ out[i*8 + 1] = (unsigned char) in[i*8 + 1] + 2;
+ out[i*8 + 2] = (unsigned char) in[i*8 + 2] + 3;
+ out[i*8 + 3] = (unsigned char) in[i*8 + 3] + 4;
+ out[i*8 + 4] = (unsigned char) in[i*8 + 4] + 5;
+ out[i*8 + 5] = (unsigned char) in[i*8 + 5] + 6;
+ out[i*8 + 6] = (unsigned char) in[i*8 + 6] + 7;
+ out[i*8 + 7] = (unsigned char) in[i*8 + 7] + 8;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != (unsigned char) in[i*8] + 1
+ || out[i*8 + 1] != (unsigned char) in[i*8 + 1] + 2
+ || out[i*8 + 2] != (unsigned char) in[i*8 + 2] + 3
+ || out[i*8 + 3] != (unsigned char) in[i*8 + 3] + 4
+ || out[i*8 + 4] != (unsigned char) in[i*8 + 4] + 5
+ || out[i*8 + 5] != (unsigned char) in[i*8 + 5] + 6
+ || out[i*8 + 6] != (unsigned char) in[i*8 + 6] + 7
+ || out[i*8 + 7] != (unsigned char) in[i*8 + 7] + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c
new file mode 100644
index 0000000..8021c49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+char in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ int out[N*8];
+
+ for (i = 0; i < N; i++)
+ {
+ out[i*8] = (int) in[i*8] + 1;
+ out[i*8 + 1] = (int) in[i*8 + 1] + 2;
+ out[i*8 + 2] = (int) in[i*8 + 2] + 3;
+ out[i*8 + 3] = (int) in[i*8 + 3] + 4;
+ out[i*8 + 4] = (int) in[i*8 + 4] + 5;
+ out[i*8 + 5] = (int) in[i*8 + 5] + 6;
+ out[i*8 + 6] = (int) in[i*8 + 6] + 7;
+ out[i*8 + 7] = (int) in[i*8 + 7] + 8;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (out[i*8] != (int) in[i*8] + 1
+ || out[i*8 + 1] != (int) in[i*8 + 1] + 2
+ || out[i*8 + 2] != (int) in[i*8 + 2] + 3
+ || out[i*8 + 3] != (int) in[i*8 + 3] + 4
+ || out[i*8 + 4] != (int) in[i*8 + 4] + 5
+ || out[i*8 + 5] != (int) in[i*8 + 5] + 6
+ || out[i*8 + 6] != (int) in[i*8 + 6] + 7
+ || out[i*8 + 7] != (int) in[i*8 + 7] + 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
new file mode 100644
index 0000000..3d3340a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+char in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ int out[N*8];
+
+ for (i = 0; i < N*4; i++)
+ {
+ out[i*2] = (int) in[i*2] + 1;
+ out[i*2 + 1] = (int) in[i*2 + 1] + 2;
+ }
+
+ /* check results: */
+ for (i = 0; i < N*4; i++)
+ {
+ if (out[i*2] != (int) in[i*2] + 1
+ || out[i*2 + 1] != (int) in[i*2 + 1] + 2)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
new file mode 100644
index 0000000..7d317e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 8
+
+unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+__attribute__ ((noinline)) int
+main1 ()
+{
+ int i;
+ unsigned char out[N*8];
+
+ for (i = 0; i < N*4; i++)
+ {
+ out[i*2] = (unsigned char) in[i*2] + 1;
+ out[i*2 + 1] = (unsigned char) in[i*2 + 1] + 2;
+ }
+
+ /* check results: */
+ for (i = 0; i < N*4; i++)
+ {
+ if (out[i*2] != (unsigned char) in[i*2] + 1
+ || out[i*2 + 1] != (unsigned char) in[i*2 + 1] + 2)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
new file mode 100644
index 0000000..ebc4b50
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+short X[N] __attribute__ ((__aligned__(16)));
+short Y[N] __attribute__ ((__aligned__(16)));
+int result[N];
+
+/* short->int widening-mult */
+__attribute__ ((noinline)) int
+foo1(int len) {
+ int i;
+
+ for (i=0; i<len/2; i++) {
+ result[2*i] = X[2*i] * Y[2*i];
+ result[2*i+1] = X[2*i+1] * Y[2*i+1];
+ }
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i=0; i<N; i++) {
+ X[i] = i;
+ Y[i] = 64-i;
+ }
+
+ foo1 (N);
+
+ for (i=0; i<N; i++) {
+ if (result[i] != X[i] * Y[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
new file mode 100644
index 0000000..d5104218
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+unsigned char X[N] __attribute__ ((__aligned__(16)));
+unsigned char Y[N] __attribute__ ((__aligned__(16)));
+unsigned short result[N];
+
+/* char->short widening-mult */
+__attribute__ ((noinline)) int
+foo1(int len) {
+ int i;
+
+ for (i=0; i<len/2; i++) {
+ result[2*i] = X[2*i] * Y[2*i];
+ result[2*i+1] = X[2*i+1] * Y[2*i+1];
+ }
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i=0; i<N; i++) {
+ X[i] = i;
+ Y[i] = 64-i;
+ }
+
+ foo1 (N);
+
+ for (i=0; i<N; i++) {
+ if (result[i] != X[i] * Y[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
new file mode 100644
index 0000000..171de50
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_long_long } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+char x[N] __attribute__ ((__aligned__(16)));
+
+__attribute__ ((noinline)) int
+foo (int len, long long *z) {
+ int i;
+
+ for (i=0; i<len; i++) {
+ z[i] = x[i];
+ }
+}
+
+
+int main (void)
+{
+ char i;
+ long long z[N+4];
+
+ check_vect ();
+
+ for (i=0; i<N; i++) {
+ x[i] = i;
+ }
+
+ foo (N,z+2);
+
+ for (i=0; i<N; i++) {
+ if (z[i+2] != x[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c
new file mode 100644
index 0000000..61670e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_long_long } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+unsigned char uX[N] __attribute__ ((__aligned__(16)));
+unsigned char uresultX[N];
+unsigned long long uY[N] __attribute__ ((__aligned__(16)));
+unsigned char uresultY[N];
+
+/* Unsigned type demotion (di->qi) */
+
+__attribute__ ((noinline)) int
+foo1(int len) {
+ int i;
+
+ for (i=0; i<len; i++) {
+ uresultX[i] = uX[i];
+ uresultY[i] = (unsigned char)uY[i];
+ }
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i=0; i<N; i++) {
+ uX[i] = 16-i;
+ uY[i] = 16-i;
+ if (i%5 == 0)
+ uX[i] = 16-i;
+ }
+
+ foo1 (N);
+
+ for (i=0; i<N; i++) {
+ if (uresultX[i] != uX[i])
+ abort ();
+ if (uresultY[i] != (unsigned char)uY[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 2c9165c..78c143c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1526,6 +1526,29 @@ proc check_effective_target_vect_double { } {
return $et_vect_double_saved
}
+# Return 1 if the target supports hardware vectors of long long, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_long_long { } {
+ global et_vect_long_long_saved
+
+ if [info exists et_vect_long_long_saved] {
+ verbose "check_effective_target_vect_long_long: using cached result" 2
+ } else {
+ set et_vect_long_long_saved 0
+ if { [istarget i?86-*-*]
+ || [istarget x86_64-*-*]
+ || [istarget spu-*-*] } {
+ set et_vect_long_long_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_long_long: returning $et_vect_long_long_saved" 2
+ return $et_vect_long_long_saved
+}
+
+
# Return 1 if the target plus current options does not support a vector
# max instruction on "int", 0 otherwise.
#
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index c9753a0..fcb9cbd 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -462,8 +462,8 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
ok = true;
if (STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
- ok = (vectorizable_type_promotion (stmt, NULL, NULL)
- || vectorizable_type_demotion (stmt, NULL, NULL)
+ ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
+ || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
|| vectorizable_conversion (stmt, NULL, NULL, NULL)
|| vectorizable_operation (stmt, NULL, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL, NULL)
@@ -2497,7 +2497,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
tree *first_stmt_def0_type,
tree *first_stmt_def1_type,
tree *first_stmt_const_oprnd,
- int ncopies_for_cost)
+ int ncopies_for_cost,
+ bool *pattern0, bool *pattern1)
{
tree oprnd;
unsigned int i, number_of_oprnds;
@@ -2527,6 +2528,58 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
return false;
}
+ /* Check if DEF_STMT is a part of a pattern and get the def stmt from
+ the pattern. Check that all the stmts of the node are in the
+ pattern. */
+ if (def_stmt && vinfo_for_stmt (def_stmt)
+ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
+ {
+ if (!*first_stmt_dt0)
+ *pattern0 = true;
+ else
+ {
+ if (i == 1 && !*first_stmt_dt1)
+ *pattern1 = true;
+ else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "Build SLP failed: some of the stmts"
+ " are in a pattern, and others are not ");
+ print_generic_expr (vect_dump, oprnd, TDF_SLIM);
+ }
+
+ return false;
+ }
+ }
+
+ def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
+ dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
+
+ if (*dt == vect_unknown_def_type)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Unsupported pattern.");
+ return false;
+ }
+
+ switch (gimple_code (def_stmt))
+ {
+ case GIMPLE_PHI:
+ def = gimple_phi_result (def_stmt);
+ break;
+
+ case GIMPLE_ASSIGN:
+ def = gimple_assign_lhs (def_stmt);
+ break;
+
+ default:
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unsupported defining stmt: ");
+ return false;
+ }
+ }
+
if (!*first_stmt_dt0)
{
/* op0 of the first stmt of the group - store its info. */
@@ -2624,15 +2677,13 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
/* Recursively build an SLP tree starting from NODE.
Fail (and return FALSE) if def-stmts are not isomorphic, require data
permutation or are of unsupported types of operation. Otherwise, return
- TRUE.
- SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
- in the case of multiple types for now. */
+ TRUE. */
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
- unsigned int group_size, bool *slp_impossible,
+ unsigned int group_size,
int *inside_cost, int *outside_cost,
- int ncopies_for_cost)
+ int ncopies_for_cost, unsigned int *max_nunits)
{
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
@@ -2653,6 +2704,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
enum machine_mode vec_mode;
tree first_stmt_const_oprnd = NULL_TREE;
struct data_reference *first_dr;
+ bool pattern0 = false, pattern1 = false;
/* For every stmt in NODE find its def stmt/s. */
for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
@@ -2691,16 +2743,13 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
- if (ncopies > 1)
- {
- /* FORNOW. */
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP failed - multiple types ");
-
- *slp_impossible = true;
- return false;
- }
+ if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
+ fprintf (vect_dump, "SLP with multiple types ");
+ /* In case of multiple types we need to detect the smallest type. */
+ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
if (is_gimple_call (stmt))
rhs_code = CALL_EXPR;
else
@@ -2799,7 +2848,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
&first_stmt_def0_type,
&first_stmt_def1_type,
&first_stmt_const_oprnd,
- ncopies_for_cost))
+ ncopies_for_cost,
+ &pattern0, &pattern1))
return false;
}
else
@@ -2807,6 +2857,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
/* Load. */
if (i == 0)
{
+ /* In case of multiple types we need to detect the smallest
+ type. */
+ if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
+ *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
/* First stmt of the SLP group should be the first load of
the interleaving loop if data permutation is not allowed.
Check that there is no gap between the loads. */
@@ -2905,7 +2960,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
&first_stmt_def0_type,
&first_stmt_def1_type,
&first_stmt_const_oprnd,
- ncopies_for_cost))
+ ncopies_for_cost,
+ &pattern0, &pattern1))
return false;
}
}
@@ -2929,8 +2985,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
- slp_impossible, inside_cost, outside_cost,
- ncopies_for_cost))
+ inside_cost, outside_cost,
+ ncopies_for_cost, max_nunits))
return false;
SLP_TREE_LEFT (*node) = left_node;
@@ -2946,8 +3002,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
- slp_impossible, inside_cost, outside_cost,
- ncopies_for_cost))
+ inside_cost, outside_cost,
+ ncopies_for_cost, max_nunits))
return false;
SLP_TREE_RIGHT (*node) = right_node;
@@ -3003,7 +3059,7 @@ vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* Analyze an SLP instance starting from a group of strided stores. Call
- vect_build_slp_tree to build a tree of packed stmts if possible.
+ vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the loop. */
static bool
@@ -3018,8 +3074,8 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
unsigned int vectorization_factor = 0, ncopies;
bool slp_impossible = false;
int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
+ unsigned int max_nunits = 0;
- /* FORNOW: multiple types are not supported. */
scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
@@ -3035,13 +3091,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
nunits = TYPE_VECTOR_SUBPARTS (vectype);
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
ncopies = vectorization_factor / nunits;
- if (ncopies > 1)
- {
- if (vect_print_dump_info (REPORT_SLP))
- fprintf (vect_dump, "SLP failed - multiple types ");
-
- return false;
- }
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
@@ -3069,13 +3118,18 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
ncopies_for_cost = unrolling_factor * group_size / nunits;
/* Build the tree for the SLP instance. */
- if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
- &inside_cost, &outside_cost, ncopies_for_cost))
+ if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
+ &outside_cost, ncopies_for_cost, &max_nunits))
{
/* Create a new SLP instance. */
new_instance = XNEW (struct _slp_instance);
SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+ /* Calculate the unrolling factor based on the smallest type. */
+ if (max_nunits > nunits)
+ unrolling_factor = least_common_multiple (max_nunits, group_size)
+ / group_size;
+
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
@@ -3181,7 +3235,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node)
&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
if (vinfo_for_stmt (use_stmt)
- && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
+ && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
+ && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
vect_mark_slp_stmts (node, hybrid, i);
vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index d5bff5a..8486775 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -374,7 +374,8 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
tree dummy;
tree var;
enum tree_code dummy_code;
- bool dummy_bool;
+ int dummy_int;
+ VEC (tree, heap) *dummy_vec;
if (!is_gimple_assign (last_stmt))
return NULL;
@@ -415,7 +416,7 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
if (!vectype
|| !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype,
&dummy, &dummy, &dummy_code,
- &dummy_code, &dummy_bool, &dummy))
+ &dummy_code, &dummy_int, &dummy_vec))
return NULL;
*type_in = vectype;
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index ab7c6bf..9736633 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -1492,7 +1492,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
/* Get vectorized definitions from SLP_NODE that contains corresponding
vectorized def-stmts. */
-
+
static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
@@ -1502,7 +1502,7 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
- for (i = 0;
+ for (i = 0;
VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
i++)
{
@@ -1520,7 +1520,7 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
vect_get_slp_vect_defs() to retrieve them.
If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
- the right node. This is used when the second operand must remain scalar. */
+ the right node). This is used when the second operand must remain scalar. */
static void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
@@ -1528,15 +1528,22 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
{
gimple first_stmt;
enum tree_code code;
+ int number_of_vects;
+
+ /* The number of vector defs is determined by the number of vector statements
+ in the node from which we get those statements. */
+ if (SLP_TREE_LEFT (slp_node))
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
+ else
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Allocate memory for vectorized defs. */
- *vec_oprnds0 = VEC_alloc (tree, heap,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
- /* SLP_NODE corresponds either to a group of stores or to a group of
+ /* SLP_NODE corresponds either to a group of stores or to a group of
unary/binary operations. We don't call this function for loads. */
- if (SLP_TREE_LEFT (slp_node))
- /* The defs are already vectorized. */
+ if (SLP_TREE_LEFT (slp_node))
+ /* The defs are already vectorized. */
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
else
/* Build vectors from scalar defs. */
@@ -1544,7 +1551,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
- /* Since we don't call this function with loads, this is a group of
+ /* Since we don't call this function with loads, this is a group of
stores. */
return;
@@ -1552,11 +1559,17 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
return;
- *vec_oprnds1 = VEC_alloc (tree, heap,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
+ /* The number of vector defs is determined by the number of vector statements
+ in the node from which we get those statements. */
+ if (SLP_TREE_RIGHT (slp_node))
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
+ else
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
+ *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
if (SLP_TREE_RIGHT (slp_node))
- /* The defs are already vectorized. */
+ /* The defs are already vectorized. */
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
else
/* Build vectors from scalar defs. */
@@ -3425,7 +3438,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
/* Function vect_gen_widened_results_half
Create a vector stmt whose code, type, number of arguments, and result
- variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
+ variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
In the case that CODE is a CALL_EXPR, this means that a call to DECL
needs to be created (DECL is a function-decl of a target-builtin).
@@ -3433,7 +3446,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
static gimple
vect_gen_widened_results_half (enum tree_code code,
- tree vectype ATTRIBUTE_UNUSED,
tree decl,
tree vec_oprnd0, tree vec_oprnd1, int op_type,
tree vec_dest, gimple_stmt_iterator *gsi,
@@ -3517,8 +3529,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
VEC(tree,heap) *vec_oprnds0 = NULL;
tree vop0;
tree integral_type;
- tree dummy;
- bool dummy_bool;
+ VEC(tree,heap) *dummy = NULL;
+ int dummy_int;
/* Is STMT a vectorizable conversion? */
@@ -3602,10 +3614,10 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
&& !supportable_widening_operation (code, stmt, vectype_in,
&decl1, &decl2,
&code1, &code2,
- &dummy_bool, &dummy))
+ &dummy_int, &dummy))
|| (modifier == NARROW
&& !supportable_narrowing_operation (code, stmt, vectype_in,
- &code1, &dummy_bool, &dummy)))
+ &code1, &dummy_int, &dummy)))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "conversion not supported by target.");
@@ -3646,7 +3658,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
ssa_op_iter iter;
if (j == 0)
- vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
+ vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
else
vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
@@ -3694,7 +3706,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
/* Generate first half of the widened result: */
new_stmt
- = vect_gen_widened_results_half (code1, vectype_out, decl1,
+ = vect_gen_widened_results_half (code1, decl1,
vec_oprnd0, vec_oprnd1,
unary_op, vec_dest, gsi, stmt);
if (j == 0)
@@ -3705,7 +3717,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
/* Generate second half of the widened result: */
new_stmt
- = vect_gen_widened_results_half (code2, vectype_out, decl2,
+ = vect_gen_widened_results_half (code2, decl2,
vec_oprnd0, vec_oprnd1,
unary_op, vec_dest, gsi, stmt);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
@@ -3780,15 +3792,18 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int i;
VEC(tree,heap) *vec_oprnds = NULL;
tree vop;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis
- verifies this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp_node)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
if (ncopies > 1)
@@ -3978,7 +3993,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
int nunits_out;
tree vectype_out;
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+ int ncopies;
int j, i;
VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
tree vop0, vop1;
@@ -3986,10 +4001,14 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
bool shift_p = false;
bool scalar_shift_arg = false;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp_node)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
gcc_assert (ncopies >= 1);
if (!STMT_VINFO_RELEVANT_P (stmt_info))
@@ -4276,6 +4295,9 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
}
+ if (slp_node)
+ continue;
+
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
@@ -4291,6 +4313,109 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
}
+/* Get vectorized definitions for loop-based vectorization. For the first
+ operand we call vect_get_vec_def_for_operand() (with OPRND containing
+ scalar operand), and for the rest we get a copy with
+ vect_get_vec_def_for_stmt_copy() using the previous vector definition
+ (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
+ The vectors are collected into VEC_OPRNDS. */
+
+static void
+vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
+ VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
+{
+ tree vec_oprnd;
+
+ /* Get first vector operand. */
+ /* All the vector operands except the very first one (that is scalar oprnd)
+ are stmt copies. */
+ if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
+ vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
+ else
+ vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
+
+ VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
+
+ /* Get second vector operand. */
+ vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+ VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
+
+ *oprnd = vec_oprnd;
+
+ /* For conversion in multiple steps, continue to get operands
+ recursively. */
+ if (multi_step_cvt)
+ vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
+}
+
+
+/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
+ For multi-step conversions store the resulting vectors and call the function
+ recursively. */
+
+static void
+vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
+ int multi_step_cvt, gimple stmt,
+ VEC (tree, heap) *vec_dsts,
+ gimple_stmt_iterator *gsi,
+ slp_tree slp_node, enum tree_code code,
+ stmt_vec_info *prev_stmt_info)
+{
+ unsigned int i;
+ tree vop0, vop1, new_tmp, vec_dest;
+ gimple new_stmt;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ vec_dest = VEC_pop (tree, vec_dsts);
+
+ for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
+ {
+ /* Create demotion operation. */
+ vop0 = VEC_index (tree, *vec_oprnds, i);
+ vop1 = VEC_index (tree, *vec_oprnds, i + 1);
+ new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
+ new_tmp = make_ssa_name (vec_dest, new_stmt);
+ gimple_assign_set_lhs (new_stmt, new_tmp);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+ if (multi_step_cvt)
+ /* Store the resulting vector for next recursive call. */
+ VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
+ else
+ {
+ /* This is the last step of the conversion sequence. Store the
+ vectors in SLP_NODE or in vector info of the scalar statement
+ (or in STMT_VINFO_RELATED_STMT chain). */
+ if (slp_node)
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+ else
+ {
+ if (!*prev_stmt_info)
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
+
+ *prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+ }
+ }
+
+ /* For multi-step demotion operations we first generate demotion operations
+ from the source type to the intermediate types, and then combine the
+ results (stored in VEC_OPRNDS) in demotion operation to the destination
+ type. */
+ if (multi_step_cvt)
+ {
+ /* At each level of recursion we have half of the operands we had at the
+ previous level. */
+ VEC_truncate (tree, *vec_oprnds, (i+1)/2);
+ vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
+ stmt, vec_dsts, gsi, slp_node,
+ code, prev_stmt_info);
+ }
+}
+
+
/* Function vectorizable_type_demotion
Check if STMT performs a binary or unary operation that involves
@@ -4301,31 +4426,28 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
bool
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
- gimple *vec_stmt)
+ gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest;
tree scalar_dest;
tree op0;
- tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code, code1 = ERROR_MARK;
- tree new_temp;
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
- gimple new_stmt;
stmt_vec_info prev_stmt_info;
int nunits_in;
int nunits_out;
tree vectype_out;
int ncopies;
- int j;
+ int j, i;
tree vectype_in;
- tree intermediate_type = NULL_TREE, narrow_type, double_vec_dest;
- bool double_op = false;
- tree first_vector, second_vector;
- tree vec_oprnd2 = NULL_TREE, vec_oprnd3 = NULL_TREE, last_oprnd = NULL_TREE;
+ int multi_step_cvt = 0;
+ VEC (tree, heap) *vec_oprnds0 = NULL;
+ VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
+ tree last_oprnd, intermediate_type;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -4355,11 +4477,17 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
if (!vectype_out)
return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_in != nunits_out / 2
- && nunits_in != nunits_out/4)
+ if (nunits_in >= nunits_out)
return false;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
+ if (slp_node)
+ ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+
gcc_assert (ncopies >= 1);
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
@@ -4379,7 +4507,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Supportable by target? */
if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
- &double_op, &intermediate_type))
+ &multi_step_cvt, &interm_types))
return false;
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
@@ -4398,89 +4526,157 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
ncopies);
- /* Handle def. */
- /* In case of double demotion, we first generate demotion operation to the
- intermediate type, and then from that type to the final one. */
- if (double_op)
- narrow_type = intermediate_type;
+ /* In case of multi-step demotion, we first generate demotion operations to
+ the intermediate types, and then from those types to the final one.
+ We create vector destinations for the intermediate types (INTERM_TYPES)
+ received from supportable_narrowing_operation, and store them in the
+ correct order for future use in vect_create_vectorized_demotion_stmts(). */
+ if (multi_step_cvt)
+ vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
else
- narrow_type = vectype_out;
- vec_dest = vect_create_destination_var (scalar_dest, narrow_type);
- double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ vec_dsts = VEC_alloc (tree, heap, 1);
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+
+ if (multi_step_cvt)
+ {
+ for (i = VEC_length (tree, interm_types) - 1;
+ VEC_iterate (tree, interm_types, i, intermediate_type); i--)
+ {
+ vec_dest = vect_create_destination_var (scalar_dest,
+ intermediate_type);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+ }
+ }
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. */
+ last_oprnd = op0;
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
/* Handle uses. */
- if (j == 0)
- {
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- if (double_op)
- {
- /* For double demotion we need four operands. */
- vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
- vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
- }
- }
+ if (slp_node)
+ vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
else
- {
- vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], last_oprnd);
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- if (double_op)
- {
- /* For double demotion we need four operands. */
- vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
- vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
- }
- }
+ {
+ VEC_free (tree, heap, vec_oprnds0);
+ vec_oprnds0 = VEC_alloc (tree, heap,
+ (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
+ vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
+ vect_pow2 (multi_step_cvt) - 1);
+ }
/* Arguments are ready. Create the new vector stmts. */
- new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
- vec_oprnd1);
- first_vector = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, first_vector);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
+ vect_create_vectorized_demotion_stmts (&vec_oprnds0,
+ multi_step_cvt, stmt, tmp_vec_dsts,
+ gsi, slp_node, code1,
+ &prev_stmt_info);
+ }
+
+ VEC_free (tree, heap, vec_oprnds0);
+ VEC_free (tree, heap, vec_dsts);
+ VEC_free (tree, heap, tmp_vec_dsts);
+ VEC_free (tree, heap, interm_types);
+
+ *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ return true;
+}
- /* In the next iteration we will get copy for this operand. */
- last_oprnd = vec_oprnd1;
- if (double_op)
+/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
+ and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
+ the resulting vectors and call the function recursively.
+
+ Every operand in *VEC_OPRNDS0 produces two statements: the first is built
+ with CODE1/DECL1 and the second with CODE2/DECL2. The destination of the
+ current step is popped from VEC_DSTS. MULTI_STEP_CVT is the number of
+ steps that remain after this one: when it is zero the new statements are
+ recorded in SLP_NODE (if it is given) or chained through PREV_STMT_INFO;
+ otherwise their results replace *VEC_OPRNDS0 and become the operands of
+ the next step. */
+
+static void
+vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
+ VEC (tree, heap) **vec_oprnds1,
+ int multi_step_cvt, gimple stmt,
+ VEC (tree, heap) *vec_dsts,
+ gimple_stmt_iterator *gsi,
+ slp_tree slp_node, enum tree_code code1,
+ enum tree_code code2, tree decl1,
+ tree decl2, int op_type,
+ stmt_vec_info *prev_stmt_info)
+{
+ int i;
+ tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
+ gimple new_stmt1, new_stmt2;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ VEC (tree, heap) *vec_tmp;
+
+ vec_dest = VEC_pop (tree, vec_dsts);
+ /* Two results are produced per operand, hence twice the length. */
+ vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
+
+ for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
+ {
+ if (op_type == binary_op)
+ vop1 = VEC_index (tree, *vec_oprnds1, i);
+ else
+ vop1 = NULL_TREE;
+
+ /* Generate the two halves of promotion operation. */
+ new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
+ op_type, vec_dest, gsi, stmt);
+ new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
+ op_type, vec_dest, gsi, stmt);
+ if (is_gimple_call (new_stmt1))
{
- /* For double demotion operation we first generate two demotion
- operations from the source type to the intermediate type, and
- then combine the results in one demotion to the destination
- type. */
- new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd2,
- vec_oprnd3);
- second_vector = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, second_vector);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
- new_stmt = gimple_build_assign_with_ops (code1, double_vec_dest,
- first_vector, second_vector);
- new_temp = make_ssa_name (double_vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
- /* In the next iteration we will get copy for this operand. */
- last_oprnd = vec_oprnd3;
+ new_tmp1 = gimple_call_lhs (new_stmt1);
+ new_tmp2 = gimple_call_lhs (new_stmt2);
}
-
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ {
+ new_tmp1 = gimple_assign_lhs (new_stmt1);
+ new_tmp2 = gimple_assign_lhs (new_stmt2);
+ }
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ if (multi_step_cvt)
+ {
+ /* Store the results for the recursive call. */
+ VEC_quick_push (tree, vec_tmp, new_tmp1);
+ VEC_quick_push (tree, vec_tmp, new_tmp2);
+ }
+ else
+ {
+ /* Last step of promotion sequence - store the results. */
+ if (slp_node)
+ {
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
+ VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
+ }
+ else
+ {
+ if (!*prev_stmt_info)
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
+ else
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
+
+ *prev_stmt_info = vinfo_for_stmt (new_stmt1);
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
+ *prev_stmt_info = vinfo_for_stmt (new_stmt2);
+ }
+ }
}
- *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- return true;
+ if (!multi_step_cvt)
+ {
+ /* Last step: nothing was pushed to VEC_TMP, so just release it. */
+ VEC_free (tree, heap, vec_tmp);
+ return;
+ }
+
+ /* For multi-step promotion operation we call the function recursively for
+ every stage. We start from the input type, create promotion operations
+ to the intermediate types, and then create promotions to the output
+ type. The results of this step become the operands of the next one, so
+ release the operands that were just consumed before replacing them. */
+ VEC_free (tree, heap, *vec_oprnds0);
+ *vec_oprnds0 = vec_tmp;
+ /* Pass DECL1 and DECL2 through unchanged so that each half keeps using
+ its own builtin decl in the next step. */
+ vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
+ multi_step_cvt - 1, stmt,
+ vec_dsts, gsi, slp_node, code1,
+ code2, decl1, decl2, op_type,
+ prev_stmt_info);
+}
@@ -4494,7 +4690,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
bool
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
- gimple *vec_stmt)
+ gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest;
tree scalar_dest;
@@ -4508,17 +4704,17 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
- gimple new_stmt;
stmt_vec_info prev_stmt_info;
int nunits_in;
int nunits_out;
tree vectype_out;
int ncopies;
- int j;
+ int j, i;
tree vectype_in;
- tree intermediate_type = NULL_TREE, first_vector, second_vector;
- bool double_op;
- tree wide_type, double_vec_dest;
+ tree intermediate_type = NULL_TREE;
+ int multi_step_cvt = 0;
+ VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
+ VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -4549,10 +4745,17 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
if (!vectype_out)
return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- if (nunits_out != nunits_in / 2 && nunits_out != nunits_in/4)
+ if (nunits_in <= nunits_out)
return false;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
+ if (slp_node)
+ ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
gcc_assert (ncopies >= 1);
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
@@ -4585,12 +4788,12 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Supportable by target? */
if (!supportable_widening_operation (code, stmt, vectype_in,
&decl1, &decl2, &code1, &code2,
- &double_op, &intermediate_type))
+ &multi_step_cvt, &interm_types))
return false;
/* Binary widening operation can only be supported directly by the
architecture. */
- gcc_assert (!(double_op && op_type == binary_op));
+ gcc_assert (!(multi_step_cvt && op_type == binary_op));
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
@@ -4610,13 +4813,38 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
ncopies);
/* Handle def. */
- if (double_op)
- wide_type = intermediate_type;
+ /* In case of multi-step promotion, we first generate promotion operations
+ to the intermediate types, and then from those types to the final one.
+ We store vector destinations in VEC_DSTS in the correct order for
+ recursive creation of promotion operations in
+ vect_create_vectorized_promotion_stmts(). Vector destinations are created
+ according to INTERM_TYPES received from supportable_widening_operation(). */
+ if (multi_step_cvt)
+ vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
else
- wide_type = vectype_out;
+ vec_dsts = VEC_alloc (tree, heap, 1);
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
- vec_dest = vect_create_destination_var (scalar_dest, wide_type);
- double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+ if (multi_step_cvt)
+ {
+ for (i = VEC_length (tree, interm_types) - 1;
+ VEC_iterate (tree, interm_types, i, intermediate_type); i--)
+ {
+ vec_dest = vect_create_destination_var (scalar_dest,
+ intermediate_type);
+ VEC_quick_push (tree, vec_dsts, vec_dest);
+ }
+ }
+
+ if (!slp_node)
+ {
+ vec_oprnds0 = VEC_alloc (tree, heap,
+ (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
+ if (op_type == binary_op)
+ vec_oprnds1 = VEC_alloc (tree, heap, 1);
+ }
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
@@ -4629,90 +4857,45 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Handle uses. */
if (j == 0)
{
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
- if (op_type == binary_op)
- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
- }
- else
- {
- vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
- if (op_type == binary_op)
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
- }
-
- /* Arguments are ready. Create the new vector stmt. We are creating
- two vector defs because the widened result does not fit in one vector.
- The vectorized stmt can be expressed as a call to a target builtin,
- or a using a tree-code. In case of double promotion (from char to int,
- for example), the promotion is performed in two phases: first we
- generate a promotion operation from the source type to the intermediate
- type (short in case of char->int promotion), and then for each of the
- created vectors we generate a promotion statement from the intermediate
- type to the destination type. */
- /* Generate first half of the widened result: */
- new_stmt = vect_gen_widened_results_half (code1, wide_type, decl1,
- vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
- if (is_gimple_call (new_stmt))
- first_vector = gimple_call_lhs (new_stmt);
- else
- first_vector = gimple_assign_lhs (new_stmt);
-
- if (!double_op)
- {
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+ if (slp_node)
+ vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
- }
-
- /* Generate second half of the widened result: */
- new_stmt = vect_gen_widened_results_half (code2, wide_type, decl2,
- vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
- if (is_gimple_call (new_stmt))
- second_vector = gimple_call_lhs (new_stmt);
- else
- second_vector = gimple_assign_lhs (new_stmt);
-
- if (!double_op)
- {
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ {
+ vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
+ VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
+ if (op_type == binary_op)
+ {
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+ }
+ }
}
else
{
- /* FIRST_VECTOR and SECOND_VECTOR are the results of source type
- to intermediate type promotion. Now we generate promotions
- for both of them to the destination type (i.e., four
- statements). */
- new_stmt = vect_gen_widened_results_half (code1, vectype_out,
- decl1, first_vector, NULL_TREE, op_type,
- double_vec_dest, gsi, stmt);
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
-
- new_stmt = vect_gen_widened_results_half (code2, vectype_out,
- decl2, first_vector, NULL_TREE, op_type,
- double_vec_dest, gsi, stmt);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
-
- new_stmt = vect_gen_widened_results_half (code1, vectype_out,
- decl1, second_vector, NULL_TREE, op_type,
- double_vec_dest, gsi, stmt);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
-
- new_stmt = vect_gen_widened_results_half (code2, vectype_out,
- decl2, second_vector, NULL_TREE, op_type,
- double_vec_dest, gsi, stmt);
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+ VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
+ if (op_type == binary_op)
+ {
+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
+ VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
+ }
}
- }
+
+ /* Arguments are ready. Create the new vector stmts. */
+ tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
+ vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
+ multi_step_cvt, stmt,
+ tmp_vec_dsts,
+ gsi, slp_node, code1, code2,
+ decl1, decl2, op_type,
+ &prev_stmt_info);
+ }
+
+ VEC_free (tree, heap, vec_dsts);
+ VEC_free (tree, heap, tmp_vec_dsts);
+ VEC_free (tree, heap, interm_types);
+ VEC_free (tree, heap, vec_oprnds0);
+ VEC_free (tree, heap, vec_oprnds1);
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
return true;
@@ -4925,7 +5108,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
stmt_vec_info prev_stmt_info = NULL;
tree dataref_ptr = NULL_TREE;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int j;
gimple next_stmt, first_stmt = NULL;
bool strided_store = false;
@@ -4937,10 +5120,13 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
stmt_vec_info first_stmt_vinfo;
unsigned int vec_num;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
@@ -5066,7 +5252,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
strided_store = false;
/* VEC_NUM is the number of vect stmts to be created for this group. */
- if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
+ if (slp)
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
vec_num = group_size;
@@ -5179,9 +5365,6 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
else
{
- /* FORNOW SLP doesn't work for multiple types. */
- gcc_assert (!slp);
-
/* For interleaved stores we created vectorized defs for all the
defs stored in OPRNDS in the previous iteration (previous copy).
DR_CHAIN is then used as an input to vect_permute_store_chain(),
@@ -5230,6 +5413,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
mark_symbols_for_renaming (new_stmt);
+
+ if (slp)
+ continue;
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
@@ -5795,7 +5981,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree dataref_ptr = NULL_TREE;
gimple ptr_incr;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
- int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ int ncopies;
int i, j, group_size;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
@@ -5812,10 +5998,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
bool slp = (slp_node != NULL);
enum tree_code code;
- /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
- this, so we can safely override NCOPIES with 1 here. */
+ /* Multiple types in SLP are handled by creating the appropriate number of
+ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+ case of SLP. */
if (slp)
ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
gcc_assert (ncopies >= 1);
@@ -5909,7 +6098,6 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
- dr_chain = VEC_alloc (tree, heap, group_size);
/* VEC_NUM is the number of vect stmts to be created for this group. */
if (slp)
@@ -5919,6 +6107,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
else
vec_num = group_size;
+
+ dr_chain = VEC_alloc (tree, heap, vec_num);
}
else
{
@@ -6203,9 +6393,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
}
- /* FORNOW: SLP with multiple types is unsupported. */
if (slp)
- return true;
+ continue;
if (strided_load)
{
@@ -6500,14 +6689,12 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
switch (STMT_VINFO_TYPE (stmt_info))
{
case type_demotion_vec_info_type:
- gcc_assert (!slp_node);
- done = vectorizable_type_demotion (stmt, gsi, &vec_stmt);
+ done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
break;
case type_promotion_vec_info_type:
- gcc_assert (!slp_node);
- done = vectorizable_type_promotion (stmt, gsi, &vec_stmt);
+ done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
break;
@@ -6540,7 +6727,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
case store_vec_info_type:
done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);
- if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
{
/* In case of interleaving, the whole chain is vectorized when the
last store in the chain is reached. Store stmts before the last
@@ -7598,21 +7785,38 @@ vect_remove_stores (gimple first_stmt)
/* Vectorize SLP instance tree in postorder. */
static bool
-vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
+vect_schedule_slp_instance (slp_tree node, slp_instance instance,
+ unsigned int vectorization_factor)
{
gimple stmt;
bool strided_store, is_store;
gimple_stmt_iterator si;
stmt_vec_info stmt_info;
+ unsigned int vec_stmts_size, nunits, group_size;
+ tree vectype;
if (!node)
return false;
- vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
- vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
+ vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
+ vectorization_factor);
+ vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
+ vectorization_factor);
- stmt = VEC_index(gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
stmt_info = vinfo_for_stmt (stmt);
+ /* VECTYPE is the type of the destination. */
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
+ group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+
+ /* For each SLP instance calculate number of vector stmts to be created
+ for the scalar stmts in each node of the SLP tree. Number of vector
+ elements in one vector iteration is the number of scalar elements in
+ one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
+ size. */
+ vec_stmts_size = (vectorization_factor * group_size) / nunits;
+
SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
@@ -7644,30 +7848,21 @@ vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
static bool
-vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
+vect_schedule_slp (loop_vec_info loop_vinfo)
{
VEC (slp_instance, heap) *slp_instances =
LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
slp_instance instance;
- unsigned int vec_stmts_size;
- unsigned int group_size, i;
- unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned int i;
bool is_store = false;
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
{
- group_size = SLP_INSTANCE_GROUP_SIZE (instance);
- /* For each SLP instance calculate number of vector stmts to be created
- for the scalar stmts in each node of the SLP tree. Number of vector
- elements in one vector iteration is the number of scalar elements in
- one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
- size. */
- vec_stmts_size = vectorization_factor * group_size / nunits;
-
/* Schedule the tree of INSTANCE. */
- is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
- vec_stmts_size);
-
+ is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
+ instance,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+
if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
|| vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
fprintf (vect_dump, "vectorizing stmts using SLP.");
@@ -7826,7 +8021,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== scheduling SLP instances ===");
- is_store = vect_schedule_slp (loop_vinfo, nunits);
+ is_store = vect_schedule_slp (loop_vinfo);
/* IS_STORE is true if STMT is a store. Stores cannot be of
hybrid SLP type. They are removed in
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 78f8262..437b145 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -2138,30 +2138,30 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
- DECL1 and DECL2 are decls of target builtin functions to be used
when vectorizing the operation, if available. In this case,
CODE1 and CODE2 are CALL_EXPR.
- - DOUBLE_OP determines if the operation is a double cast, like
- char->short->int
- - INTERM_TYPE is the intermediate type required to perform the
- widening operation (short in the above example) */
+ - MULTI_STEP_CVT determines the number of required intermediate steps in
+ case of multi-step conversion (like char->short->int - in that case
+ MULTI_STEP_CVT will be 1).
+ - INTERM_TYPES contains the intermediate types required to perform the
+ widening operation (short in the above example). */
bool
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
tree *decl1, tree *decl2,
enum tree_code *code1, enum tree_code *code2,
- bool *double_op, tree *interm_type)
+ int *multi_step_cvt,
+ VEC (tree, heap) **interm_types)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
bool ordered_p;
enum machine_mode vec_mode;
- enum insn_code icode1, icode2;
+ enum insn_code icode1 = 0, icode2 = 0;
optab optab1, optab2;
tree type = gimple_expr_type (stmt);
tree wide_vectype = get_vectype_for_scalar_type (type);
enum tree_code c1, c2;
- *double_op = false;
-
/* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit int one vector). The generated
vector results would normally be expected to be generated in the same
@@ -2272,52 +2272,60 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
vec_mode = TYPE_MODE (vectype);
if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
- || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
- == CODE_FOR_nothing)
+ || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
+ == CODE_FOR_nothing)
return false;
- /* Check if it's a double cast, like char->int. In such case the intermediate
- type is short, and we check that char->short->int operaion is supported by
- the target. */
+ /* Check if it's a multi-step conversion that can be done using intermediate
+ types. */
if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
- || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
+ || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
{
- if (code == NOP_EXPR)
- {
- enum machine_mode intermediate_mode =
- insn_data[icode1].operand[0].mode;
- tree intermediate_type =
- lang_hooks.types.type_for_mode (intermediate_mode,
- TYPE_UNSIGNED (vectype));
- optab optab3 = optab_for_tree_code (c1, intermediate_type,
- optab_default);
- optab optab4 = optab_for_tree_code (c2, intermediate_type,
- optab_default);
-
- if (!optab3 || !optab4)
- return false;
+ int i;
+ tree prev_type = vectype, intermediate_type;
+ enum machine_mode intermediate_mode, prev_mode = vec_mode;
+ optab optab3, optab4;
- if ((icode1 = optab1->handlers[(int) vec_mode].insn_code)
+ if (!CONVERT_EXPR_CODE_P (code))
+ return false;
+
+ *code1 = c1;
+ *code2 = c2;
+
+ /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
+ intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
+ steps to get to WIDE_VECTYPE, and fail if we do not. */
+ *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
+ for (i = 0; i < 3; i++)
+ {
+ intermediate_mode = insn_data[icode1].operand[0].mode;
+ intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
+ TYPE_UNSIGNED (prev_type));
+ optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
+ optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
+
+ if (!optab3 || !optab4
+ || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
== CODE_FOR_nothing
|| insn_data[icode1].operand[0].mode != intermediate_mode
- || (icode2 = optab2->handlers[(int) vec_mode].insn_code)
+ || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
== CODE_FOR_nothing
|| insn_data[icode2].operand[0].mode != intermediate_mode
- || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
+ || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
== CODE_FOR_nothing
- || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|| (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
- == CODE_FOR_nothing
- || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
+ == CODE_FOR_nothing)
return false;
- else
- {
- *double_op = true;
- *interm_type = intermediate_type;
- *code1 = c1;
- *code2 = c2;
- return true;
- }
+
+ VEC_quick_push (tree, *interm_types, intermediate_type);
+ (*multi_step_cvt)++;
+
+ if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
+ && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
+ return true;
+
+ prev_type = intermediate_type;
+ prev_mode = intermediate_mode;
}
return false;
@@ -2342,16 +2350,17 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
Output:
- CODE1 is the code of a vector operation to be used when
vectorizing the operation, if available.
- - DOUBLE_OP determines if the operation is a double cast, like
- int->short->char
- - INTERMIDIATE_TYPE is the intermediate type required to perform the
- widening operation (short in the above example) */
+ - MULTI_STEP_CVT determines the number of required intermediate steps in
+ case of multi-step conversion (like int->short->char - in that case
+ MULTI_STEP_CVT will be 1).
+ - INTERM_TYPES contains the intermediate types required to perform the
+ narrowing operation (short in the above example). */
bool
supportable_narrowing_operation (enum tree_code code,
- const_gimple stmt, const_tree vectype,
- enum tree_code *code1, bool *double_op,
- tree *intermediate_type)
+ const_gimple stmt, tree vectype,
+ enum tree_code *code1, int *multi_step_cvt,
+ VEC (tree, heap) **interm_types)
{
enum machine_mode vec_mode;
enum insn_code icode1;
@@ -2359,6 +2368,8 @@ supportable_narrowing_operation (enum tree_code code,
tree type = gimple_expr_type (stmt);
tree narrow_vectype = get_vectype_for_scalar_type (type);
enum tree_code c1;
+ tree intermediate_type, prev_type;
+ int i;
switch (code)
{
@@ -2393,24 +2404,45 @@ supportable_narrowing_operation (enum tree_code code,
== CODE_FOR_nothing)
return false;
- /* In case of NUNITS_IN == NUNITS_OUT/4 check that the it is possible to
- perform the operation using an intermediate type of NUNITS_OUT/2. */
+ /* Check if it's a multi-step conversion that can be done using intermediate
+ types. */
if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
{
- enum machine_mode intermediate_mode = insn_data[icode1].operand[0].mode;
- *intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
- TYPE_UNSIGNED (vectype));
- interm_optab = optab_for_tree_code (VEC_PACK_TRUNC_EXPR,
- *intermediate_type, optab_default);
- if (!interm_optab)
- return false;
+ enum machine_mode intermediate_mode, prev_mode = vec_mode;
+
+ *code1 = c1;
+ prev_type = vectype;
+ /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
+ intermediate steps in demotion sequence. We try MAX_INTERM_CVT_STEPS
+ steps to get to NARROW_VECTYPE, and fail if we do not. */
+ *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
+ for (i = 0; i < 3; i++)
+ {
+ intermediate_mode = insn_data[icode1].operand[0].mode;
+ intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
+ TYPE_UNSIGNED (prev_type));
+ interm_optab = optab_for_tree_code (c1, intermediate_type,
+ optab_default);
+ if (!interm_optab
+ || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
+ == CODE_FOR_nothing
+ || insn_data[icode1].operand[0].mode != intermediate_mode
+ || (icode1
+ = interm_optab->handlers[(int) intermediate_mode].insn_code)
+ == CODE_FOR_nothing)
+ return false;
- if ((icode1 = interm_optab->handlers[(int) intermediate_mode].insn_code)
- == CODE_FOR_nothing
- || insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
- return false;
+ VEC_quick_push (tree, *interm_types, intermediate_type);
+ (*multi_step_cvt)++;
+
+ if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
+ return true;
- *double_op = true;
+ prev_type = intermediate_type;
+ prev_mode = intermediate_mode;
+ }
+
+ return false;
}
*code1 = c1;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index cf7c5b1..a22353cc 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -522,6 +522,10 @@ typedef struct _stmt_vec_info {
#define TARG_VEC_STORE_COST 1
#endif
+/* The maximum number of intermediate steps required in multi-step type
+ conversion. */
+#define MAX_INTERM_CVT_STEPS 3
+
/* Avoid GTY(()) on stmt_vec_info. */
typedef void *vec_void_p;
DEF_VEC_P (vec_void_p);
@@ -602,6 +606,16 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
}
+/* Return 2 raised to the power X, computed by repeated doubling. The result
+ is 1 when X is zero or negative. No overflow check is made. */
+static inline int
+vect_pow2 (int x)
+{
+ int i, res = 1;
+
+ for (i = 0; i < x; i++)
+ res *= 2;
+
+ return res;
+}
/*-----------------------------------------------------------------*/
/* Info on data references alignment. */
@@ -671,9 +685,10 @@ extern enum dr_alignment_support vect_supportable_dr_alignment
(struct data_reference *);
extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
extern bool supportable_widening_operation (enum tree_code, gimple, tree,
- tree *, tree *, enum tree_code *, enum tree_code *, bool *, tree *);
+ tree *, tree *, enum tree_code *, enum tree_code *,
+ int *, VEC (tree, heap) **);
extern bool supportable_narrowing_operation (enum tree_code, const_gimple,
- const_tree, enum tree_code *, bool *, tree *);
+ tree, enum tree_code *, int *, VEC (tree, heap) **);
/* Creation and deletion of loop and stmt info structs. */
extern loop_vec_info new_loop_vec_info (struct loop *loop);
@@ -705,9 +720,9 @@ extern bool vectorizable_store (gimple, gimple_stmt_iterator *, gimple *,
extern bool vectorizable_operation (gimple, gimple_stmt_iterator *, gimple *,
slp_tree);
extern bool vectorizable_type_promotion (gimple, gimple_stmt_iterator *,
- gimple *);
+ gimple *, slp_tree);
extern bool vectorizable_type_demotion (gimple, gimple_stmt_iterator *,
- gimple *);
+ gimple *, slp_tree);
extern bool vectorizable_conversion (gimple, gimple_stmt_iterator *, gimple *,
slp_tree);
extern bool vectorizable_assignment (gimple, gimple_stmt_iterator *, gimple *,