aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/i386/i386-builtin.def8
-rw-r--r--gcc/config/i386/sse.md8
-rw-r--r--gcc/doc/md.texi8
-rw-r--r--gcc/internal-fn.def1
-rw-r--r--gcc/optabs.def1
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsub-2.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsub-3.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsubv2df.c42
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsubv4df.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c46
-rw-r--r--gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c46
-rw-r--r--gcc/tree-vect-slp-patterns.c100
-rw-r--r--gcc/tree-vect-slp.c1
-rw-r--r--gcc/tree-vectorizer.h3
14 files changed, 350 insertions, 9 deletions
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 31df3a6..ea79e0b 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -855,8 +855,8 @@ BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv1di3, "__
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
-BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
@@ -996,8 +996,8 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128
/* AVX */
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5bd65dd..1f1db82 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2410,7 +2410,7 @@
(set_attr "prefix" "<round_saeonly_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx_addsubv4df3"
+(define_insn "vec_addsubv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
(minus:V4DF
@@ -2424,7 +2424,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_insn "sse3_addsubv2df3"
+(define_insn "vec_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(minus:V2DF
@@ -2442,7 +2442,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
-(define_insn "avx_addsubv8sf3"
+(define_insn "vec_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
(minus:V8SF
@@ -2456,7 +2456,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_addsubv4sf3"
+(define_insn "vec_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(minus:V4SF
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 00caf38..1b91814 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5682,6 +5682,14 @@ signed/unsigned elements of size S@. Subtract the high/low elements of 2 from
1 and widen the resulting elements. Put the N/2 results of size 2*S in the
output vector (operand 0).
+@cindex @code{vec_addsub@var{m}3} instruction pattern
+@item @samp{vec_addsub@var{m}3}
+Alternating subtract, add with even lanes doing subtract and odd
+lanes doing addition. Operands 1 and 2 and the output operand are vectors
+with mode @var{m}.
+
+These instructions are not allowed to @code{FAIL}.
+
@cindex @code{mulhisi3} instruction pattern
@item @samp{mulhisi3}
Multiply operands 1 and 2, which have mode @code{HImode}, and store
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index b2f414d..c3b8e73 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -281,6 +281,7 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary)
DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary)
+DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary)
/* FP scales. */
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b192a9d..41ab259 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -407,6 +407,7 @@ OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a")
OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a")
OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a")
OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a")
+OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
OPTAB_D (sync_add_optab, "sync_add$I$a")
OPTAB_D (sync_and_optab, "sync_and$I$a")
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-2.c b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c
new file mode 100644
index 0000000..a6b9414
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3" } */
+
+float a[1024], b[1024];
+
+void foo()
+{
+ for (int i = 0; i < 256; i++)
+ {
+ a[4*i+0] = a[4*i+0] - b[4*i+0];
+ a[4*i+1] = a[4*i+1] + b[4*i+1];
+ a[4*i+2] = a[4*i+2] - b[4*i+2];
+ a[4*i+3] = a[4*i+3] + b[4*i+3];
+ }
+}
+
+/* We should be able to vectorize this with SLP using the addsub
+ SLP pattern. */
+/* { dg-final { scan-assembler "addsubps" } } */
+/* { dg-final { scan-assembler-not "shuf" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-3.c b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c
new file mode 100644
index 0000000..b27ee56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+double a[2], b[2], c[2];
+
+void __attribute__((noipa))
+foo ()
+{
+ /* When we want to use addsubpd we have to keep permuting both
+ loads, if instead we blend the result of an add and a sub we
+ can combine the blend with the permute. Both are similar in cost,
+ verify we did not wrongly apply both. */
+ double tem0 = a[1] - b[1];
+ double tem1 = a[0] + b[0];
+ c[0] = tem0;
+ c[1] = tem1;
+}
+
+static void
+TEST (void)
+{
+ a[0] = 1.; a[1] = 2.;
+ b[0] = 2.; b[1] = 4.;
+ foo ();
+ if (c[0] != -2. || c[1] != 3.)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c
new file mode 100644
index 0000000..547485d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+double x[2], y[2], z[2];
+void __attribute__((noipa)) foo ()
+{
+ x[0] = y[0] - z[0];
+ x[1] = y[1] + z[1];
+}
+void __attribute__((noipa)) bar ()
+{
+ x[0] = y[0] + z[0];
+ x[1] = y[1] - z[1];
+}
+static void
+TEST (void)
+{
+ for (int i = 0; i < 2; ++i)
+ {
+ y[i] = i + 1;
+ z[i] = 2 * i + 1;
+ }
+ foo ();
+ if (x[0] != 0 || x[1] != 5)
+ __builtin_abort ();
+ bar ();
+ if (x[0] != 2 || x[1] != -1)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c
new file mode 100644
index 0000000..e0a1b3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target avx_runtime } } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */
+
+double x[4], y[4], z[4];
+void __attribute__((noipa)) foo ()
+{
+ x[0] = y[0] - z[0];
+ x[1] = y[1] + z[1];
+ x[2] = y[2] - z[2];
+ x[3] = y[3] + z[3];
+}
+void __attribute__((noipa)) bar ()
+{
+ x[0] = y[0] + z[0];
+ x[1] = y[1] - z[1];
+ x[2] = y[2] + z[2];
+ x[3] = y[3] - z[3];
+}
+int main()
+{
+ for (int i = 0; i < 4; ++i)
+ {
+ y[i] = i + 1;
+ z[i] = 2 * i + 1;
+ }
+ foo ();
+ if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11)
+ __builtin_abort ();
+ bar ();
+ if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c
new file mode 100644
index 0000000..b524f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse3 } */
+/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse3_test
+#endif
+
+#include CHECK_H
+
+float x[4], y[4], z[4];
+void __attribute__((noipa)) foo ()
+{
+ x[0] = y[0] - z[0];
+ x[1] = y[1] + z[1];
+ x[2] = y[2] - z[2];
+ x[3] = y[3] + z[3];
+}
+void __attribute__((noipa)) bar ()
+{
+ x[0] = y[0] + z[0];
+ x[1] = y[1] - z[1];
+ x[2] = y[2] + z[2];
+ x[3] = y[3] - z[3];
+}
+static void
+TEST (void)
+{
+ for (int i = 0; i < 4; ++i)
+ {
+ y[i] = i + 1;
+ z[i] = 2 * i + 1;
+ }
+ foo ();
+ if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11)
+ __builtin_abort ();
+ bar ();
+ if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c
new file mode 100644
index 0000000..0eed33b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c
@@ -0,0 +1,46 @@
+/* { dg-do run { target avx_runtime } } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */
+
+float x[8], y[8], z[8];
+void __attribute__((noipa)) foo ()
+{
+ x[0] = y[0] - z[0];
+ x[1] = y[1] + z[1];
+ x[2] = y[2] - z[2];
+ x[3] = y[3] + z[3];
+ x[4] = y[4] - z[4];
+ x[5] = y[5] + z[5];
+ x[6] = y[6] - z[6];
+ x[7] = y[7] + z[7];
+}
+void __attribute__((noipa)) bar ()
+{
+ x[0] = y[0] + z[0];
+ x[1] = y[1] - z[1];
+ x[2] = y[2] + z[2];
+ x[3] = y[3] - z[3];
+ x[4] = y[4] + z[4];
+ x[5] = y[5] - z[5];
+ x[6] = y[6] + z[6];
+ x[7] = y[7] - z[7];
+}
+int main()
+{
+ for (int i = 0; i < 8; ++i)
+ {
+ y[i] = i + 1;
+ z[i] = 2 * i + 1;
+ }
+ foo ();
+ if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11
+ || x[4] != -4 || x[5] != 17 || x[6] != -6 || x[7] != 23)
+ __builtin_abort ();
+ bar ();
+ if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3
+ || x[4] != 14 || x[5] != -5 || x[6] != 20 || x[7] != -7)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
index 2ed49cd..d536494 100644
--- a/gcc/tree-vect-slp-patterns.c
+++ b/gcc/tree-vect-slp-patterns.c
@@ -1490,6 +1490,105 @@ complex_operations_pattern::build (vec_info * /* vinfo */)
gcc_unreachable ();
}
+
+/* The addsub_pattern. Recognizes a two-child VEC_PERM_EXPR SLP node that
+   blends a MINUS_EXPR node and a PLUS_EXPR node computed on the same
+   operands, with result lanes alternating -, +, -, ..., and rewrites that
+   node to use IFN_VEC_ADDSUB. The constructor passes NULL as the operand
+   vector argument of vect_pattern. recognize () does the matching and
+   build () performs the rewrite. */
+
+class addsub_pattern : public vect_pattern
+{
+ public:
+ addsub_pattern (slp_tree *node)
+ : vect_pattern (node, NULL, IFN_VEC_ADDSUB) {};
+
+ void build (vec_info *);
+
+ static vect_pattern*
+ recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+};
+/* Try to match *NODE_ as an alternating subtract/add blend. Returns a
+   newly allocated addsub_pattern for NODE_ when the node is a two-child
+   VEC_PERM_EXPR matching the shape checked below and
+   vect_pattern_validate_optab accepts IFN_VEC_ADDSUB for it, and NULL
+   otherwise. The first (load permutation map) argument is unused. */
+vect_pattern *
+addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
+{
+ slp_tree node = *node_;
+ if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+ || SLP_TREE_CHILDREN (node).length () != 2)
+ return NULL;
+
+ /* Match a blend of a plus and a minus op with the same number of plus and
+ minus lanes on the same operands. The two children may come in either
+ order: if child 0 is the plus op the locals are swapped and SWAPPED_P
+ records that, so the lane permutation can be read accordingly below.
+ The minus and the plus must share both child nodes; for the plus the
+ two children are also accepted in exchanged order. */
+ slp_tree sub = SLP_TREE_CHILDREN (node)[0];
+ slp_tree add = SLP_TREE_CHILDREN (node)[1];
+ bool swapped_p = false;
+ if (vect_match_expression_p (sub, PLUS_EXPR))
+ {
+ std::swap (add, sub);
+ swapped_p = true;
+ }
+ if (!(vect_match_expression_p (add, PLUS_EXPR)
+ && vect_match_expression_p (sub, MINUS_EXPR)))
+ return NULL;
+ if (!((SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[0]
+ && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[1])
+ || (SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[1]
+ && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[0])))
+ return NULL;
+
+ for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
+ {
+ std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
+ if (swapped_p)
+ perm.first = perm.first == 0 ? 1 : 0;
+ /* It has to be alternating -, +, -, ...
+ While we could permute the .ADDSUB inputs and the .ADDSUB output
+ that's only profitable over the add + sub + blend if at least
+ one of the permute is optimized which we can't determine here.
+ With the swap undone above, result lane I must therefore have
+ first == 0 (the minus) for even I and first == 1 (the plus) for
+ odd I, and second == I, i.e. no lane is moved. */
+ if (perm.first != (i & 1)
+ || perm.second != i)
+ return NULL;
+ }
+
+ if (!vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
+ return NULL;
+
+ return new addsub_pattern (node_);
+}
+/* Rewrite the blend node *M_NODE in place so that it represents an
+   IFN_VEC_ADDSUB call on the two operands of its minus child, then call
+   vect_free_slp_tree on the former minus and plus children. VINFO is
+   used to register the new call as a pattern statement. */
+void
+addsub_pattern::build (vec_info *vinfo)
+{
+ slp_tree node = *m_node;
+
+ slp_tree sub = SLP_TREE_CHILDREN (node)[0];
+ slp_tree add = SLP_TREE_CHILDREN (node)[1];
+ if (vect_match_expression_p (sub, PLUS_EXPR))
+ std::swap (add, sub);
+
+ /* Modify the blend node in-place. Its two children become the two
+ children of the minus op, in the minus op's order, and their reference
+ counts are incremented for this additional use before the old children
+ are freed at the end of the function. */
+ SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
+ SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
+
+ /* Build IFN_VEC_ADDSUB from the sub representative operands. The call
+ gets a fresh SSA name of the type of the representative's lhs, is marked
+ nothrow and is given the representative's basic block. It is then
+ added as a pattern stmt of REP and becomes the representative of NODE,
+ which stops being a VEC_PERM_EXPR and drops its lane permutation. */
+ stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
+ gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
+ gimple_assign_rhs1 (rep->stmt),
+ gimple_assign_rhs2 (rep->stmt));
+ gimple_call_set_lhs (call, make_ssa_name
+ (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
+ gimple_call_set_nothrow (call, true);
+ gimple_set_bb (call, gimple_bb (rep->stmt));
+ SLP_TREE_REPRESENTATIVE (node) = vinfo->add_pattern_stmt (call, rep);
+ STMT_VINFO_RELEVANT (SLP_TREE_REPRESENTATIVE (node)) = vect_used_in_scope;
+ STMT_SLP_TYPE (SLP_TREE_REPRESENTATIVE (node)) = pure_slp;
+ STMT_VINFO_VECTYPE (SLP_TREE_REPRESENTATIVE (node)) = SLP_TREE_VECTYPE (node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (SLP_TREE_REPRESENTATIVE (node)) = true;
+ SLP_TREE_CODE (node) = ERROR_MARK;
+ SLP_TREE_LANE_PERMUTATION (node).release ();
+
+ vect_free_slp_tree (sub);
+ vect_free_slp_tree (add);
+}
+
/*******************************************************************************
* Pattern matching definitions
******************************************************************************/
@@ -1502,6 +1601,7 @@ vect_pattern_decl_t slp_patterns[]
overlap in what they can detect. */
SLP_PATTERN (complex_operations_pattern),
+ SLP_PATTERN (addsub_pattern)
};
#undef SLP_PATTERN
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 69ee8fa..227d6aa 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3705,6 +3705,7 @@ vect_optimize_slp (vec_info *vinfo)
case CFN_COMPLEX_ADD_ROT270:
case CFN_COMPLEX_MUL:
case CFN_COMPLEX_MUL_CONJ:
+ case CFN_VEC_ADDSUB:
continue;
default:;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5c71fbc..fa28336 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2100,7 +2100,8 @@ class vect_pattern
this->m_ifn = ifn;
this->m_node = node;
this->m_ops.create (0);
- this->m_ops.safe_splice (*m_ops);
+ if (m_ops)
+ this->m_ops.safe_splice (*m_ops);
}
public: