diff options
author | Richard Biener <rguenther@suse.de> | 2021-07-05 15:51:49 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2021-07-06 11:56:47 +0200 |
commit | 7d810646d421f6975300c0d06f4e9af27a48f26d (patch) | |
tree | ab76365a066c48d72a332a290a099c3f3d53a335 | |
parent | 9f489a5731f12b8e6b49994e8f61acb5d26f508e (diff) | |
download | gcc-7d810646d421f6975300c0d06f4e9af27a48f26d.zip gcc-7d810646d421f6975300c0d06f4e9af27a48f26d.tar.gz gcc-7d810646d421f6975300c0d06f4e9af27a48f26d.tar.bz2 |
Add FMADDSUB and FMSUBADD SLP vectorization patterns and optabs
This adds named expanders for vec_fmaddsub<mode>4 and
vec_fmsubadd<mode>4, which map to the x86 vfmaddsubXXXp{ds} and
vfmsubaddXXXp{ds} instructions. This complements the previous
addition of ADDSUB support.
x86 lacks SUBADD and the negate variants of FMA with mixed
plus/minus, so I did not add optabs or patterns for those, but
it would not be difficult if there's a target that has them.
2021-07-05 Richard Biener <rguenther@suse.de>
* doc/md.texi (vec_fmaddsub<mode>4): Document.
(vec_fmsubadd<mode>4): Likewise.
* optabs.def (vec_fmaddsub$a4): Add.
(vec_fmsubadd$a4): Likewise.
* internal-fn.def (IFN_VEC_FMADDSUB): Add.
(IFN_VEC_FMSUBADD): Likewise.
* tree-vect-slp-patterns.c (addsub_pattern::recognize):
Refactor to handle IFN_VEC_FMADDSUB and IFN_VEC_FMSUBADD.
(addsub_pattern::build): Likewise.
* tree-vect-slp.c (vect_optimize_slp): CFN_VEC_FMADDSUB
and CFN_VEC_FMSUBADD are not transparent for permutes.
* config/i386/sse.md (vec_fmaddsub<mode>4): New expander.
(vec_fmsubadd<mode>4): Likewise.
* gcc.target/i386/vect-fmaddsubXXXpd.c: New testcase.
* gcc.target/i386/vect-fmaddsubXXXps.c: Likewise.
* gcc.target/i386/vect-fmsubaddXXXpd.c: Likewise.
* gcc.target/i386/vect-fmsubaddXXXps.c: Likewise.
-rw-r--r-- | gcc/config/i386/sse.md | 19 | ||||
-rw-r--r-- | gcc/doc/md.texi | 14 | ||||
-rw-r--r-- | gcc/internal-fn.def | 3 | ||||
-rw-r--r-- | gcc/optabs.def | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c | 34 | ||||
-rw-r--r-- | gcc/tree-vect-slp-patterns.c | 192 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 2 |
10 files changed, 311 insertions, 57 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index bcf1605..17c9e57 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4644,6 +4644,25 @@ ;; ;; But this doesn't seem useful in practice. +(define_expand "vec_fmaddsub<mode>4" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand") + (match_operand:VF 2 "nonimmediate_operand") + (match_operand:VF 3 "nonimmediate_operand")] + UNSPEC_FMADDSUB))] + "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)") + +(define_expand "vec_fmsubadd<mode>4" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand") + (match_operand:VF 2 "nonimmediate_operand") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand"))] + UNSPEC_FMADDSUB))] + "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)") + (define_expand "fmaddsub_<mode>" [(set (match_operand:VF 0 "register_operand") (unspec:VF diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 1b91814..cc92ebd 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5688,6 +5688,20 @@ Alternating subtract, add with even lanes doing subtract and odd lanes doing addition. Operands 1 and 2 and the outout operand are vectors with mode @var{m}. +@cindex @code{vec_fmaddsub@var{m}4} instruction pattern +@item @samp{vec_fmaddsub@var{m}4} +Alternating multiply subtract, add with even lanes doing subtract and odd +lanes doing addition of the third operand to the multiplication result +of the first two operands. Operands 1, 2 and 3 and the outout operand are vectors +with mode @var{m}. + +@cindex @code{vec_fmsubadd@var{m}4} instruction pattern +@item @samp{vec_fmsubadd@var{m}4} +Alternating multiply add, subtract with even lanes doing addition and odd +lanes doing subtraction of the third operand to the multiplication result +of the first two operands. Operands 1, 2 and 3 and the outout operand are vectors +with mode @var{m}. 
+ These instructions are not allowed to @code{FAIL}. @cindex @code{mulhisi3} instruction pattern diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index c3b8e73..a7003d5 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -282,7 +282,8 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary) DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary) - +DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary) +DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary) /* FP scales. */ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) diff --git a/gcc/optabs.def b/gcc/optabs.def index 41ab259..51acc1b 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -408,6 +408,8 @@ OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a") OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a") OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a") OPTAB_D (vec_addsub_optab, "vec_addsub$a3") +OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4") +OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4") OPTAB_D (sync_add_optab, "sync_add$I$a") OPTAB_D (sync_and_optab, "sync_and$I$a") diff --git a/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c b/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c new file mode 100644 index 0000000..b30d107 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXpd.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O3 -mfma -save-temps" } */ + +#include "fma-check.h" + +void __attribute__((noipa)) +check_fmaddsub (double * __restrict a, double *b, double *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[2*i + 0] = b[2*i + 0] * c[2*i + 0] - a[2*i + 0]; + a[2*i + 1] = b[2*i + 1] * c[2*i + 1] + a[2*i + 1]; + } +} + +static void +fma_test (void) +{ + double a[4], b[4], c[4]; + for (int i = 0; i < 
4; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 2); + const double d[4] = { 0., 22., 82., 192. }; + for (int i = 0; i < 4; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler "fmaddsub...pd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c b/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c new file mode 100644 index 0000000..cd2af87 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-fmaddsubXXXps.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O3 -mfma -save-temps" } */ + +#include "fma-check.h" + +void __attribute__((noipa)) +check_fmaddsub (float * __restrict a, float *b, float *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[2*i + 0] = b[2*i + 0] * c[2*i + 0] - a[2*i + 0]; + a[2*i + 1] = b[2*i + 1] * c[2*i + 1] + a[2*i + 1]; + } +} + +static void +fma_test (void) +{ + float a[4], b[4], c[4]; + for (int i = 0; i < 4; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 2); + const float d[4] = { 0., 22., 82., 192. 
}; + for (int i = 0; i < 4; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler "fmaddsub...ps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c b/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c new file mode 100644 index 0000000..7ca2a27 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXpd.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O3 -mfma -save-temps" } */ + +#include "fma-check.h" + +void __attribute__((noipa)) +check_fmsubadd (double * __restrict a, double *b, double *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[2*i + 0] = b[2*i + 0] * c[2*i + 0] + a[2*i + 0]; + a[2*i + 1] = b[2*i + 1] * c[2*i + 1] - a[2*i + 1]; + } +} + +static void +fma_test (void) +{ + double a[4], b[4], c[4]; + for (int i = 0; i < 4; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmsubadd (a, b, c, 2); + const double d[4] = { 0., 20., 86., 186. }; + for (int i = 0; i < 4; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler "fmsubadd...pd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c b/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c new file mode 100644 index 0000000..9ddd0e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-fmsubaddXXXps.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O3 -mfma -save-temps" } */ + +#include "fma-check.h" + +void __attribute__((noipa)) +check_fmsubadd (float * __restrict a, float *b, float *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[2*i + 0] = b[2*i + 0] * c[2*i + 0] + a[2*i + 0]; + a[2*i + 1] = b[2*i + 1] * c[2*i + 1] - a[2*i + 1]; + } +} + +static void +fma_test (void) +{ + float a[4], b[4], c[4]; + for (int i = 0; i < 4; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmsubadd (a, b, c, 2); + const float d[4] = { 0., 20., 86., 186. 
}; + for (int i = 0; i < 4; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler "fmsubadd...ps" } } */ diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c index 2671f91..f774cac 100644 --- a/gcc/tree-vect-slp-patterns.c +++ b/gcc/tree-vect-slp-patterns.c @@ -1496,8 +1496,8 @@ complex_operations_pattern::build (vec_info * /* vinfo */) class addsub_pattern : public vect_pattern { public: - addsub_pattern (slp_tree *node) - : vect_pattern (node, NULL, IFN_VEC_ADDSUB) {}; + addsub_pattern (slp_tree *node, internal_fn ifn) + : vect_pattern (node, NULL, ifn) {}; void build (vec_info *); @@ -1510,46 +1510,68 @@ addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_) { slp_tree node = *node_; if (SLP_TREE_CODE (node) != VEC_PERM_EXPR - || SLP_TREE_CHILDREN (node).length () != 2) + || SLP_TREE_CHILDREN (node).length () != 2 + || SLP_TREE_LANE_PERMUTATION (node).length () % 2) return NULL; /* Match a blend of a plus and a minus op with the same number of plus and minus lanes on the same operands. 
*/ - slp_tree sub = SLP_TREE_CHILDREN (node)[0]; - slp_tree add = SLP_TREE_CHILDREN (node)[1]; - bool swapped_p = false; - if (vect_match_expression_p (sub, PLUS_EXPR)) - { - std::swap (add, sub); - swapped_p = true; - } - if (!(vect_match_expression_p (add, PLUS_EXPR) - && vect_match_expression_p (sub, MINUS_EXPR))) + unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; + unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; + if (l0 == l1) + return NULL; + bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], + PLUS_EXPR); + if (!l0add_p + && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR)) + return NULL; + bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], + PLUS_EXPR); + if (!l1add_p + && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR)) return NULL; - if (!((SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[0] - && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[1]) - || (SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[1] - && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[0]))) + + slp_tree l0node = SLP_TREE_CHILDREN (node)[l0]; + slp_tree l1node = SLP_TREE_CHILDREN (node)[l1]; + if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0] + && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1]) + || (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1] + && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0]))) return NULL; for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i) { std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i]; - if (swapped_p) - perm.first = perm.first == 0 ? 1 : 0; - /* It has to be alternating -, +, -, ... + /* It has to be alternating -, +, -, While we could permute the .ADDSUB inputs and the .ADDSUB output that's only profitable over the add + sub + blend if at least one of the permute is optimized which we can't determine here. 
*/ - if (perm.first != (i & 1) + if (perm.first != ((i & 1) ? l1 : l0) || perm.second != i) return NULL; } - if (!vect_pattern_validate_optab (IFN_VEC_ADDSUB, node)) - return NULL; + /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... } + (l0add_p), see whether we have FMA variants. */ + if (!l0add_p + && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR)) + { + /* (c * d) -+ a */ + if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node)) + return new addsub_pattern (node_, IFN_VEC_FMADDSUB); + } + else if (l0add_p + && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR)) + { + /* (c * d) +- a */ + if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node)) + return new addsub_pattern (node_, IFN_VEC_FMSUBADD); + } - return new addsub_pattern (node_); + if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node)) + return new addsub_pattern (node_, IFN_VEC_ADDSUB); + + return NULL; } void @@ -1557,38 +1579,96 @@ addsub_pattern::build (vec_info *vinfo) { slp_tree node = *m_node; - slp_tree sub = SLP_TREE_CHILDREN (node)[0]; - slp_tree add = SLP_TREE_CHILDREN (node)[1]; - if (vect_match_expression_p (sub, PLUS_EXPR)) - std::swap (add, sub); - - /* Modify the blend node in-place. */ - SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0]; - SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1]; - SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; - SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; - - /* Build IFN_VEC_ADDSUB from the sub representative operands. 
*/ - stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub); - gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2, - gimple_assign_rhs1 (rep->stmt), - gimple_assign_rhs2 (rep->stmt)); - gimple_call_set_lhs (call, make_ssa_name - (TREE_TYPE (gimple_assign_lhs (rep->stmt)))); - gimple_call_set_nothrow (call, true); - gimple_set_bb (call, gimple_bb (rep->stmt)); - stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep); - SLP_TREE_REPRESENTATIVE (node) = new_rep; - STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; - STMT_SLP_TYPE (new_rep) = pure_slp; - STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); - STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; - STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep)); - SLP_TREE_CODE (node) = ERROR_MARK; - SLP_TREE_LANE_PERMUTATION (node).release (); - - vect_free_slp_tree (sub); - vect_free_slp_tree (add); + unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first; + unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; + + switch (m_ifn) + { + case IFN_VEC_ADDSUB: + { + slp_tree sub = SLP_TREE_CHILDREN (node)[l0]; + slp_tree add = SLP_TREE_CHILDREN (node)[l1]; + + /* Modify the blend node in-place. */ + SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0]; + SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1]; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; + + /* Build IFN_VEC_ADDSUB from the sub representative operands. 
*/ + stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub); + gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2, + gimple_assign_rhs1 (rep->stmt), + gimple_assign_rhs2 (rep->stmt)); + gimple_call_set_lhs (call, make_ssa_name + (TREE_TYPE (gimple_assign_lhs (rep->stmt)))); + gimple_call_set_nothrow (call, true); + gimple_set_bb (call, gimple_bb (rep->stmt)); + stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep); + SLP_TREE_REPRESENTATIVE (node) = new_rep; + STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; + STMT_SLP_TYPE (new_rep) = pure_slp; + STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); + STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; + STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep)); + SLP_TREE_CODE (node) = ERROR_MARK; + SLP_TREE_LANE_PERMUTATION (node).release (); + + vect_free_slp_tree (sub); + vect_free_slp_tree (add); + break; + } + case IFN_VEC_FMADDSUB: + case IFN_VEC_FMSUBADD: + { + slp_tree sub, add; + if (m_ifn == IFN_VEC_FMADDSUB) + { + sub = SLP_TREE_CHILDREN (node)[l0]; + add = SLP_TREE_CHILDREN (node)[l1]; + } + else /* m_ifn == IFN_VEC_FMSUBADD */ + { + sub = SLP_TREE_CHILDREN (node)[l1]; + add = SLP_TREE_CHILDREN (node)[l0]; + } + slp_tree mul = SLP_TREE_CHILDREN (sub)[0]; + /* Modify the blend node in-place. */ + SLP_TREE_CHILDREN (node).safe_grow (3, true); + SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0]; + SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1]; + SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1]; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++; + + /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands. 
*/ + stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub); + stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul); + gcall *call = gimple_build_call_internal (m_ifn, 3, + gimple_assign_rhs1 (mrep->stmt), + gimple_assign_rhs2 (mrep->stmt), + gimple_assign_rhs2 (srep->stmt)); + gimple_call_set_lhs (call, make_ssa_name + (TREE_TYPE (gimple_assign_lhs (srep->stmt)))); + gimple_call_set_nothrow (call, true); + gimple_set_bb (call, gimple_bb (srep->stmt)); + stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep); + SLP_TREE_REPRESENTATIVE (node) = new_rep; + STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope; + STMT_SLP_TYPE (new_rep) = pure_slp; + STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node); + STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true; + STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep)); + SLP_TREE_CODE (node) = ERROR_MARK; + SLP_TREE_LANE_PERMUTATION (node).release (); + + vect_free_slp_tree (sub); + vect_free_slp_tree (add); + break; + } + default:; + } } /******************************************************************************* diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f08797c..5357cd0 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3728,6 +3728,8 @@ vect_optimize_slp (vec_info *vinfo) case CFN_COMPLEX_MUL: case CFN_COMPLEX_MUL_CONJ: case CFN_VEC_ADDSUB: + case CFN_VEC_FMADDSUB: + case CFN_VEC_FMSUBADD: vertices[idx].perm_in = 0; vertices[idx].perm_out = 0; default:; |