aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-slp-patterns.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-slp-patterns.cc')
-rw-r--r--gcc/tree-vect-slp-patterns.cc1587
1 files changed, 1587 insertions, 0 deletions
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
new file mode 100644
index 0000000..63fe756
--- /dev/null
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -0,0 +1,1587 @@
+/* SLP - Pattern matcher on SLP trees
+ Copyright (C) 2020-2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "optabs-tree.h"
+#include "insn-config.h"
+#include "recog.h" /* FIXME: for insn_data */
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "langhooks.h"
+#include "gimple-walk.h"
+#include "dbgcnt.h"
+#include "tree-vector-builder.h"
+#include "vec-perm-indices.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
+
+/* SLP Pattern matching mechanism.
+
+ This extension to the SLP vectorizer allows one to transform the generated SLP
+ tree based on any pattern. The difference between this and the normal vect
+ pattern matcher is that, unlike the latter, this matcher allows you to match
+ against instructions that do not belong to the same SSA dominator graph.
+
+ The only requirement that this pattern matcher has is that you are only
+ allowed to either match an entire group or none.
+
+ The pattern matcher currently only allows you to perform replacements to
+ internal functions.
+
+ Once the patterns are matched it is one way, these cannot be undone. It is
+ currently not supported to match patterns recursively.
+
+ To add a new pattern, implement the vect_pattern class and add the type to
+ slp_patterns.
+
+*/
+
+/*******************************************************************************
+ * vect_pattern class
+ ******************************************************************************/
+
+/* Check whether the target can vectorize NODE using internal function IFN.
+
+   Returns false when IFN is IFN_LAST, when NODE has no vector type, or when
+   direct_internal_fn_supported_p rejects IFN for NODE's vector type when
+   optimizing for speed; returns true otherwise.  The decision is reported
+   in the dump file when dumping is enabled.  */
+
+static bool
+vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
+{
+  tree vectype = SLP_TREE_VECTYPE (node);
+  if (ifn == IFN_LAST || !vectype)
+    return false;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "Found %s pattern in SLP tree\n",
+                     internal_fn_name (ifn));
+
+  if (!direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
+    {
+      /* VECTYPE was checked to be non-null above, so the only possible
+         failure here is an unsupported operation.  */
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "Target does not support %s for vector type "
+                         "%T\n", internal_fn_name (ifn), vectype);
+      return false;
+    }
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "Target supports %s vectorization with mode %T\n",
+                     internal_fn_name (ifn), vectype);
+  return true;
+}
+
+/*******************************************************************************
+ * General helper types
+ ******************************************************************************/
+
+/* The COMPLEX_OPERATION enum denotes the possible pairs of operations that can
+ be matched when looking for expressions that we are interested in matching for
+ complex number addition and mla. Each name lists the operation of the first
+ node of the pair followed by the operation of the second node. */
+
+typedef enum _complex_operation : unsigned {
+ PLUS_PLUS, /* PLUS_EXPR paired with PLUS_EXPR. */
+ MINUS_PLUS, /* MINUS_EXPR paired with PLUS_EXPR. */
+ PLUS_MINUS, /* PLUS_EXPR paired with MINUS_EXPR. */
+ MULT_MULT, /* MULT_EXPR paired with MULT_EXPR. */
+ CMPLX_NONE /* No supported pair was recognized. */
+} complex_operation_t;
+
+/*******************************************************************************
+ * General helper functions
+ ******************************************************************************/
+
+/* Helper function of linear_loads_p that classifies the load permutation
+   LOADS.  The result is
+
+     PERM_ODDODD    if every entry is 1,
+     PERM_EVENEVEN  if every entry is 0,
+     PERM_EVENODD   if entry I equals I for every position I,
+     PERM_ODDEVEN   if entry I equals I + 1 for even I and I - 1 for odd I,
+
+   tried in that order, so the first kind that fits wins.  PERM_UNKNOWN is
+   returned if LOADS is empty or fits none of the kinds above.  */
+
+static inline complex_perm_kinds_t
+is_linear_load_p (load_permutation_t loads)
+{
+  if (loads.length() == 0)
+    return PERM_UNKNOWN;
+
+  unsigned load, i;
+  complex_perm_kinds_t candidates[4]
+    = { PERM_ODDODD
+      , PERM_EVENEVEN
+      , PERM_EVENODD
+      , PERM_ODDEVEN
+      };
+
+  /* Number of entries in CANDIDATES.  sizeof (candidates) on its own is the
+     size in bytes and must not be used as an index bound.  */
+  const unsigned num_candidates
+    = sizeof (candidates) / sizeof (candidates[0]);
+
+  /* Strike out every candidate that is contradicted by some entry.  */
+  unsigned valid_patterns = num_candidates;
+  FOR_EACH_VEC_ELT (loads, i, load)
+    {
+      if (candidates[0] != PERM_UNKNOWN && load != 1)
+        {
+          candidates[0] = PERM_UNKNOWN;
+          valid_patterns--;
+        }
+      if (candidates[1] != PERM_UNKNOWN && load != 0)
+        {
+          candidates[1] = PERM_UNKNOWN;
+          valid_patterns--;
+        }
+      if (candidates[2] != PERM_UNKNOWN && load != i)
+        {
+          candidates[2] = PERM_UNKNOWN;
+          valid_patterns--;
+        }
+      if (candidates[3] != PERM_UNKNOWN
+          && load != (i % 2 == 0 ? i + 1 : i - 1))
+        {
+          candidates[3] = PERM_UNKNOWN;
+          valid_patterns--;
+        }
+
+      /* Nothing left to match, no need to look at further entries.  */
+      if (valid_patterns == 0)
+        return PERM_UNKNOWN;
+    }
+
+  /* Report the first surviving candidate.  */
+  for (i = 0; i < num_candidates; i++)
+    if (candidates[i] != PERM_UNKNOWN)
+      return candidates[i];
+
+  return PERM_UNKNOWN;
+}
+
+/* Merge the permute kinds A and B into the kind describing both.
+
+   Equal kinds merge to themselves.  PERM_TOP acts as the identity: merging
+   it with any kind yields that other kind.  Every other combination yields
+   PERM_UNKNOWN.  */
+
+static inline complex_perm_kinds_t
+vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
+{
+  if (a == b || b == PERM_TOP)
+    return a;
+
+  return a == PERM_TOP ? b : PERM_UNKNOWN;
+}
+
+/* Check to see if all loads rooted in ROOT are linear. Linearity is
+ defined as having no gaps between values loaded.
+
+ Returns the permute kind of the loads reachable from ROOT. Results are
+ memoized in PERM_CACHE, keyed on the node:
+
+ - a null ROOT yields PERM_UNKNOWN;
+ - a node with a load permutation yields the result of is_linear_load_p
+ on that permutation;
+ - any other node that is not a vect_internal_def yields PERM_TOP;
+ - otherwise the kinds of the children are merged with vect_merge_perms,
+ starting from PERM_TOP. */
+
+static complex_perm_kinds_t
+linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
+{
+ if (!root)
+ return PERM_UNKNOWN;
+
+ unsigned i;
+ complex_perm_kinds_t *tmp;
+
+ if ((tmp = perm_cache->get (root)) != NULL)
+ return *tmp;
+
+ complex_perm_kinds_t retval = PERM_UNKNOWN;
+ perm_cache->put (root, retval); /* Provisional entry; this is what a lookup
+ of ROOT returns if ROOT is reached again
+ while its children are being visited. */
+
+ /* If it's a load node, then just read the load permute. */
+ if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
+ {
+ retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
+ perm_cache->put (root, retval);
+ return retval;
+ }
+ else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
+ {
+ retval = PERM_TOP;
+ perm_cache->put (root, retval);
+ return retval;
+ }
+
+ complex_perm_kinds_t kind = PERM_TOP;
+
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
+ {
+ complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
+ kind = vect_merge_perms (kind, res);
+ /* Unknown and Top are not valid on blends as they produce no permute.
+ NOTE(review): this early return does not update PERM_CACHE, so the
+ entry for ROOT stays at the provisional PERM_UNKNOWN even when
+ PERM_TOP is returned here -- confirm this is intended. */
+ retval = kind;
+ if (kind == PERM_UNKNOWN || kind == PERM_TOP)
+ return retval;
+ }
+
+ retval = kind;
+
+ perm_cache->put (root, retval);
+ return retval;
+}
+
+
+/* Build a VEC_PERM_EXPR node on top of NODE that swaps the two lanes of
+   every even/odd lane pair of NODE, i.e. lane 2k of the result selects lane
+   2k+1 of NODE and lane 2k+1 selects lane 2k.
+
+   The new node has NODE as its only child and takes its vector type, lane
+   count and representative statement from NODE.  It is returned with a
+   reference count of one, and the reference count of NODE is incremented
+   for the new use.  */
+
+static slp_tree
+vect_build_swap_evenodd_node (slp_tree node)
+{
+  /* Lane permute selecting lanes { 1, 0, 3, 2, ... } of the only child.  */
+  vec<std::pair<unsigned, unsigned> > swapped;
+  swapped.create (SLP_TREE_LANES (node));
+
+  for (unsigned even = 0; even < SLP_TREE_LANES (node); even += 2)
+    {
+      const unsigned odd = even + 1;
+      swapped.quick_push (std::make_pair (0, odd));
+      swapped.quick_push (std::make_pair (0, even));
+    }
+
+  /* Wrap NODE in the new permute node.  */
+  slp_tree perm_node = vect_create_new_slp_node (1, VEC_PERM_EXPR);
+  SLP_TREE_CHILDREN (perm_node).quick_push (node);
+  SLP_TREE_REF_COUNT (node)++;
+
+  SLP_TREE_LANE_PERMUTATION (perm_node) = swapped;
+  SLP_TREE_REPRESENTATIVE (perm_node) = SLP_TREE_REPRESENTATIVE (node);
+  SLP_TREE_VECTYPE (perm_node) = SLP_TREE_VECTYPE (node);
+  SLP_TREE_LANES (perm_node) = SLP_TREE_LANES (node);
+  SLP_TREE_REF_COUNT (perm_node) = 1;
+  return perm_node;
+}
+
+/* Return true if NODE is non-null, has a representative statement, and that
+   statement is a gimple assignment whose right-hand side code is CODE.
+   Return false in every other case.  */
+
+static inline bool
+vect_match_expression_p (slp_tree node, tree_code code)
+{
+  if (node == NULL)
+    return false;
+
+  stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
+  if (rep == NULL)
+    return false;
+
+  gimple *stmt = STMT_VINFO_STMT (rep);
+  return is_gimple_assign (stmt) && gimple_assign_rhs_code (stmt) == code;
+}
+
+/* Check that the lane permute PERMUTES is a linear blend that alternates
+   between two sources: entry I must have EVEN as its first component when
+   I is even and ODD when I is odd, and I itself as its second component.
+   The repetition accounts for unrolled loops.
+
+   Returns false if PERMUTES is empty or has an odd number of entries.  */
+
+static inline bool
+vect_check_evenodd_blend (lane_permutation_t &permutes,
+                          unsigned even, unsigned odd)
+{
+  const unsigned len = permutes.length ();
+  if (len == 0 || len % 2 != 0)
+    return false;
+
+  for (unsigned lane = 0; lane < len; lane++)
+    {
+      const unsigned expected_src = (lane % 2 == 0) ? even : odd;
+      if (permutes[lane].first != expected_src
+          || permutes[lane].second != lane)
+        return false;
+    }
+
+  return true;
+}
+
+/* This function will match the two gimple expressions representing NODE1 and
+ NODE2 in parallel and returns the pair operation that represents the two
+ expressions in the two statements.
+
+ If match is successful then the corresponding complex_operation is
+ returned and, when OPS is not NULL, NODE1 and NODE2 are appended to OPS in
+ that order.
+
+ If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
+ from the two nodes alternatingly. This is only checked for the MINUS_PLUS
+ and PLUS_MINUS pairs. In addition, when TWO_OPERANDS and OPS is not NULL,
+ both nodes must have the same two children, in either order.
+
+ If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
+
+ e.g. the following gimple statements
+
+ stmt 0 _39 = _37 + _12;
+ stmt 1 _6 = _38 - _36;
+
+ will return PLUS_MINUS along with OPS containing the nodes for stmt 0 and
+ stmt 1.
+*/
+
+static complex_operation_t
+vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
+ bool two_operands = true, vec<slp_tree> *ops = NULL)
+{
+ complex_operation_t result = CMPLX_NONE;
+
+ if (vect_match_expression_p (node1, MINUS_EXPR)
+ && vect_match_expression_p (node2, PLUS_EXPR)
+ && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
+ result = MINUS_PLUS;
+ else if (vect_match_expression_p (node1, PLUS_EXPR)
+ && vect_match_expression_p (node2, MINUS_EXPR)
+ && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
+ result = PLUS_MINUS;
+ else if (vect_match_expression_p (node1, PLUS_EXPR)
+ && vect_match_expression_p (node2, PLUS_EXPR))
+ result = PLUS_PLUS;
+ else if (vect_match_expression_p (node1, MULT_EXPR)
+ && vect_match_expression_p (node2, MULT_EXPR))
+ result = MULT_MULT;
+
+ if (result != CMPLX_NONE && ops != NULL)
+ {
+ if (two_operands)
+ {
+ auto l0node = SLP_TREE_CHILDREN (node1);
+ auto l1node = SLP_TREE_CHILDREN (node2);
+
+ /* Check if the tree is connected as we expect it: both nodes must
+ use the same pair of children, in the same or in swapped order. */
+ if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
+ || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
+ return CMPLX_NONE;
+ }
+ ops->safe_push (node1);
+ ops->safe_push (node2);
+ }
+ return result;
+}
+
+/* Overload of vect_detect_pair_op that matches against the representative
+   statements of the two children of NODE, using the lane permutation of
+   NODE as the blend to check.
+
+   CMPLX_NONE is returned without looking at the children if NODE does not
+   have exactly two children, or if TWO_OPERANDS is false and NODE is a
+   VEC_PERM_EXPR.  */
+
+static complex_operation_t
+vect_detect_pair_op (slp_tree node, bool two_operands = true,
+                     vec<slp_tree> *ops = NULL)
+{
+  const bool unexpected_perm
+    = !two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR;
+
+  vec<slp_tree> kids = SLP_TREE_CHILDREN (node);
+  if (unexpected_perm || kids.length () != 2)
+    return CMPLX_NONE;
+
+  return vect_detect_pair_op (kids[0], kids[1],
+                              SLP_TREE_LANE_PERMUTATION (node),
+                              two_operands, ops);
+}
+
+/*******************************************************************************
+ * complex_pattern class
+ ******************************************************************************/
+
+/* SLP Complex Numbers pattern matching.
+
+ As an example, the following simple loop:
+
+ double a[restrict N]; double b[restrict N]; double c[restrict N];
+
+ for (int i=0; i < N; i+=2)
+ {
+ c[i] = a[i] - b[i+1];
+ c[i+1] = a[i+1] + b[i];
+ }
+
+ which represents a complex addition with a rotation of 90 degrees in the
+ Argand plane, i.e. if `a` and `b` were complex numbers then this would be the
+ same as `a + (b * I)`.
+
+ Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
+ both recognized in order for the pattern to work. As an SLP tree this is
+ represented as
+
+ +--------------------------------+
+ | stmt 0 *_9 = _10; |
+ | stmt 1 *_15 = _16; |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | stmt 0 _10 = _4 - _8; |
+ | stmt 1 _16 = _12 + _14; |
+ | lane permutation { 0[0] 1[1] } |
+ +--------------------------------+
+ | |
+ | |
+ | |
+ +-----+ | | +-----+
+ | | | | | |
+ +-----| { } |<-----+ +----->| { } --------+
+ | | | +------------------| | |
+ | +-----+ | +-----+ |
+ | | | |
+ | | | |
+ | +------|------------------+ |
+ | | | |
+ v v v v
+ +--------------------------+ +--------------------------------+
+ | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
+ | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
+ | load permutation { 1 0 } | | load permutation { 0 1 } |
+ +--------------------------+ +--------------------------------+
+
+ The pattern matcher allows you to replace both statements 0 and 1 or none at
+ all. Because this operation is a two operands operation the actual nodes
+ being replaced are those in the { } nodes. The actual scalar statements
+ themselves are not replaced or used during the matching but instead the
+ SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
+ replace and match on any number of nodes.
+
+ Because the pattern matcher matches on the representative statement for the
+ SLP node, in the case of two_operators it allows you to match the children of
+ the node. This is done using the method `recognize ()`.
+
+*/
+
+/* The complex_pattern class contains common code for pattern matchers that work
+ on complex numbers. These provide functionality to allow de-construction and
+ validation of sequences depicting/transforming REAL and IMAG pairs.
+
+ The constructor is protected, so objects are only created through derived
+ classes. build () replaces the representative statement of every node in
+ m_workset with a call to the matched internal function. */
+
+class complex_pattern : public vect_pattern
+{
+ protected:
+ auto_vec<slp_tree> m_workset; /* Nodes that build () rewrites. */
+ complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+ : vect_pattern (node, m_ops, ifn)
+ {
+ this->m_workset.safe_push (*node); /* The matched node is the first work
+ item. */
+ }
+
+ public:
+ void build (vec_info *);
+
+ static internal_fn
+ matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+ vec<slp_tree> *);
+};
+
+/* Create a replacement pattern statement for each node in m_workset and insert
+ the new statement into that node as the new representative statement. The
+ statement is created as a call to internal function m_ifn with m_num_args
+ arguments; every argument is set to the lhs of the old representative
+ statement.
+
+ Furthermore the new statement is registered with the vectorization
+ information structure VINFO, through add_pattern_stmt, as a pattern
+ statement for the old one. It is then marked as vect_used_in_scope and
+ pure_slp, flagged as an SLP-only pattern, and it takes over the reduction
+ definition of the original statement.
+
+ The new call is given the basic block and location of the old statement.
+
+ Nothing is returned; each node is updated in place: its representative
+ becomes the new call, its lane permutation is released and its code is set
+ to CALL_EXPR.
+
+ This default method is designed to only match against simple operands where
+ all the input and output types are the same.
+*/
+
+void
+complex_pattern::build (vec_info *vinfo)
+{
+ stmt_vec_info stmt_info;
+
+ auto_vec<tree> args;
+ args.create (this->m_num_args);
+ args.quick_grow_cleared (this->m_num_args);
+ slp_tree node;
+ unsigned ix;
+ stmt_vec_info call_stmt_info;
+ gcall *call_stmt = NULL;
+
+ /* Now modify the nodes themselves. */
+ FOR_EACH_VEC_ELT (this->m_workset, ix, node)
+ {
+ /* Calculate the location of the statement in NODE to replace. */
+ stmt_info = SLP_TREE_REPRESENTATIVE (node);
+ stmt_vec_info reduc_def
+ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
+ gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
+ tree lhs_old_stmt = gimple_get_lhs (old_stmt);
+ tree type = TREE_TYPE (lhs_old_stmt);
+
+ /* Create the argument set for use by gimple_build_call_internal_vec.
+ Every argument slot is filled with the lhs of the old statement. */
+ for (unsigned i = 0; i < this->m_num_args; i++)
+ args[i] = lhs_old_stmt;
+
+ /* Create the new pattern statements. */
+ call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
+ tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
+ gimple_call_set_lhs (call_stmt, var);
+ gimple_set_location (call_stmt, gimple_location (old_stmt));
+ gimple_call_set_nothrow (call_stmt, true);
+
+ /* Adjust the book-keeping for the new and old statements for use during
+ SLP. This is required to get the right VF and statement during SLP
+ analysis. These changes are created after relevancy has been set for
+ the nodes as such we need to manually update them. Any changes will be
+ undone if SLP is cancelled. */
+ call_stmt_info
+ = vinfo->add_pattern_stmt (call_stmt, stmt_info);
+
+ /* Make sure to mark the representative statement pure_slp and
+ relevant and transfer reduction info. */
+ STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
+ STMT_SLP_TYPE (call_stmt_info) = pure_slp;
+ STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
+
+ gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
+ STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
+
+ /* Since we are replacing all the statements in the group with the same
+ thing it doesn't really matter. So just set it every time a new stmt
+ is created. */
+ SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
+ SLP_TREE_LANE_PERMUTATION (node).release ();
+ SLP_TREE_CODE (node) = CALL_EXPR;
+ }
+}
+
+/*******************************************************************************
+ * complex_add_pattern class
+ *
+ * Pattern for a complex addition whose second operand is rotated, replaced by
+ * a call to IFN_COMPLEX_ADD_ROT90 or IFN_COMPLEX_ADD_ROT270. The constructor
+ * is protected; objects are created through recognize () or mkInstance ().
+ ******************************************************************************/
+
+class complex_add_pattern : public complex_pattern
+{
+ protected:
+ complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+ : complex_pattern (node, m_ops, ifn)
+ {
+ this->m_num_args = 2; /* The replacement call takes two arguments. */
+ }
+
+ public:
+ void build (vec_info *);
+ static internal_fn
+ matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+ vec<slp_tree> *);
+
+ static vect_pattern*
+ recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+ static vect_pattern*
+ mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+ {
+ return new complex_add_pattern (node, m_ops, ifn); /* Caller owns the
+ new object. */
+ }
+};
+
+/* Perform a replacement of the detected complex add pattern with the new
+ instruction sequences.
+
+ The two children of the matched node are replaced by the first child of
+ m_ops[0] and by a new permute node, built by vect_build_swap_evenodd_node,
+ that wraps the second child of m_ops[0]. The nodes in m_ops[0] and
+ m_ops[1] are then released with vect_free_slp_tree and
+ complex_pattern::build creates the replacement statement. VINFO is passed
+ through to complex_pattern::build. */
+
+void
+complex_add_pattern::build (vec_info *vinfo)
+{
+ SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
+
+ slp_tree node = this->m_ops[0];
+ vec<slp_tree> children = SLP_TREE_CHILDREN (node);
+
+ /* First re-arrange the children. */
+ SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
+ SLP_TREE_CHILDREN (*this->m_node)[1] =
+ vect_build_swap_evenodd_node (children[1]);
+
+ /* Take references on the new children before the old operand nodes are
+ released below.
+ NOTE(review): the permute node is created with a reference count of one
+ and is incremented again here -- confirm the extra reference is
+ intended. */
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
+ vect_free_slp_tree (this->m_ops[0]);
+ vect_free_slp_tree (this->m_ops[1]);
+
+ complex_pattern::build (vinfo);
+}
+
+/* Pattern matcher for trying to match complex addition pattern in SLP tree.
+
+   This function matches the patterns shaped as:
+
+     c[i] = a[i] - b[i+1];
+     c[i+1] = a[i+1] + b[i];
+
+   OP is the pair operation that was detected for *NODE and OPS holds the
+   two nodes that were matched.  On success the internal function to use is
+   returned.  IFN_LAST is returned if OP is not a rotated addition, if the
+   load permutes of the operands do not fit, or if the target does not
+   support the operation for *NODE.  */
+
+internal_fn
+complex_add_pattern::matches (complex_operation_t op,
+                              slp_tree_to_load_perm_map_t *perm_cache,
+                              slp_tree *node, vec<slp_tree> *ops)
+{
+  internal_fn ifn;
+
+  /* Find the two components.  Rotation in the complex plane will modify
+     the operations:
+
+      * Rotation 0: + +
+      * Rotation 90: - +
+      * Rotation 180: - -
+      * Rotation 270: + -
+
+     Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
+     to care about them here.  */
+  switch (op)
+    {
+    case MINUS_PLUS:
+      ifn = IFN_COMPLEX_ADD_ROT90;
+      break;
+    case PLUS_MINUS:
+      ifn = IFN_COMPLEX_ADD_ROT270;
+      break;
+    default:
+      return IFN_LAST;
+    }
+
+  /* A rotated addition is always detected together with its two nodes.  */
+  gcc_assert (ops->length () == 2);
+
+  vec<slp_tree> operands = SLP_TREE_CHILDREN ((*ops)[0]);
+
+  /* The first operand must be unpermuted and the second one must have its
+     even and odd lanes swapped.  */
+  const bool first_unpermuted
+    = linear_loads_p (perm_cache, operands[0]) == PERM_EVENODD;
+  if (!first_unpermuted
+      || linear_loads_p (perm_cache, operands[1]) != PERM_ODDEVEN)
+    return IFN_LAST;
+
+  return vect_pattern_validate_optab (ifn, *node) ? ifn : IFN_LAST;
+}
+
+/* Attempt to recognize a complex add pattern rooted at *NODE.  Returns a
+   newly allocated complex_add_pattern on success and NULL if *NODE does not
+   match.  PERM_CACHE is the load permute cache used while matching.  */
+
+vect_pattern*
+complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+                                slp_tree *node)
+{
+  auto_vec<slp_tree> matched;
+  complex_operation_t pair_op = vect_detect_pair_op (*node, true, &matched);
+
+  internal_fn ifn
+    = complex_add_pattern::matches (pair_op, perm_cache, node, &matched);
+  if (ifn != IFN_LAST)
+    return mkInstance (node, &matched, ifn);
+
+  return NULL;
+}
+
+/*******************************************************************************
+ * complex_mul_pattern
+ ******************************************************************************/
+
+/* Check to see if either of the two trees in ARGS is a NEGATE_EXPR, looking
+   at args[0] first.
+
+   If a negate is found then ARGS is reordered such that the negate node is
+   the second entry, that entry is replaced by the first child of the negate
+   node, and TRUE is returned.  If NEG_FIRST_P is non-null it is set to TRUE
+   when the negate was found in args[0] and to FALSE when it was found in
+   args[1].
+
+   If no negate is found then FALSE is returned and neither ARGS nor
+   *NEG_FIRST_P is modified.  */
+
+static inline bool
+vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL)
+{
+  gcc_assert (args.length () == 2);
+
+  const bool first_is_neg = vect_match_expression_p (args[0], NEGATE_EXPR);
+  if (!first_is_neg && !vect_match_expression_p (args[1], NEGATE_EXPR))
+    return false;
+
+  /* Move the negate into the second slot.  */
+  if (first_is_neg)
+    std::swap (args[0], args[1]);
+
+  if (neg_first_p)
+    *neg_first_p = first_is_neg;
+
+  /* Look through the negate.  */
+  args[1] = SLP_TREE_CHILDREN (args[1])[0];
+  return true;
+}
+
+/* Return true if PERM is either PERM_TOP or the same kind as KIND.  */
+
+static inline bool
+is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
+{
+  if (perm == PERM_TOP)
+    return true;
+
+  return perm == kind;
+}
+
+/* Helper function that checks whether the load permutes of the entries of
+ LEFT_OP and RIGHT_OP describe an operation that is either a complex
+ multiplication or a complex multiplication by conjugated value.
+
+ If the negation is expected to be in the first half of the tree (As required
+ by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate
+ operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
+ second operand contains the conjugate operation.
+
+ FMS selects which permute kinds are accepted for RIGHT_OP and which of the
+ two LEFT_OP entries is inspected first. *CONJ_FIRST_OPERAND is always
+ written: it is cleared on entry and set to true on one path only, marked
+ below. Returns true if the permute kinds are consistent and false
+ otherwise. Permute kinds are obtained through linear_loads_p, using
+ PERM_CACHE. */
+
+static inline bool
+vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
+ const vec<slp_tree> &left_op,
+ const vec<slp_tree> &right_op,
+ bool neg_first, bool *conj_first_operand,
+ bool fms)
+{
+ /* The presence of a negation indicates that we have either a conjugate or a
+ rotation. We need to distinguish which one. */
+ *conj_first_operand = false;
+ complex_perm_kinds_t kind;
+
+ /* Complex conjugates have the negation on the imaginary part of the
+ number where rotations affect the real component. So check if the
+ negation is on a dup of lane 1. */
+ if (fms)
+ {
+ /* Canonicalization for fms is not consistent. So have to test both
+ variants to be sure. This needs to be fixed in the mid-end so
+ this part can be simpler. */
+ kind = linear_loads_p (perm_cache, right_op[0]);
+ if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
+ && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
+ PERM_ODDEVEN))
+ || (kind == PERM_ODDEVEN
+ && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
+ PERM_ODDODD))))
+ return false;
+ }
+ else
+ {
+ /* Rejected only when both tests fail: RIGHT_OP[1] is not PERM_ODDODD
+ and RIGHT_OP[0] is neither PERM_ODDEVEN nor PERM_TOP. */
+ if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
+ && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
+ PERM_ODDEVEN))
+ return false;
+ }
+
+ /* Deal with differences in indexes. */
+ int index1 = fms ? 1 : 0;
+ int index2 = fms ? 0 : 1;
+
+ /* Check if the conjugate is on the first or second operand. The
+ order of the node with the conjugate value determines this, and the dup
+ node must be one of lane 0 of the same DR as the neg node. */
+ kind = linear_loads_p (perm_cache, left_op[index1]);
+ if (kind == PERM_TOP)
+ {
+ if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
+ return true;
+ }
+ else if (kind == PERM_EVENODD && !neg_first)
+ {
+ if ((kind = linear_loads_p (perm_cache, left_op[index2])) != PERM_EVENEVEN)
+ return false;
+ return true;
+ }
+ else if (kind == PERM_EVENEVEN && neg_first)
+ {
+ if ((kind = linear_loads_p (perm_cache, left_op[index2])) != PERM_EVENODD)
+ return false;
+
+ /* The only path that reports the conjugate on the first operand. */
+ *conj_first_operand = true;
+ return true;
+ }
+ else
+ return false;
+
+ if (kind != PERM_EVENEVEN) /* Only reached from the PERM_TOP case above
+ when LEFT_OP[INDEX2] is not PERM_EVENODD;
+ KIND is still PERM_TOP at this point. */
+ return false;
+
+ return true;
+}
+
+/* Helper function to help distinguish between a conjugate and a rotation in a
+   complex multiplication.  The operations have similar shapes but the order
+   of the load permutes is different.
+
+   Returns TRUE if the permute kind of OP[0] or, failing that, of OP[1] is
+   either PERMKIND or PERM_TOP, and FALSE otherwise.  Permute kinds are
+   obtained through linear_loads_p, using PERM_CACHE.  */
+
+static inline bool
+vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
+                              const vec<slp_tree> &op,
+                              complex_perm_kinds_t permKind)
+{
+  /* The left node is the more common case, test it first.  */
+  if (is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
+    return true;
+
+  return is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind);
+}
+
+/* This function combines two nodes containing only even and only odd lanes
+   together into a single node which contains the nodes in even/odd order
+   by using a lane permute.
+
+   The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
+   So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
+
+   The representative statement, vector type and number of lanes of the new
+   node are taken from REP.  The new node is returned with a reference count
+   of one, and the reference counts of EVEN and ODD are incremented for the
+   new use.  */
+
+static slp_tree
+vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
+{
+  vec<std::pair<unsigned, unsigned> > perm;
+  perm.create (SLP_TREE_LANES (rep));
+
+  /* Even result lanes are taken from child 0 (EVEN) and odd result lanes
+     from child 1 (ODD), each from the lane with the same index.  */
+  for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
+    {
+      perm.quick_push (std::make_pair (0, x));
+      perm.quick_push (std::make_pair (1, x+1));
+    }
+
+  /* vect_create_new_slp_node already sets up the children vector for the
+     requested number of operands (vect_build_swap_evenodd_node relies on
+     the same), so it must not be created a second time here.  */
+  slp_tree vnode = vect_create_new_slp_node (2, VEC_PERM_EXPR);
+  SLP_TREE_LANE_PERMUTATION (vnode) = perm;
+
+  SLP_TREE_CHILDREN (vnode).quick_push (even);
+  SLP_TREE_CHILDREN (vnode).quick_push (odd);
+  SLP_TREE_REF_COUNT (even)++;
+  SLP_TREE_REF_COUNT (odd)++;
+  SLP_TREE_REF_COUNT (vnode) = 1;
+
+  SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
+  gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
+  /* Representation is set to that of the current node as the vectorizer
+     can't deal with VEC_PERMs with no representation, as would be the
+     case with invariants.  */
+  SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
+  SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
+  return vnode;
+}
+
+class complex_mul_pattern : public complex_pattern
+{ /* Pattern for complex multiplication and complex multiply-accumulate,
+ optionally with a conjugated operand. The constructor is protected;
+ objects are created through recognize () or mkInstance (). */
+ protected:
+ complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+ : complex_pattern (node, m_ops, ifn)
+ {
+ this->m_num_args = 2; /* The replacement call is built with two
+ arguments. */
+ }
+
+ public:
+ void build (vec_info *);
+ static internal_fn
+ matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+ vec<slp_tree> *);
+
+ static vect_pattern*
+ recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+ static vect_pattern*
+ mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+ {
+ return new complex_mul_pattern (node, m_ops, ifn); /* Caller owns the
+ new object. */
+ }
+
+};
+
+/* Pattern matcher for trying to match complex multiply and complex multiply
+ and accumulate pattern in SLP tree. If the operation matches then the
+ matched internal function is returned and the contents of OPS are replaced
+ by the operands for the replacement: the addend first, if there is one,
+ followed by three multiplication operands.
+
+ If no match is found then IFN_LAST is returned and OPS is unchanged.
+
+ This function matches the patterns shaped as:
+
+ double ax = (b[i+1] * a[i]);
+ double bx = (a[i+1] * b[i]);
+
+ c[i] = c[i] - ax;
+ c[i+1] = c[i+1] + bx;
+
+ OP is the pair operation detected for *NODE; only MINUS_PLUS is accepted.
+ On entry the first element of OPS is the node whose children are
+ inspected. PERM_CACHE is the load permute cache used by linear_loads_p. */
+
+internal_fn
+complex_mul_pattern::matches (complex_operation_t op,
+ slp_tree_to_load_perm_map_t *perm_cache,
+ slp_tree *node, vec<slp_tree> *ops)
+{
+ internal_fn ifn = IFN_LAST;
+
+ if (op != MINUS_PLUS)
+ return IFN_LAST;
+
+ auto childs = *ops;
+ auto l0node = SLP_TREE_CHILDREN (childs[0]);
+
+ bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
+ bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
+ if (!mul0 && !mul1)
+ return IFN_LAST;
+
+ /* Now operand2+4 may lead to another expression. */
+ auto_vec<slp_tree> left_op, right_op;
+ slp_tree add0 = NULL;
+
+ /* Check if we may be a multiply add. */
+ if (!mul0
+ && vect_match_expression_p (l0node[0], PLUS_EXPR))
+ {
+ auto vals = SLP_TREE_CHILDREN (l0node[0]);
+ /* Check if it's a multiply, otherwise no idea what this is. */
+ if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
+ return IFN_LAST;
+
+ /* Check if the ADD is linear, otherwise it's not valid complex FMA. */
+ if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
+ return IFN_LAST;
+
+ left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
+ add0 = vals[0];
+ }
+ else
+ left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
+
+ right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
+
+ if (left_op.length () != 2
+ || right_op.length () != 2
+ || !mul0
+ || !mul1
+ || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
+ return IFN_LAST;
+
+ bool neg_first = false;
+ bool conj_first_operand = false;
+ bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
+
+ if (!is_neg)
+ {
+ /* A multiplication needs to multiply against the real pair, otherwise
+ the pattern matches that of FMS. */
+ if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
+ || vect_normalize_conj_loc (left_op))
+ return IFN_LAST;
+ if (add0)
+ ifn = IFN_COMPLEX_FMA;
+ else
+ ifn = IFN_COMPLEX_MUL;
+ }
+ else
+ {
+ if (!vect_validate_multiplication (perm_cache, left_op, right_op,
+ neg_first, &conj_first_operand,
+ false))
+ return IFN_LAST;
+
+ if(add0)
+ ifn = IFN_COMPLEX_FMA_CONJ;
+ else
+ ifn = IFN_COMPLEX_MUL_CONJ;
+ }
+
+ if (!vect_pattern_validate_optab (ifn, *node))
+ return IFN_LAST;
+
+ /* From here on the match has succeeded; rebuild OPS for the replacement. */
+ ops->truncate (0);
+ ops->create (add0 ? 4 : 3); /* NOTE(review): create () after truncate ()
+ presumably allocates fresh storage without
+ releasing the old one -- confirm against
+ vec.h. */
+
+ if (add0)
+ ops->quick_push (add0);
+
+ complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
+ if (kind == PERM_EVENODD)
+ {
+ ops->quick_push (left_op[1]);
+ ops->quick_push (right_op[1]);
+ ops->quick_push (left_op[0]);
+ }
+ else if (kind == PERM_TOP) /* Same operand order as the PERM_EVENODD case. */
+ {
+ ops->quick_push (left_op[1]);
+ ops->quick_push (right_op[1]);
+ ops->quick_push (left_op[0]);
+ }
+ else if (kind == PERM_EVENEVEN && !conj_first_operand)
+ {
+ ops->quick_push (left_op[0]);
+ ops->quick_push (right_op[0]);
+ ops->quick_push (left_op[1]);
+ }
+ else
+ {
+ ops->quick_push (left_op[0]);
+ ops->quick_push (right_op[1]);
+ ops->quick_push (left_op[1]);
+ }
+
+ return ifn;
+}
+
+/* Attempt to recognize a complex mul pattern. */
+
+vect_pattern*
+complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+                                slp_tree *node)
+{
+  /* Operand vector handed to the pair detection and then to the matcher.  */
+  auto_vec<slp_tree> ops;
+  complex_operation_t op = vect_detect_pair_op (*node, true, &ops);
+
+  internal_fn ifn
+    = complex_mul_pattern::matches (op, perm_cache, node, &ops);
+  if (ifn != IFN_LAST)
+    return new complex_mul_pattern (node, &ops, ifn);
+
+  /* The matcher rejected the node.  */
+  return NULL;
+}
+
+/* Perform a replacement of the detected complex mul pattern with the new
+ instruction sequences.
+
+ For IFN_COMPLEX_MUL and IFN_COMPLEX_MUL_CONJ the children of *M_NODE
+ become { M_OPS[2], COMBINE (M_OPS[0], M_OPS[1]) }. For IFN_COMPLEX_FMA
+ and IFN_COMPLEX_FMA_CONJ they become
+ { M_OPS[0], M_OPS[3], COMBINE (M_OPS[1], M_OPS[2]) } and M_NUM_ARGS is
+ incremented. COMBINE is the node returned by vect_build_combine_node.
+ In both cases every previous child is passed to vect_free_slp_tree
+ before the new children are stored, and complex_pattern::build is
+ called last to rewrite the node itself. Any other M_IFN is treated as
+ unreachable. */
+
+void
+complex_mul_pattern::build (vec_info *vinfo)
+{
+ slp_tree node;
+ unsigned i;
+ switch (this->m_ifn)
+ {
+ case IFN_COMPLEX_MUL:
+ case IFN_COMPLEX_MUL_CONJ:
+ {
+ slp_tree newnode
+ = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
+ *this->m_node);
+ SLP_TREE_REF_COUNT (this->m_ops[2])++; /* Becomes child 0 below.
+ NOTE(review): presumably the reference is taken before the old
+ children are freed so that the operand survives -- confirm. */
+
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
+ vect_free_slp_tree (node);
+
+ /* First re-arrange the children. Slots 0 and 1 of the existing
+ child vector are overwritten in place. */
+ SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
+ SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
+ SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
+ break;
+ }
+ case IFN_COMPLEX_FMA:
+ case IFN_COMPLEX_FMA_CONJ:
+ {
+ SLP_TREE_REF_COUNT (this->m_ops[0])++; /* Becomes child 0 below. */
+ slp_tree newnode
+ = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
+ *this->m_node);
+ SLP_TREE_REF_COUNT (this->m_ops[3])++; /* Becomes child 1 below. */
+
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
+ vect_free_slp_tree (node);
+
+ /* First re-arrange the children. The child vector is grown to
+ three entries, all of which are assigned below. */
+ SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
+ SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[0];
+ SLP_TREE_CHILDREN (*this->m_node)[1] = this->m_ops[3];
+ SLP_TREE_CHILDREN (*this->m_node)[2] = newnode;
+
+ /* Tell the builder to expect an extra argument. */
+ this->m_num_args++;
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
+
+ /* And then rewrite the node itself. */
+ complex_pattern::build (vinfo);
+}
+
+/*******************************************************************************
+ * complex_fms_pattern class
+ ******************************************************************************/
+
+class complex_fms_pattern : public complex_pattern
+{
+  protected:
+    /* Forward NODE, M_OPS and IFN to complex_pattern and set the argument
+       count M_NUM_ARGS to 3.  */
+    complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+      : complex_pattern (node, m_ops, ifn)
+    {
+      m_num_args = 3;
+    }
+
+  public:
+    /* Factory wrapper around the protected constructor.  */
+    static vect_pattern *
+    mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
+    {
+      return new complex_fms_pattern (node, m_ops, ifn);
+    }
+
+    static vect_pattern *
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+    static internal_fn
+    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+             vec<slp_tree> *);
+
+    void build (vec_info *);
+};
+
+
+/* Pattern matcher for trying to match complex multiply and subtract pattern
+ in SLP tree. If the operation matches then IFN is set to the operation
+ it matched and the arguments to the two replacement statements are put in
+ m_ops.
+
+ If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
+
+ This function matches the patterns shaped as:
+
+ double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
+ double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
+
+ c[i] = c[i] - ax;
+ c[i+1] = c[i+1] + bx;
+
+ The matched internal function is returned, or IFN_LAST if no match occurred.
+ The result of the initial pair detection is expected to be in OP and the
+ operands of the replacement are returned in OPS. */
+
+internal_fn
+complex_fms_pattern::matches (complex_operation_t op,
+                              slp_tree_to_load_perm_map_t *perm_cache,
+                              slp_tree * ref_node, vec<slp_tree> *ops)
+{
+  /* Two_operands nodes may match as well, but we are looking for a normal
+     MINUS_EXPR operation, so anything detected as a pair operation is
+     rejected up front.  */
+  if (op != CMPLX_NONE)
+    return IFN_LAST;
+
+  slp_tree minus = *ref_node;
+  if (!vect_match_expression_p (minus, MINUS_EXPR))
+    return IFN_LAST;
+
+  /* The second operand of the MINUS has to be a multiplication and the
+     first one has to be detected as a PLUS_MINUS pair.  */
+  auto minus_ops = SLP_TREE_CHILDREN (minus);
+  if (!vect_match_expression_p (minus_ops[1], MULT_EXPR)
+      || vect_detect_pair_op (minus_ops[0]) != PLUS_MINUS)
+    return IFN_LAST;
+
+  auto pair_ops = SLP_TREE_CHILDREN (minus_ops[0]);
+  auto first_ops = SLP_TREE_CHILDREN (pair_ops[0]);
+  slp_tree inner_mul = first_ops[1];
+
+  /* Collect the operands of both candidate multiplications.  */
+  auto_vec<slp_tree> left_op, right_op;
+  left_op.safe_splice (SLP_TREE_CHILDREN (inner_mul));
+  right_op.safe_splice (SLP_TREE_CHILDREN (minus_ops[1]));
+
+  /* Nodes without exactly two children, or an inner node that is not a
+     multiplication, are of no interest.  */
+  if (left_op.length () != 2
+      || right_op.length () != 2
+      || !vect_match_expression_p (inner_mul, MULT_EXPR))
+    return IFN_LAST;
+
+  bool is_neg = vect_normalize_conj_loc (left_op);
+
+  bool conj_first_operand = false;
+  if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
+                                     &conj_first_operand, true))
+    return IFN_LAST;
+
+  internal_fn ifn = is_neg ? IFN_COMPLEX_FMS_CONJ : IFN_COMPLEX_FMS;
+  if (!vect_pattern_validate_optab (ifn, *ref_node))
+    return IFN_LAST;
+
+  ops->truncate (0);
+  ops->create (4);
+
+  /* The first operand is the same for every permute kind; only the order
+     of the remaining three depends on how RIGHT_OP[0] is loaded.  */
+  complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
+  ops->quick_push (first_ops[0]);
+  if (kind == PERM_EVENODD)
+    {
+      ops->quick_push (right_op[0]);
+      ops->quick_push (right_op[1]);
+      ops->quick_push (left_op[1]);
+    }
+  else
+    {
+      bool use_first_left
+        = kind == PERM_TOP || (kind == PERM_EVENEVEN && !is_neg);
+      ops->quick_push (right_op[1]);
+      ops->quick_push (right_op[0]);
+      ops->quick_push (use_first_left ? left_op[0] : left_op[1]);
+    }
+
+  return ifn;
+}
+
+/* Attempt to recognize a complex fms (multiply and subtract) pattern. */
+
+vect_pattern*
+complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+                                slp_tree *node)
+{
+  /* Operand vector handed to the pair detection and then to the matcher.  */
+  auto_vec<slp_tree> ops;
+  complex_operation_t op = vect_detect_pair_op (*node, true, &ops);
+
+  internal_fn ifn
+    = complex_fms_pattern::matches (op, perm_cache, node, &ops);
+  if (ifn != IFN_LAST)
+    return new complex_fms_pattern (node, &ops, ifn);
+
+  /* The matcher rejected the node.  */
+  return NULL;
+}
+
+/* Perform a replacement of the detected complex fms pattern with the new
+ instruction sequences.
+
+ The children of *M_NODE become
+ { M_OPS[0], M_OPS[1], COMBINE (M_OPS[2], M_OPS[3]) }, where COMBINE is
+ the node returned by vect_build_combine_node. Every previous child is
+ passed to vect_free_slp_tree before the child vector is released and
+ re-created, and complex_pattern::build is called last to rewrite the
+ node itself. */
+
+void
+complex_fms_pattern::build (vec_info *vinfo)
+{
+ slp_tree node;
+ unsigned i;
+ slp_tree newnode =
+ vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
+ SLP_TREE_REF_COUNT (this->m_ops[0])++;
+ SLP_TREE_REF_COUNT (this->m_ops[1])++; /* M_OPS[0] and M_OPS[1] become
+ children 0 and 1 below. NOTE(review): presumably the references are
+ taken before the old children are freed so that the operands survive
+ -- confirm. */
+
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
+ vect_free_slp_tree (node);
+
+ SLP_TREE_CHILDREN (*this->m_node).release ();
+ SLP_TREE_CHILDREN (*this->m_node).create (3);
+
+ /* First re-arrange the children. The vector was just re-created with
+ room for three entries, which are pushed in order. */
+ SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
+ SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
+ SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
+
+ /* And then rewrite the node itself. */
+ complex_pattern::build (vinfo);
+}
+
+/*******************************************************************************
+ * complex_operations_pattern class
+ ******************************************************************************/
+
+/* This class combines all the existing complex pattern matchers above into
+ one proxy that shares the functionality between them. The initial match is
+ shared between all complex operations. */
+
+class complex_operations_pattern : public complex_pattern
+{
+  protected:
+    /* Forward NODE, M_OPS and IFN to complex_pattern and set the argument
+       count M_NUM_ARGS to 0.  */
+    complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
+                                internal_fn ifn)
+      : complex_pattern (node, m_ops, ifn)
+    {
+      m_num_args = 0;
+    }
+
+  public:
+    static vect_pattern *
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+    static internal_fn
+    matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
+             vec<slp_tree> *);
+
+    void build (vec_info *);
+};
+
+/* Dummy matches implementation for proxy object. All arguments are ignored
+ and IFN_LAST is always returned; complex_operations_pattern::recognize
+ calls the matches functions of the individual complex patterns
+ instead. */
+
+internal_fn
+complex_operations_pattern::
+matches (complex_operation_t /* op */,
+ slp_tree_to_load_perm_map_t * /* perm_cache */,
+ slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
+{
+ return IFN_LAST;
+}
+
+/* Attempt to recognize one of the complex operation patterns, trying FMS,
+ then MUL and then ADD. */
+
+vect_pattern*
+complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
+                                       slp_tree *node)
+{
+  /* The pair detection is done once and its result is handed to every
+     matcher.  The first matcher that succeeds decides which pattern is
+     instantiated.  */
+  auto_vec<slp_tree> ops;
+  complex_operation_t op = vect_detect_pair_op (*node, true, &ops);
+
+  internal_fn fms_ifn
+    = complex_fms_pattern::matches (op, perm_cache, node, &ops);
+  if (fms_ifn != IFN_LAST)
+    return complex_fms_pattern::mkInstance (node, &ops, fms_ifn);
+
+  internal_fn mul_ifn
+    = complex_mul_pattern::matches (op, perm_cache, node, &ops);
+  if (mul_ifn != IFN_LAST)
+    return complex_mul_pattern::mkInstance (node, &ops, mul_ifn);
+
+  internal_fn add_ifn
+    = complex_add_pattern::matches (op, perm_cache, node, &ops);
+  if (add_ifn != IFN_LAST)
+    return complex_add_pattern::mkInstance (node, &ops, add_ifn);
+
+  /* None of the complex patterns matched.  */
+  return NULL;
+}
+
+/* Dummy implementation of build. It is marked unreachable:
+ complex_operations_pattern::recognize only returns NULL or an instance
+ created by the mkInstance function of one of the individual complex
+ patterns. */
+
+void
+complex_operations_pattern::build (vec_info * /* vinfo */)
+{
+ gcc_unreachable ();
+}
+
+
+/* The addsub_pattern. */
+
+class addsub_pattern : public vect_pattern
+{
+  public:
+    /* Forward NODE and IFN to vect_pattern, passing NULL as the base
+       class' second argument.  */
+    addsub_pattern (slp_tree *node, internal_fn ifn)
+      : vect_pattern (node, NULL, ifn)
+    {
+    }
+
+    static vect_pattern *
+    recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
+
+    void build (vec_info *);
+};
+
+vect_pattern *
+addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_)
+{
+  slp_tree node = *node_;
+
+  /* Only a permute of exactly two inputs with an even number of lanes is
+     a candidate.  */
+  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+    return NULL;
+  if (SLP_TREE_CHILDREN (node).length () != 2)
+    return NULL;
+  if (SLP_TREE_LANE_PERMUTATION (node).length () % 2)
+    return NULL;
+
+  /* Match a blend of a plus and a minus op with the same number of plus and
+     minus lanes on the same operands.  Lanes 0 and 1 have to select
+     different inputs.  */
+  unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
+  unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
+  if (l0 == l1)
+    return NULL;
+
+  /* Each selected input has to be a PLUS_EXPR or a MINUS_EXPR.  */
+  slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
+  bool l0add_p = vect_match_expression_p (l0node, PLUS_EXPR);
+  if (!l0add_p && !vect_match_expression_p (l0node, MINUS_EXPR))
+    return NULL;
+
+  slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
+  bool l1add_p = vect_match_expression_p (l1node, PLUS_EXPR);
+  if (!l1add_p && !vect_match_expression_p (l1node, MINUS_EXPR))
+    return NULL;
+
+  /* Both inputs have to work on the same two operands, in either order.  */
+  bool same_order
+    = (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
+       && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1]);
+  if (!same_order
+      && (SLP_TREE_CHILDREN (l0node)[0] != SLP_TREE_CHILDREN (l1node)[1]
+          || SLP_TREE_CHILDREN (l0node)[1] != SLP_TREE_CHILDREN (l1node)[0]))
+    return NULL;
+
+  unsigned nlanes = SLP_TREE_LANE_PERMUTATION (node).length ();
+  for (unsigned lane = 0; lane < nlanes; ++lane)
+    {
+      std::pair<unsigned, unsigned> perm
+        = SLP_TREE_LANE_PERMUTATION (node)[lane];
+      /* It has to be alternating -, +, -,
+         While we could permute the .ADDSUB inputs and the .ADDSUB output
+         that's only profitable over the add + sub + blend if at least
+         one of the permute is optimized which we can't determine here.  */
+      unsigned expected_input = (lane & 1) ? l1 : l0;
+      if (perm.first != expected_input || perm.second != lane)
+        return NULL;
+    }
+
+  /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
+     (l0add_p), see whether we have FMA variants.  */
+  if (l0add_p)
+    {
+      /* (c * d) +- a.  There is no non-FMA fallback for this lane order.  */
+      if (vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR)
+          && vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
+        return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
+      return NULL;
+    }
+
+  /* (c * d) -+ a */
+  if (vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR)
+      && vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
+    return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
+
+  /* Plain { -, +, -, + ... } blend.  */
+  if (vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
+    return new addsub_pattern (node_, IFN_VEC_ADDSUB);
+
+  return NULL;
+}
+
+void
+addsub_pattern::build (vec_info *vinfo)
+{ /* Rewrite the blend node *M_NODE in place into a call to M_IFN.
+
+ For IFN_VEC_ADDSUB the new children are the two children of the minus
+ node. For IFN_VEC_FMADDSUB and IFN_VEC_FMSUBADD they are the two
+ children of the multiplication that is the first child of the minus
+ node, followed by the second child of the minus node. In each case a
+ call to the internal function is built from the operands of the
+ representative statements, registered through VINFO->add_pattern_stmt
+ and made the representative of the node; the node code is set to
+ ERROR_MARK, its lane permutation is released and the old add and sub
+ children are passed to vect_free_slp_tree. Any other M_IFN leaves the
+ node untouched. */
+ slp_tree node = *m_node;
+
+ unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
+ unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first; /* L0 and L1 are
+ the child indices selected by lanes 0 and 1 of the blend. */
+
+ switch (m_ifn)
+ {
+ case IFN_VEC_ADDSUB:
+ {
+ slp_tree sub = SLP_TREE_CHILDREN (node)[l0]; /* addsub_pattern::recognize
+ only creates IFN_VEC_ADDSUB when the lane 0 input is not a
+ PLUS_EXPR, so the lane 0 input is the minus node. */
+ slp_tree add = SLP_TREE_CHILDREN (node)[l1];
+
+ /* Modify the blend node in-place. The operands of SUB become the
+ children of NODE and each gets an additional reference. */
+ SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
+ SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
+
+ /* Build IFN_VEC_ADDSUB from the sub representative operands. */
+ stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
+ gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
+ gimple_assign_rhs1 (rep->stmt),
+ gimple_assign_rhs2 (rep->stmt));
+ gimple_call_set_lhs (call, make_ssa_name
+ (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
+ gimple_call_set_nothrow (call, true);
+ gimple_set_bb (call, gimple_bb (rep->stmt));
+ stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
+ SLP_TREE_REPRESENTATIVE (node) = new_rep;
+ STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
+ STMT_SLP_TYPE (new_rep) = pure_slp;
+ STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
+ STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
+ SLP_TREE_CODE (node) = ERROR_MARK; /* The node is no longer marked as a
+ VEC_PERM_EXPR, so its lane permutation is dropped as well. */
+ SLP_TREE_LANE_PERMUTATION (node).release ();
+
+ vect_free_slp_tree (sub);
+ vect_free_slp_tree (add);
+ break;
+ }
+ case IFN_VEC_FMADDSUB:
+ case IFN_VEC_FMSUBADD:
+ {
+ slp_tree sub, add;
+ if (m_ifn == IFN_VEC_FMADDSUB)
+ {
+ sub = SLP_TREE_CHILDREN (node)[l0];
+ add = SLP_TREE_CHILDREN (node)[l1];
+ }
+ else /* m_ifn == IFN_VEC_FMSUBADD */
+ {
+ sub = SLP_TREE_CHILDREN (node)[l1];
+ add = SLP_TREE_CHILDREN (node)[l0];
+ }
+ slp_tree mul = SLP_TREE_CHILDREN (sub)[0]; /* addsub_pattern::recognize
+ checked that this child matches MULT_EXPR. */
+ /* Modify the blend node in-place. NODE gets three children: the two
+ operands of MUL and the second operand of SUB, each with an
+ additional reference. */
+ SLP_TREE_CHILDREN (node).safe_grow (3, true);
+ SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
+ SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
+ SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
+ SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
+
+ /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands.
+ The same code builds IFN_VEC_FMSUBADD, as M_IFN is passed to the
+ call builder. */
+ stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
+ stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
+ gcall *call = gimple_build_call_internal (m_ifn, 3,
+ gimple_assign_rhs1 (mrep->stmt),
+ gimple_assign_rhs2 (mrep->stmt),
+ gimple_assign_rhs2 (srep->stmt));
+ gimple_call_set_lhs (call, make_ssa_name
+ (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
+ gimple_call_set_nothrow (call, true);
+ gimple_set_bb (call, gimple_bb (srep->stmt));
+ stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
+ SLP_TREE_REPRESENTATIVE (node) = new_rep;
+ STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
+ STMT_SLP_TYPE (new_rep) = pure_slp;
+ STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
+ STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
+ SLP_TREE_CODE (node) = ERROR_MARK; /* The node is no longer marked as a
+ VEC_PERM_EXPR, so its lane permutation is dropped as well. */
+ SLP_TREE_LANE_PERMUTATION (node).release ();
+
+ vect_free_slp_tree (sub);
+ vect_free_slp_tree (add);
+ break;
+ }
+ default:; /* Nothing is rewritten for any other internal function. */
+ }
+}
+
+/*******************************************************************************
+ * Pattern matching definitions
+ ******************************************************************************/
+
+#define SLP_PATTERN(x) &x::recognize
+vect_pattern_decl_t slp_patterns[]
+{
+ /* For least amount of back-tracking and more efficient matching
+ order patterns from the largest to the smallest. Especially if they
+ overlap in what they can detect.
+
+ Each entry is the address of the static recognize function of a
+ pattern class, as produced by SLP_PATTERN. */
+
+ SLP_PATTERN (complex_operations_pattern),
+ SLP_PATTERN (addsub_pattern)
+};
+#undef SLP_PATTERN
+
+/* Number of entries in slp_patterns, derived from the size of the table
+   so that it follows additions and removals automatically.  */
+size_t num__slp_patterns
+  = sizeof (slp_patterns) / sizeof (slp_patterns[0]);