aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2021-01-14 20:50:57 +0000
committerTamar Christina <tamar.christina@arm.com>2021-01-14 20:54:31 +0000
commit500600c78408ab4c6450c7eeaea2e3657e305ed2 (patch)
treed8f89de744aebd6b685529596e11a49bbaa65070 /gcc
parentfe70119531395c8105b098bfa915c003c78be038 (diff)
downloadgcc-500600c78408ab4c6450c7eeaea2e3657e305ed2.zip
gcc-500600c78408ab4c6450c7eeaea2e3657e305ed2.tar.gz
gcc-500600c78408ab4c6450c7eeaea2e3657e305ed2.tar.bz2
slp: Support optimizing load distribution
This introduces a post processing step for the pattern matcher to flatten
permutes introduced by the complex multiplication patterns.

This performs a blend early such that SLP is not cancelled by the
LOAD_LANES permute.  This is a temporary workaround to the fact that loads
are not CSEd during building and is required to produce efficient code.

gcc/ChangeLog:

	* tree-vect-slp.c (optimize_load_redistribution_1): New.
	(optimize_load_redistribution, vect_is_slp_load_node): New.
	(vect_match_slp_patterns): Use it.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/tree-vect-slp.c154
1 files changed, 136 insertions, 18 deletions
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index f7f656a..6b6c9cc 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2258,6 +2258,123 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
return exact_div (common_multiple (nunits, group_size), group_size);
}
+/* Helper that checks to see if a node is a load node.  That is, an internal
+   def whose representative stmt is a grouped access that reads (rather than
+   writes) its data reference.  */
+
+static inline bool
+vect_is_slp_load_node (slp_tree root)
+{
+ return SLP_TREE_DEF_TYPE (root) == vect_internal_def
+ && STMT_VINFO_GROUPED_ACCESS (SLP_TREE_REPRESENTATIVE (root))
+ && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root)));
+}
+
+
+/* Helper function of optimize_load_redistribution that performs the operation
+   recursively.  If ROOT is a VEC_PERM_EXPR whose selected operands are all
+   leaf load nodes, rebuild it as a single (permuted) load node via
+   vect_build_slp_tree, CSEd through BST_MAP, and return the replacement;
+   otherwise recurse into ROOT's children, splice in any replacements found
+   there, and return NULL (meaning ROOT itself is kept).  LOAD_MAP memoizes
+   the result for every node visited.  */
+
+static slp_tree
+optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
+ vec_info *vinfo, unsigned int group_size,
+ hash_map<slp_tree, slp_tree> *load_map,
+ slp_tree root)
+{
+ /* Already visited: return the memoized replacement (NULL = keep node).  */
+ if (slp_tree *leader = load_map->get (root))
+ return *leader;
+
+ /* Pre-mark ROOT as "keep" before recursing so traversal of a cyclic SLP
+ graph terminates.  */
+ load_map->put (root, NULL);
+
+ slp_tree node;
+ unsigned i;
+
+ /* For now, we don't know anything about externals so do not do anything. */
+ if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
+ return NULL;
+ else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
+ {
+ /* First convert this node into a load node and add it to the leaves
+ list and flatten the permute from a lane to a load one. If it's
+ unneeded it will be elided later. */
+ vec<stmt_vec_info> stmts;
+ stmts.create (SLP_TREE_LANES (root));
+ lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
+ for (unsigned j = 0; j < lane_perm.length (); j++)
+ {
+ /* perm.first selects the child node, perm.second the lane in it.  */
+ std::pair<unsigned, unsigned> perm = lane_perm[j];
+ node = SLP_TREE_CHILDREN (root)[perm.first];
+
+ /* Only handle operands that are pure leaf loads; a child with
+ children of its own computes something, so give up on this
+ permute and just recurse into the children instead.  */
+ if (!vect_is_slp_load_node (node)
+ || SLP_TREE_CHILDREN (node).exists ())
+ {
+ stmts.release ();
+ goto next;
+ }
+
+ /* Gather the scalar stmt each lane of the permute selects.  */
+ stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "converting stmts on permute node %p\n", root);
+
+ /* Re-build the gathered stmts as a fresh SLP (load) node; BST_MAP CSEs
+ it against nodes built earlier.  On failure STMTS is still ours to
+ release; on success ownership passed to the new node.  */
+ bool *matches = XALLOCAVEC (bool, group_size);
+ poly_uint64 max_nunits = 1;
+ unsigned tree_size = 0, limit = 1;
+ node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits,
+ matches, &limit, &tree_size, bst_map);
+ if (!node)
+ stmts.release ();
+
+ /* NOTE(review): LOAD_MAP caches the replacement without taking its own
+ reference on it — presumably the callers' SLP_TREE_REF_COUNT bump
+ keeps it live for the map's lifetime; verify against
+ vect_free_slp_tree.  */
+ load_map->put (root, node);
+ return node;
+ }
+
+next:
+ /* ROOT is kept: recurse into the children and splice in any replacement
+ computed for them, transferring one reference each time.  */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
+ {
+ slp_tree value
+ = optimize_load_redistribution_1 (bst_map, vinfo, group_size, load_map,
+ node);
+ if (value)
+ {
+ SLP_TREE_REF_COUNT (value)++;
+ SLP_TREE_CHILDREN (root)[i] = value;
+ vect_free_slp_tree (node);
+ }
+ }
+
+ return NULL;
+}
+
+/* Temporary workaround for loads not being CSEd during SLP build. This
+ function will traverse the SLP tree rooted in ROOT for INSTANCE and find
+ VEC_PERM nodes that blend vectors from multiple nodes that all read from the
+ same DR such that the final operation is equal to a permuted load. Such
+ NODES are then directly converted into LOADS themselves. The nodes are
+ CSEd using BST_MAP.  GROUP_SIZE is the number of lanes to build the
+ replacement load nodes with, and LOAD_MAP memoizes results across calls.  */
+
+static void
+optimize_load_redistribution (scalar_stmts_to_slp_tree_map_t *bst_map,
+ vec_info *vinfo, unsigned int group_size,
+ hash_map<slp_tree, slp_tree> *load_map,
+ slp_tree root)
+{
+ slp_tree node;
+ unsigned i;
+
+ /* Top-level entry: ROOT itself is never replaced, only its children.
+ A non-NULL result means NODE was rewritten into a load node; take a
+ reference on the replacement, splice it in, and drop the old child's
+ reference.  */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
+ {
+ slp_tree value
+ = optimize_load_redistribution_1 (bst_map, vinfo, group_size, load_map,
+ node);
+ if (value)
+ {
+ SLP_TREE_REF_COUNT (value)++;
+ SLP_TREE_CHILDREN (root)[i] = value;
+ vect_free_slp_tree (node);
+ }
+ }
+}
+
/* Helper function of vect_match_slp_patterns.
Attempts to match patterns against the slp tree rooted in REF_NODE using
@@ -2305,8 +2422,7 @@ vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
static bool
vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
hash_set<slp_tree> *visited,
- slp_tree_to_load_perm_map_t *perm_cache,
- scalar_stmts_to_slp_tree_map_t * /* bst_map */)
+ slp_tree_to_load_perm_map_t *perm_cache)
{
DUMP_VECT_SCOPE ("vect_match_slp_patterns");
slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
@@ -2316,20 +2432,7 @@ vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
"Analyzing SLP tree %p for patterns\n",
SLP_INSTANCE_TREE (instance));
- bool found_p
- = vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
-
- if (found_p)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Pattern matched SLP tree\n");
- vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node);
- }
- }
-
- return found_p;
+ return vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
}
/* Analyze an SLP instance starting from a group of grouped stores. Call
@@ -2768,10 +2871,25 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
hash_set<slp_tree> visited_patterns;
slp_tree_to_load_perm_map_t perm_cache;
+ hash_map<slp_tree, slp_tree> load_map;
+
/* See if any patterns can be found in the SLP tree. */
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance)
- vect_match_slp_patterns (instance, vinfo, &visited_patterns, &perm_cache,
- bst_map);
+ if (vect_match_slp_patterns (instance, vinfo, &visited_patterns,
+ &perm_cache))
+ {
+ slp_tree root = SLP_INSTANCE_TREE (instance);
+ optimize_load_redistribution (bst_map, vinfo, SLP_TREE_LANES (root),
+ &load_map, root);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Pattern matched SLP tree\n");
+ vect_print_slp_graph (MSG_NOTE, vect_location, root);
+ }
+ }
+
+
/* The map keeps a reference on SLP nodes built, release that. */
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();