aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.c
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2020-03-25 14:42:49 +0100
committerRichard Biener <rguenther@suse.de>2020-06-18 12:05:37 +0200
commitda2b7c7f0a136b4d00520a08d4c443fc2e3a467d (patch)
tree93af9b90f477e9e91da6c5a873dc5286340212bb /gcc/tree-vectorizer.c
parent2ab42c70a62fe10f40a623adf48002ac8cdb9bf8 (diff)
downloadgcc-da2b7c7f0a136b4d00520a08d4c443fc2e3a467d.zip
gcc-da2b7c7f0a136b4d00520a08d4c443fc2e3a467d.tar.gz
gcc-da2b7c7f0a136b4d00520a08d4c443fc2e3a467d.tar.bz2
remove SLP_TREE_TWO_OPERATORS, add SLP permutation node
This removes the SLP_TREE_TWO_OPERATORS hack in favor of having explicit SLP nodes for both computations and the blend operation. For this introduce a generic merge + select + permute SLP node (with implementation limits). Building upon earlier patches it adds vect_stmt_dominates_stmt_p and the ability to compute a vector insertion place from vectorized stmts (which now have UID zero) as needed for the permute node. 2020-06-17 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (_slp_tree::two_operators): Remove. (_slp_tree::lane_permutation): New member. (_slp_tree::code): Likewise. (SLP_TREE_TWO_OPERATORS): Remove. (SLP_TREE_LANE_PERMUTATION): New. (SLP_TREE_CODE): Likewise. (vect_stmt_dominates_stmt_p): Declare. * tree-vectorizer.c (vect_stmt_dominates_stmt_p): New function. * tree-vect-stmts.c (vect_model_simple_cost): Remove SLP_TREE_TWO_OPERATORS handling. * tree-vect-slp.c (_slp_tree::_slp_tree): Amend. (_slp_tree::~_slp_tree): Likewise. (vect_two_operations_perm_ok_p): Remove. (vect_build_slp_tree_1): Remove verification of two-operator permutation here. (vect_build_slp_tree_2): When we have two different operators build two computation SLP nodes and a blend. (vect_print_slp_tree): Print the lane permutation if it exists. (slp_copy_subtree): Copy it. (vect_slp_rearrange_stmts): Re-arrange it. (vect_slp_analyze_node_operations_1): Handle SLP_TREE_CODE VEC_PERM_EXPR explicitly. (vect_schedule_slp_instance): Likewise. Remove old SLP_TREE_TWO_OPERATORS code. (vectorizable_slp_permutation): New function.
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--gcc/tree-vectorizer.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 76cfba5..e262ba0 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -711,6 +711,63 @@ vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
free (stmt_info);
}
+/* Returns true if S1 dominates S2.
+
+ A stmt is treated as dominating itself. Stmts in different basic
+ blocks are ordered by querying CDI_DOMINATORS on their blocks.
+ Stmts in the same basic block are ordered first by whether they
+ are PHIs and otherwise by their relative position in the block's
+ stmt sequence, which is derived from stmt UIDs as described in
+ the body. */
+
+bool
+vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
+{
+ basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
+
+ /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
+ SSA_NAME. Assume it lives at the beginning of function and
+ thus dominates everything. Identical stmts are also reported
+ as dominating each other. */
+ if (!bb1 || s1 == s2)
+ return true;
+
+ /* If bb2 is NULL, S2 has no basic block while S1 does (bb1 is
+ known to be non-NULL at this point), so S1 is not considered
+ to dominate it. */
+ if (!bb2)
+ return false;
+
+ if (bb1 != bb2)
+ return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
+
+ /* PHIs in the same basic block are assumed to be
+ executed all in parallel, if only one stmt is a PHI,
+ it dominates the other stmt in the same basic block.
+ S1 is tested first, so two distinct PHIs in the same
+ block yield true. */
+ if (gimple_code (s1) == GIMPLE_PHI)
+ return true;
+
+ if (gimple_code (s2) == GIMPLE_PHI)
+ return false;
+
+ /* Inserted vectorized stmts all have UID 0 while the original stmts
+ in the IL have UID increasing within a BB. Walk from both sides
+ until we find the other stmt or a stmt with UID != 0.
+
+ The first walk moves forward from S1: meeting S2 means S1 comes
+ first (true), exhausting the iterator means S2 was not found
+ after S1 (false). The second walk moves backward from S2:
+ meeting S1 means S1 comes first (true), exhausting the iterator
+ means S1 was not found before S2 (false). A walk does not move
+ at all when its starting stmt already has a nonzero UID. When
+ both walks stop on stmts with nonzero UID, those two UIDs are
+ compared; equal UIDs count as S1 dominating S2. */
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
+ while (gimple_uid (gsi_stmt (gsi1)) == 0)
+ {
+ gsi_next (&gsi1);
+ if (gsi_end_p (gsi1))
+ return false;
+ if (gsi_stmt (gsi1) == s2)
+ return true;
+ }
+
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
+ while (gimple_uid (gsi_stmt (gsi2)) == 0)
+ {
+ gsi_prev (&gsi2);
+ if (gsi_end_p (gsi2))
+ return false;
+ if (gsi_stmt (gsi2) == s1)
+ return true;
+ }
+
+ if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
+ return true;
+ return false;
+}
+
/* A helper function to free scev and LOOP niter information, as well as
clear loop constraint LOOP_C_FINITE. */