author     Richard Biener <rguenther@suse.de>  2020-12-03 10:25:14 +0100
committer  Richard Biener <rguenther@suse.de>  2020-12-07 12:07:12 +0100
commit     ebdfd1606da6b5aa586b0cd156b69b659235c9c2 (patch)
tree       5a5649a6f9dd5d39848c76e182968f356e2d32c0
parent     cdcbef3c3310a14f2994982b44cb1f8e14c77232 (diff)
tree-optimization/98113 - vectorize a sequence of BIT_INSERT_EXPRs
This adds the capability to vectorize a sequence of vector
BIT_INSERT_EXPRs, similar to how we vectorize vector constructors.

2020-12-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/98113
	* tree-vectorizer.h (struct slp_root): New.
	(_bb_vec_info::roots): New member.
	* tree-vect-slp.c (vect_analyze_slp): Also walk BB info roots.
	(_bb_vec_info::_bb_vec_info): Adjust.
	(_bb_vec_info::~_bb_vec_info): Likewise.
	(vld_cmp): New.
	(vect_slp_is_lane_insert): Likewise.
	(vect_slp_check_for_constructors): Match a series of
	BIT_INSERT_EXPRs as vector constructor.
	(vect_slp_analyze_bb_1): Continue if BB info roots is not empty.
	(vect_slp_analyze_bb_1): Mark the whole BIT_INSERT_EXPR root
	sequence as pure_slp.

	* gcc.dg/vect/bb-slp-70.c: New testcase.
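For illustration (an editor's sketch, not part of the commit message): after
gimplification the new testcase below reaches the BB vectorizer as a chain of
BIT_INSERT_EXPRs rather than a CONSTRUCTOR.  SSA names here are invented and
the lane-1 insert is elided:

    _1 = BIT_FIELD_REF <a_6(D), 32, 64>;
    _2 = __builtin_popcount (_1);
    _3 = (unsigned int) _2;
    r_10 = BIT_INSERT_EXPR <r_9(D), _3, 64>;   <-- lane 2 (bit 64 / 32)
    ...
    r_14 = BIT_INSERT_EXPR <r_12, _5, 0>;      <-- lane 0, matching starts here
    r_16 = BIT_INSERT_EXPR <r_14, _7, 96>;     <-- lane 3, the root stmt
    return r_16;

The new code matches outwards from the lane-zero insert, forward along the
single-use chain and backward along the rhs1 def chain, so the inserts need
not appear in lane order.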
 gcc/testsuite/gcc.dg/vect/bb-slp-70.c |  17
 gcc/tree-vect-slp.c                   | 192
 gcc/tree-vectorizer.h                 |  12
 3 files changed, 200 insertions(+), 21 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-70.c b/gcc/testsuite/gcc.dg/vect/bb-slp-70.c
new file mode 100644
index 0000000..0eb7011
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-70.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx512vl -mavx512vpopcntdq" { target avx512vpopcntdq } } */
+
+typedef unsigned uv4si __attribute__((vector_size(16)));
+
+uv4si __attribute__((noinline))
+vpopctf (uv4si a)
+{
+ uv4si r;
+ r[2] = __builtin_popcount (a[2]);
+ r[1] = __builtin_popcount (a[1]);
+ r[0] = __builtin_popcount (a[0]);
+ r[3] = __builtin_popcount (a[3]);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" { target avx512vpopcntdq } } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 3bd40cb..2dccca0 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2578,6 +2578,19 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
? slp_inst_kind_store : slp_inst_kind_ctor,
max_tree_size);
+ if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
+ {
+ for (unsigned i = 0; i < bb_vinfo->roots.length (); ++i)
+ {
+ vect_location = bb_vinfo->roots[i].root->stmt;
+ if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
+ bb_vinfo->roots[i].stmts,
+ bb_vinfo->roots[i].root,
+ max_tree_size, bst_map, NULL))
+ bb_vinfo->roots[i].stmts = vNULL;
+ }
+ }
+
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
{
/* Find SLP sequences starting from reduction chains. */
@@ -3336,7 +3349,7 @@ vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
/* Initialize a bb_vec_info struct for the statements in BBS basic blocks. */
_bb_vec_info::_bb_vec_info (vec<basic_block> _bbs, vec_info_shared *shared)
- : vec_info (vec_info::bb, init_cost (NULL), shared), bbs (_bbs)
+ : vec_info (vec_info::bb, init_cost (NULL), shared), bbs (_bbs), roots (vNULL)
{
for (unsigned i = 0; i < bbs.length (); ++i)
{
@@ -3383,6 +3396,10 @@ _bb_vec_info::~_bb_vec_info ()
gimple_set_uid (stmt, -1);
}
}
+
+ for (unsigned i = 0; i < roots.length (); ++i)
+ roots[i].stmts.release ();
+ roots.release ();
}
/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
@@ -4105,6 +4122,38 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
return true;
}
+/* qsort comparator for lane defs. */
+
+static int
+vld_cmp (const void *a_, const void *b_)
+{
+ auto *a = (const std::pair<unsigned, tree> *)a_;
+ auto *b = (const std::pair<unsigned, tree> *)b_;
+ return a->first - b->first;
+}
+
+/* Return true if USE_STMT is a vector lane insert into VEC and set
+ *THIS_LANE to the lane number that is set. */
+
+static bool
+vect_slp_is_lane_insert (gimple *use_stmt, tree vec, unsigned *this_lane)
+{
+ gassign *use_ass = dyn_cast <gassign *> (use_stmt);
+ if (!use_ass
+ || gimple_assign_rhs_code (use_ass) != BIT_INSERT_EXPR
+ || (vec
+ ? gimple_assign_rhs1 (use_ass) != vec
+ : ((vec = gimple_assign_rhs1 (use_ass)), false))
+ || !useless_type_conversion_p (TREE_TYPE (TREE_TYPE (vec)),
+ TREE_TYPE (gimple_assign_rhs2 (use_ass)))
+ || !constant_multiple_p
+ (tree_to_poly_uint64 (gimple_assign_rhs3 (use_ass)),
+ tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (TREE_TYPE (vec)))),
+ this_lane))
+ return false;
+ return true;
+}
+
/* Find any vectorizable constructors and add them to the grouped_store
array. */
@@ -4116,28 +4165,114 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
!gsi_end_p (gsi); gsi_next (&gsi))
{
gassign *assign = dyn_cast<gassign *> (gsi_stmt (gsi));
- if (!assign || gimple_assign_rhs_code (assign) != CONSTRUCTOR)
+ if (!assign)
continue;
tree rhs = gimple_assign_rhs1 (assign);
- if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
- || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
- CONSTRUCTOR_NELTS (rhs))
- || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
- || uniform_vector_p (rhs))
- continue;
+ if (gimple_assign_rhs_code (assign) == CONSTRUCTOR)
+ {
+ if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
+ CONSTRUCTOR_NELTS (rhs))
+ || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
+ || uniform_vector_p (rhs))
+ continue;
- unsigned j;
- tree val;
- FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), j, val)
- if (TREE_CODE (val) != SSA_NAME
- || !bb_vinfo->lookup_def (val))
- break;
- if (j != CONSTRUCTOR_NELTS (rhs))
- continue;
+ unsigned j;
+ tree val;
+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), j, val)
+ if (TREE_CODE (val) != SSA_NAME
+ || !bb_vinfo->lookup_def (val))
+ break;
+ if (j != CONSTRUCTOR_NELTS (rhs))
+ continue;
- stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (assign);
- BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
+ stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (assign);
+ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
+ }
+ else if (gimple_assign_rhs_code (assign) == BIT_INSERT_EXPR
+ && VECTOR_TYPE_P (TREE_TYPE (rhs))
+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)).is_constant ()
+ && integer_zerop (gimple_assign_rhs3 (assign))
+ && useless_type_conversion_p
+ (TREE_TYPE (TREE_TYPE (rhs)),
+ TREE_TYPE (gimple_assign_rhs2 (assign))))
+ {
+ /* We start matching on an insert to lane zero, but since the
+ inserts need not be ordered we have to search both the def
+ and the use chains. */
+ tree vectype = TREE_TYPE (rhs);
+ unsigned nlanes = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
+ auto_vec<std::pair<unsigned, tree> > lane_defs (nlanes);
+ auto_sbitmap lanes (nlanes);
+ bitmap_clear (lanes);
+ bitmap_set_bit (lanes, 0);
+ tree def = gimple_assign_lhs (assign);
+ lane_defs.quick_push
+ (std::make_pair (0, gimple_assign_rhs2 (assign)));
+ unsigned lanes_found = 1;
+ /* Start with the use chain; the last stmt will be the root. */
+ stmt_vec_info last = bb_vinfo->lookup_stmt (assign);
+ do
+ {
+ use_operand_p use_p;
+ gimple *use_stmt;
+ if (!single_imm_use (def, &use_p, &use_stmt))
+ break;
+ unsigned this_lane;
+ if (!bb_vinfo->lookup_stmt (use_stmt)
+ || !vect_slp_is_lane_insert (use_stmt, def, &this_lane)
+ || !bb_vinfo->lookup_def (gimple_assign_rhs2 (use_stmt)))
+ break;
+ if (bitmap_bit_p (lanes, this_lane))
+ break;
+ lanes_found++;
+ bitmap_set_bit (lanes, this_lane);
+ gassign *use_ass = as_a <gassign *> (use_stmt);
+ lane_defs.quick_push (std::make_pair
+ (this_lane, gimple_assign_rhs2 (use_ass)));
+ last = bb_vinfo->lookup_stmt (use_ass);
+ def = gimple_assign_lhs (use_ass);
+ }
+ while (lanes_found < nlanes);
+ if (lanes_found < nlanes)
+ {
+ /* Now search the def chain. */
+ def = gimple_assign_rhs1 (assign);
+ do
+ {
+ if (!has_single_use (def))
+ break;
+ gimple *def_stmt = SSA_NAME_DEF_STMT (def);
+ unsigned this_lane;
+ if (!bb_vinfo->lookup_stmt (def_stmt)
+ || !vect_slp_is_lane_insert (def_stmt,
+ NULL_TREE, &this_lane)
+ || !bb_vinfo->lookup_def (gimple_assign_rhs2 (def_stmt)))
+ break;
+ if (bitmap_bit_p (lanes, this_lane))
+ break;
+ lanes_found++;
+ bitmap_set_bit (lanes, this_lane);
+ lane_defs.quick_push (std::make_pair
+ (this_lane,
+ gimple_assign_rhs2 (def_stmt)));
+ def = gimple_assign_rhs1 (def_stmt);
+ }
+ while (lanes_found < nlanes);
+ }
+ if (lanes_found == nlanes)
+ {
+ /* Sort lane_defs by the lane index and register the root. */
+ lane_defs.qsort (vld_cmp);
+ vec<stmt_vec_info> stmts;
+ stmts.create (nlanes);
+ for (unsigned i = 0; i < nlanes; ++i)
+ stmts.quick_push (bb_vinfo->lookup_def (lane_defs[i].second));
+ bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_ctor,
+ stmts, last));
+ }
+ }
}
}
@@ -4227,7 +4362,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
/* If there are no grouped stores and no constructors in the region
there is no need to continue with pattern recog as vect_analyze_slp
will fail anyway. */
- if (bb_vinfo->grouped_stores.is_empty ())
+ if (bb_vinfo->grouped_stores.is_empty ()
+ && bb_vinfo->roots.is_empty ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4290,8 +4426,22 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
relevant. */
vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
- if (SLP_INSTANCE_ROOT_STMT (instance))
- STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
+ if (stmt_vec_info root = SLP_INSTANCE_ROOT_STMT (instance))
+ {
+ STMT_SLP_TYPE (root) = pure_slp;
+ if (is_gimple_assign (root->stmt)
+ && gimple_assign_rhs_code (root->stmt) == BIT_INSERT_EXPR)
+ {
+ /* ??? We should probably record the whole vector of
+ root stmts so we do not have to back-track here... */
+ for (unsigned n = SLP_TREE_LANES (SLP_INSTANCE_TREE (instance));
+ n != 1; --n)
+ {
+ root = bb_vinfo->lookup_def (gimple_assign_rhs1 (root->stmt));
+ STMT_SLP_TYPE (root) = pure_slp;
+ }
+ }
+ }
i++;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index c0f786c..95e8ea0 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -849,6 +849,16 @@ loop_vec_info_for_loop (class loop *loop)
return (loop_vec_info) loop->aux;
}
+struct slp_root
+{
+ slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
+ stmt_vec_info root_)
+ : kind(kind_), stmts(stmts_), root(root_) {}
+ slp_instance_kind kind;
+ vec<stmt_vec_info> stmts;
+ stmt_vec_info root;
+};
+
typedef class _bb_vec_info : public vec_info
{
public:
@@ -860,6 +870,8 @@ public:
entry edge to cover bbs[0] PHI nodes and have a region entry
insert location. */
vec<basic_block> bbs;
+
+ vec<slp_root> roots;
} *bb_vec_info;
#define BB_VINFO_BB(B) (B)->bb
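
To connect the pieces (an editor's sketch, not code from the patch): for the
testcase above, vect_slp_check_for_constructors records a single root,
conceptually

    /* stmts: the four popcount defs, sorted by lane index via vld_cmp;
       last: the final BIT_INSERT_EXPR found on the use chain.  */
    bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_ctor, stmts, last));

and vect_analyze_slp then builds an SLP instance from those stmts just as it
does for a CONSTRUCTOR, with the recorded root stmt standing in for the
constructor assignment.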